From d3ad8087022b013f6eadbfdd3b3cc6165ae50a22 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Tue, 4 Mar 2014 15:57:40 +0000
Subject: [PATCH 01/32] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@11583
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/lattice.html | 6 ++++--
doc/lattice.txt | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/doc/lattice.html b/doc/lattice.html
index f0c45f21a2..e3dd79db8a 100644
--- a/doc/lattice.html
+++ b/doc/lattice.html
@@ -121,8 +121,10 @@ and a3 are 3 orthogonal unit vectors (edges of a unit cube). But you
can specify them to be of any length and non-orthogonal to each other,
so that they describe a tilted parallelepiped. Via the basis
keyword you add atoms, one at a time, to the unit cell. Its arguments
-are fractional coordinates (0.0 <= x,y,z < 1.0), so that a value of
-0.5 means a position half-way across the unit cell in that dimension.
+are fractional coordinates (0.0 <= x,y,z < 1.0). The position vector
+x of a basis atom within the unit cell is thus a linear combination of
+the unit cell's 3 edge vectors, i.e. x = bx a1 + by a2 + bz a3,
+where bx,by,bz are the 3 values specified for the basis keyword.
diff --git a/doc/lattice.txt b/doc/lattice.txt
index ec622d3434..38f38ae58b 100644
--- a/doc/lattice.txt
+++ b/doc/lattice.txt
@@ -113,8 +113,10 @@ and a3 are 3 orthogonal unit vectors (edges of a unit cube). But you
can specify them to be of any length and non-orthogonal to each other,
so that they describe a tilted parallelepiped. Via the {basis}
keyword you add atoms, one at a time, to the unit cell. Its arguments
-are fractional coordinates (0.0 <= x,y,z < 1.0), so that a value of
-0.5 means a position half-way across the unit cell in that dimension.
+are fractional coordinates (0.0 <= x,y,z < 1.0). The position vector
+x of a basis atom within the unit cell is thus a linear combination of
+the unit cell's 3 edge vectors, i.e. x = bx a1 + by a2 + bz a3,
+where bx,by,bz are the 3 values specified for the {basis} keyword.
:line
From f1e2227ed1cf33c4ab6f097ae280559099324430 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Tue, 4 Mar 2014 16:45:44 +0000
Subject: [PATCH 02/32] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@11584
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/KSPACE/ewald.cpp | 2984 +++---
src/KSPACE/ewald_disp.cpp | 2949 +++---
src/KSPACE/msm.cpp | 2 +-
src/KSPACE/pppm.cpp | 7002 +++++++--------
src/KSPACE/pppm_disp.cpp | 16418 +++++++++++++++++-----------------
src/KSPACE/pppm_old.cpp | 5726 ++++++------
src/USER-CUDA/pppm_cuda.cpp | 2872 +++---
7 files changed, 18977 insertions(+), 18976 deletions(-)
diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp
index ac98f224f2..f750c2cbf3 100644
--- a/src/KSPACE/ewald.cpp
+++ b/src/KSPACE/ewald.cpp
@@ -1,1492 +1,1492 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
- per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU)
- group/group energy/force added by Stan Moore (BYU)
- triclinic added by Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "mpi.h"
-#include "stdlib.h"
-#include "stdio.h"
-#include "string.h"
-#include "math.h"
-#include "ewald.h"
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "pair.h"
-#include "domain.h"
-#include "math_const.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define SMALL 0.00001
-
-/* ---------------------------------------------------------------------- */
-
-Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
- if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command");
-
- ewaldflag = 1;
- group_group_enable = 1;
- group_allocate_flag = 0;
-
- accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
- kmax = 0;
- kxvecs = kyvecs = kzvecs = NULL;
- ug = NULL;
- eg = vg = NULL;
- sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
-
- nmax = 0;
- ek = NULL;
- cs = sn = NULL;
-
- kcount = 0;
-}
-
-/* ----------------------------------------------------------------------
- free all memory
-------------------------------------------------------------------------- */
-
-Ewald::~Ewald()
-{
- deallocate();
- if (group_allocate_flag) deallocate_groups();
- memory->destroy(ek);
- memory->destroy3d_offset(cs,-kmax_created);
- memory->destroy3d_offset(sn,-kmax_created);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::init()
-{
- if (comm->me == 0) {
- if (screen) fprintf(screen,"Ewald initialization ...\n");
- if (logfile) fprintf(logfile,"Ewald initialization ...\n");
- }
-
- // error check
-
- triclinic_check();
- if (domain->dimension == 2)
- error->all(FLERR,"Cannot use Ewald with 2d simulation");
-
- if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald");
- if (slabflag) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab Ewald");
- if (domain->triclinic)
- error->all(FLERR,"Cannot (yet) use Ewald with triclinic box "
- "and slab correction");
- }
-
- // extract short-range Coulombic cutoff from pair style
-
- scale = 1.0;
-
- pair_check();
-
- int itmp;
- double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
- if (p_cutoff == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- double cutoff = *p_cutoff;
-
- qsum = qsqsum = 0.0;
- for (int i = 0; i < atom->nlocal; i++) {
- qsum += atom->q[i];
- qsqsum += atom->q[i]*atom->q[i];
- }
-
- double tmp;
- MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum = tmp;
- MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsqsum = tmp;
-
- if (qsqsum == 0.0)
- error->all(FLERR,"Cannot use kspace solver on system with no charge");
- if (fabs(qsum) > SMALL && comm->me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- // setup K-space resolution
-
- q2 = qsqsum * force->qqrd2e / force->dielectric;
- bigint natoms = atom->natoms;
-
- triclinic = domain->triclinic;
-
- // use xprd,yprd,zprd even if triclinic so grid size is the same
- // adjust z dimension for 2d slab Ewald
- // 3d Ewald just uses zprd since slab_volfactor = 1.0
-
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
-
- // make initial g_ewald estimate
- // based on desired accuracy and real space cutoff
- // fluid-occupied volume used to estimate real-space error
- // zprd used rather than zprd_slab
-
- if (!gewaldflag) {
- if (accuracy <= 0.0)
- error->all(FLERR,"KSpace accuracy must be > 0");
- g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
- if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
- else g_ewald = sqrt(-log(g_ewald)) / cutoff;
- }
-
- // setup Ewald coefficients so can print stats
-
- setup();
-
- // final RMS accuracy
-
- double lprx = rms(kxmax_orig,xprd,natoms,q2);
- double lpry = rms(kymax_orig,yprd,natoms,q2);
- double lprz = rms(kzmax_orig,zprd_slab,natoms,q2);
- double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
- double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
- double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
- double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
- double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
-
- // stats
-
- if (comm->me == 0) {
- if (screen) {
- fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
- fprintf(screen," estimated absolute RMS force accuracy = %g\n",
- estimated_accuracy);
- fprintf(screen," estimated relative force accuracy = %g\n",
- estimated_accuracy/two_charge_force);
- fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n",
- kcount,kmax,kmax3d);
- fprintf(screen," kxmax kymax kzmax = %d %d %d\n",
- kxmax,kymax,kzmax);
- }
- if (logfile) {
- fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
- fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
- estimated_accuracy);
- fprintf(logfile," estimated relative force accuracy = %g\n",
- estimated_accuracy/two_charge_force);
- fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n",
- kcount,kmax,kmax3d);
- fprintf(logfile," kxmax kymax kzmax = %d %d %d\n",
- kxmax,kymax,kzmax);
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- adjust Ewald coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void Ewald::setup()
-{
- // volume-dependent factors
-
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
-
- // adjustment of z dimension for 2d slab Ewald
- // 3d Ewald just uses zprd since slab_volfactor = 1.0
-
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- unitk[0] = 2.0*MY_PI/xprd;
- unitk[1] = 2.0*MY_PI/yprd;
- unitk[2] = 2.0*MY_PI/zprd_slab;
-
- int kmax_old = kmax;
-
- if (kewaldflag == 0) {
-
- // determine kmax
- // function of current box size, accuracy, G_ewald (short-range cutoff)
-
- bigint natoms = atom->natoms;
- double err;
- kxmax = 1;
- kymax = 1;
- kzmax = 1;
-
- err = rms(kxmax,xprd,natoms,q2);
- while (err > accuracy) {
- kxmax++;
- err = rms(kxmax,xprd,natoms,q2);
- }
-
- err = rms(kymax,yprd,natoms,q2);
- while (err > accuracy) {
- kymax++;
- err = rms(kymax,yprd,natoms,q2);
- }
-
- err = rms(kzmax,zprd_slab,natoms,q2);
- while (err > accuracy) {
- kzmax++;
- err = rms(kzmax,zprd_slab,natoms,q2);
- }
-
- kmax = MAX(kxmax,kymax);
- kmax = MAX(kmax,kzmax);
- kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
-
- double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
- double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
- double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
- gsqmx = MAX(gsqxmx,gsqymx);
- gsqmx = MAX(gsqmx,gsqzmx);
-
- kxmax_orig = kxmax;
- kymax_orig = kymax;
- kzmax_orig = kzmax;
-
- // scale lattice vectors for triclinic skew
-
- if (triclinic) {
- double tmp[3];
- tmp[0] = kxmax/xprd;
- tmp[1] = kymax/yprd;
- tmp[2] = kzmax/zprd;
- lamda2xT(&tmp[0],&tmp[0]);
- kxmax = MAX(1,static_cast<int>(tmp[0]));
- kymax = MAX(1,static_cast<int>(tmp[1]));
- kzmax = MAX(1,static_cast<int>(tmp[2]));
-
- kmax = MAX(kxmax,kymax);
- kmax = MAX(kmax,kzmax);
- kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
- }
-
- } else {
-
- kxmax = kx_ewald;
- kymax = ky_ewald;
- kzmax = kz_ewald;
-
- kxmax_orig = kxmax;
- kymax_orig = kymax;
- kzmax_orig = kzmax;
-
- kmax = MAX(kxmax,kymax);
- kmax = MAX(kmax,kzmax);
- kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
-
- double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
- double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
- double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
- gsqmx = MAX(gsqxmx,gsqymx);
- gsqmx = MAX(gsqmx,gsqzmx);
- }
-
- gsqmx *= 1.00001;
-
- // if size has grown, reallocate k-dependent and nlocal-dependent arrays
-
- if (kmax > kmax_old) {
- deallocate();
- allocate();
- group_allocate_flag = 0;
-
- memory->destroy(ek);
- memory->destroy3d_offset(cs,-kmax_created);
- memory->destroy3d_offset(sn,-kmax_created);
- nmax = atom->nmax;
- memory->create(ek,nmax,3,"ewald:ek");
- memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
- memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
- kmax_created = kmax;
- }
-
- // pre-compute Ewald coefficients
-
- if (triclinic == 0)
- coeffs();
- else
- coeffs_triclinic();
-}
-
-/* ----------------------------------------------------------------------
- compute RMS accuracy for a dimension
-------------------------------------------------------------------------- */
-
-double Ewald::rms(int km, double prd, bigint natoms, double q2)
-{
- double value = 2.0*q2*g_ewald/prd *
- sqrt(1.0/(MY_PI*km*natoms)) *
- exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd));
-
- return value;
-}
-
-/* ----------------------------------------------------------------------
- compute the Ewald long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void Ewald::compute(int eflag, int vflag)
-{
- int i,j,k;
-
- // set energy/virial flags
-
- if (eflag || vflag) ev_setup(eflag,vflag);
- else evflag = evflag_atom = eflag_global = vflag_global =
- eflag_atom = vflag_atom = 0;
-
- // extend size of per-atom arrays if necessary
-
- if (atom->nlocal > nmax) {
- memory->destroy(ek);
- memory->destroy3d_offset(cs,-kmax_created);
- memory->destroy3d_offset(sn,-kmax_created);
- nmax = atom->nmax;
- memory->create(ek,nmax,3,"ewald:ek");
- memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
- memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
- kmax_created = kmax;
- }
-
- // partial structure factors on each processor
- // total structure factor by summing over procs
-
- if (triclinic == 0)
- eik_dot_r();
- else
- eik_dot_r_triclinic();
-
- MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
- MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
- // K-space portion of electric field
- // double loop over K-vectors and local atoms
- // perform per-atom calculations if needed
-
- double **f = atom->f;
- double *q = atom->q;
- int nlocal = atom->nlocal;
-
- int kx,ky,kz;
- double cypz,sypz,exprl,expim,partial,partial_peratom;
-
- for (i = 0; i < nlocal; i++) {
- ek[i][0] = 0.0;
- ek[i][1] = 0.0;
- ek[i][2] = 0.0;
- }
-
- for (k = 0; k < kcount; k++) {
- kx = kxvecs[k];
- ky = kyvecs[k];
- kz = kzvecs[k];
-
- for (i = 0; i < nlocal; i++) {
- cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
- sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
- exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
- expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
- partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
- ek[i][0] += partial*eg[k][0];
- ek[i][1] += partial*eg[k][1];
- ek[i][2] += partial*eg[k][2];
-
- if (evflag_atom) {
- partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k];
- if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom;
- if (vflag_atom)
- for (j = 0; j < 6; j++)
- vatom[i][j] += ug[k]*vg[k][j]*partial_peratom;
- }
- }
- }
-
- // convert E-field to force
-
- const double qscale = force->qqrd2e * scale;
-
- for (i = 0; i < nlocal; i++) {
- f[i][0] += qscale * q[i]*ek[i][0];
- f[i][1] += qscale * q[i]*ek[i][1];
- if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2];
- }
-
- // global energy
-
- if (eflag_global) {
- for (k = 0; k < kcount; k++)
- energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
- sfacim_all[k]*sfacim_all[k]);
- energy -= g_ewald*qsqsum/MY_PIS +
- MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
- energy *= qscale;
- }
-
- // global virial
-
- if (vflag_global) {
- double uk;
- for (k = 0; k < kcount; k++) {
- uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
- for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j];
- }
- for (j = 0; j < 6; j++) virial[j] *= qscale;
- }
-
- // per-atom energy/virial
- // energy includes self-energy correction
-
- if (evflag_atom) {
- if (eflag_atom) {
- for (i = 0; i < nlocal; i++) {
- eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
- (g_ewald*g_ewald*volume);
- eatom[i] *= qscale;
- }
- }
-
- if (vflag_atom)
- for (i = 0; i < nlocal; i++)
- for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;
- }
-
- // 2d slab correction
-
- if (slabflag == 1) slabcorr();
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::eik_dot_r()
-{
- int i,k,l,m,n,ic;
- double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
- double sqk,clpm,slpm;
-
- double **x = atom->x;
- double *q = atom->q;
- int nlocal = atom->nlocal;
-
- n = 0;
-
- // (k,0,0), (0,l,0), (0,0,m)
-
- for (ic = 0; ic < 3; ic++) {
- sqk = unitk[ic]*unitk[ic];
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- for (i = 0; i < nlocal; i++) {
- cs[0][ic][i] = 1.0;
- sn[0][ic][i] = 0.0;
- cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
- sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
- cs[-1][ic][i] = cs[1][ic][i];
- sn[-1][ic][i] = -sn[1][ic][i];
- cstr1 += q[i]*cs[1][ic][i];
- sstr1 += q[i]*sn[1][ic][i];
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- }
- }
-
- for (m = 2; m <= kmax; m++) {
- for (ic = 0; ic < 3; ic++) {
- sqk = m*unitk[ic] * m*unitk[ic];
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- for (i = 0; i < nlocal; i++) {
- cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
- sn[m-1][ic][i]*sn[1][ic][i];
- sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
- cs[m-1][ic][i]*sn[1][ic][i];
- cs[-m][ic][i] = cs[m][ic][i];
- sn[-m][ic][i] = -sn[m][ic][i];
- cstr1 += q[i]*cs[m][ic][i];
- sstr1 += q[i]*sn[m][ic][i];
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- }
- }
- }
-
- // 1 = (k,l,0), 2 = (k,-l,0)
-
- for (k = 1; k <= kxmax; k++) {
- for (l = 1; l <= kymax; l++) {
- sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- cstr2 = 0.0;
- sstr2 = 0.0;
- for (i = 0; i < nlocal; i++) {
- cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
- sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
- cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
- sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- sfacrl[n] = cstr2;
- sfacim[n++] = sstr2;
- }
- }
- }
-
- // 1 = (0,l,m), 2 = (0,l,-m)
-
- for (l = 1; l <= kymax; l++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- cstr2 = 0.0;
- sstr2 = 0.0;
- for (i = 0; i < nlocal; i++) {
- cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
- sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
- cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
- sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- sfacrl[n] = cstr2;
- sfacim[n++] = sstr2;
- }
- }
- }
-
- // 1 = (k,0,m), 2 = (k,0,-m)
-
- for (k = 1; k <= kxmax; k++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- cstr2 = 0.0;
- sstr2 = 0.0;
- for (i = 0; i < nlocal; i++) {
- cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
- sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
- cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
- sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- sfacrl[n] = cstr2;
- sfacim[n++] = sstr2;
- }
- }
- }
-
- // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
- for (k = 1; k <= kxmax; k++) {
- for (l = 1; l <= kymax; l++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
- (m*unitk[2] * m*unitk[2]);
- if (sqk <= gsqmx) {
- cstr1 = 0.0;
- sstr1 = 0.0;
- cstr2 = 0.0;
- sstr2 = 0.0;
- cstr3 = 0.0;
- sstr3 = 0.0;
- cstr4 = 0.0;
- sstr4 = 0.0;
- for (i = 0; i < nlocal; i++) {
- clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
- slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
- cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
- sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
- clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
- slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
- cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
- sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
- clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
- slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
- cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
- sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
- clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
- slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
- cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
- sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
- }
- sfacrl[n] = cstr1;
- sfacim[n++] = sstr1;
- sfacrl[n] = cstr2;
- sfacim[n++] = sstr2;
- sfacrl[n] = cstr3;
- sfacim[n++] = sstr3;
- sfacrl[n] = cstr4;
- sfacim[n++] = sstr4;
- }
- }
- }
- }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::eik_dot_r_triclinic()
-{
- int i,k,l,m,n,ic;
- double cstr1,sstr1;
- double sqk,clpm,slpm;
-
- double **x = atom->x;
- double *q = atom->q;
- int nlocal = atom->nlocal;
-
- double unitk_lamda[3];
-
- double max_kvecs[3];
- max_kvecs[0] = kxmax;
- max_kvecs[1] = kymax;
- max_kvecs[2] = kzmax;
-
- // (k,0,0), (0,l,0), (0,0,m)
-
- for (ic = 0; ic < 3; ic++) {
- unitk_lamda[0] = 0.0;
- unitk_lamda[1] = 0.0;
- unitk_lamda[2] = 0.0;
- unitk_lamda[ic] = 2.0*MY_PI;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- sqk = unitk_lamda[ic]*unitk_lamda[ic];
- if (sqk <= gsqmx) {
- for (i = 0; i < nlocal; i++) {
- cs[0][ic][i] = 1.0;
- sn[0][ic][i] = 0.0;
- cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
- sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
- cs[-1][ic][i] = cs[1][ic][i];
- sn[-1][ic][i] = -sn[1][ic][i];
- }
- }
- }
-
- for (ic = 0; ic < 3; ic++) {
- for (m = 2; m <= max_kvecs[ic]; m++) {
- unitk_lamda[0] = 0.0;
- unitk_lamda[1] = 0.0;
- unitk_lamda[2] = 0.0;
- unitk_lamda[ic] = 2.0*MY_PI*m;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- sqk = unitk_lamda[ic]*unitk_lamda[ic];
- for (i = 0; i < nlocal; i++) {
- cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
- sn[m-1][ic][i]*sn[1][ic][i];
- sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
- cs[m-1][ic][i]*sn[1][ic][i];
- cs[-m][ic][i] = cs[m][ic][i];
- sn[-m][ic][i] = -sn[m][ic][i];
- }
- }
- }
-
- for (n = 0; n < kcount; n++) {
- k = kxvecs[n];
- l = kyvecs[n];
- m = kzvecs[n];
- cstr1 = 0.0;
- sstr1 = 0.0;
- for (i = 0; i < nlocal; i++) {
- clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
- slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
- cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
- sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
- }
- sfacrl[n] = cstr1;
- sfacim[n] = sstr1;
- }
-}
-
-/* ----------------------------------------------------------------------
- pre-compute coefficients for each Ewald K-vector
-------------------------------------------------------------------------- */
-
-void Ewald::coeffs()
-{
- int k,l,m;
- double sqk,vterm;
-
- double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
- double preu = 4.0*MY_PI/volume;
-
- kcount = 0;
-
- // (k,0,0), (0,l,0), (0,0,m)
-
- for (m = 1; m <= kmax; m++) {
- sqk = (m*unitk[0]) * (m*unitk[0]);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = m;
- kyvecs[kcount] = 0;
- kzvecs[kcount] = 0;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount];
- eg[kcount][1] = 0.0;
- eg[kcount][2] = 0.0;
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m);
- vg[kcount][1] = 1.0;
- vg[kcount][2] = 1.0;
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;
- }
- sqk = (m*unitk[1]) * (m*unitk[1]);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = 0;
- kyvecs[kcount] = m;
- kzvecs[kcount] = 0;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount];
- eg[kcount][2] = 0.0;
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m);
- vg[kcount][2] = 1.0;
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;
- }
- sqk = (m*unitk[2]) * (m*unitk[2]);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = 0;
- kyvecs[kcount] = 0;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 0.0;
- eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0;
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;
- }
- }
-
- // 1 = (k,l,0), 2 = (k,-l,0)
-
- for (k = 1; k <= kxmax; k++) {
- for (l = 1; l <= kymax; l++) {
- sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = k;
- kyvecs[kcount] = l;
- kzvecs[kcount] = 0;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = 0.0;
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0;
- vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;
-
- kxvecs[kcount] = k;
- kyvecs[kcount] = -l;
- kzvecs[kcount] = 0;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = 0.0;
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0;
- vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;;
- }
- }
- }
-
- // 1 = (0,l,m), 2 = (0,l,-m)
-
- for (l = 1; l <= kymax; l++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = 0;
- kyvecs[kcount] = l;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
-
- kxvecs[kcount] = 0;
- kyvecs[kcount] = l;
- kzvecs[kcount] = -m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
- }
- }
- }
-
- // 1 = (k,0,m), 2 = (k,0,-m)
-
- for (k = 1; k <= kxmax; k++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = k;
- kyvecs[kcount] = 0;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = 0.0;
- eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0;
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = 0.0;
- vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = 0.0;
- kcount++;
-
- kxvecs[kcount] = k;
- kyvecs[kcount] = 0;
- kzvecs[kcount] = -m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = 0.0;
- eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0;
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = 0.0;
- vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = 0.0;
- kcount++;
- }
- }
- }
-
- // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
- for (k = 1; k <= kxmax; k++) {
- for (l = 1; l <= kymax; l++) {
- for (m = 1; m <= kzmax; m++) {
- sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) +
- (unitk[2]*m) * (unitk[2]*m);
- if (sqk <= gsqmx) {
- kxvecs[kcount] = k;
- kyvecs[kcount] = l;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
-
- kxvecs[kcount] = k;
- kyvecs[kcount] = -l;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
-
- kxvecs[kcount] = k;
- kyvecs[kcount] = l;
- kzvecs[kcount] = -m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
-
- kxvecs[kcount] = k;
- kyvecs[kcount] = -l;
- kzvecs[kcount] = -m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
- eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
- eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
- vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
- vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
- vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
- vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
- vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
- vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
- kcount++;
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- pre-compute coefficients for each Ewald K-vector for a triclinic
- system
-------------------------------------------------------------------------- */
-
-void Ewald::coeffs_triclinic()
-{
- int k,l,m;
- double sqk,vterm;
-
- double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
- double preu = 4.0*MY_PI/volume;
-
- double unitk_lamda[3];
-
- kcount = 0;
-
- // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
- for (k = 1; k <= kxmax; k++) {
- for (l = -kymax; l <= kymax; l++) {
- for (m = -kzmax; m <= kzmax; m++) {
- unitk_lamda[0] = 2.0*MY_PI*k;
- unitk_lamda[1] = 2.0*MY_PI*l;
- unitk_lamda[2] = 2.0*MY_PI*m;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] +
- unitk_lamda[2]*unitk_lamda[2];
- if (sqk <= gsqmx) {
- kxvecs[kcount] = k;
- kyvecs[kcount] = l;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount];
- eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
- eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0];
- vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
- vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
- vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1];
- vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2];
- vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
- kcount++;
- }
- }
- }
- }
-
- // 1 = (0,l,m), 2 = (0,l,-m)
-
- for (l = 1; l <= kymax; l++) {
- for (m = -kzmax; m <= kzmax; m++) {
- unitk_lamda[0] = 0.0;
- unitk_lamda[1] = 2.0*MY_PI*l;
- unitk_lamda[2] = 2.0*MY_PI*m;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2];
- if (sqk <= gsqmx) {
- kxvecs[kcount] = 0;
- kyvecs[kcount] = l;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
- eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
- vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
- kcount++;
- }
- }
- }
-
- // (0,0,m)
-
- for (m = 1; m <= kmax; m++) {
- unitk_lamda[0] = 0.0;
- unitk_lamda[1] = 0.0;
- unitk_lamda[2] = 2.0*MY_PI*m;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- sqk = unitk_lamda[2]*unitk_lamda[2];
- if (sqk <= gsqmx) {
- kxvecs[kcount] = 0;
- kyvecs[kcount] = 0;
- kzvecs[kcount] = m;
- ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
- eg[kcount][0] = 0.0;
- eg[kcount][1] = 0.0;
- eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
- vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
- vg[kcount][0] = 1.0;
- vg[kcount][1] = 1.0;
- vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
- vg[kcount][3] = 0.0;
- vg[kcount][4] = 0.0;
- vg[kcount][5] = 0.0;
- kcount++;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::allocate()
-{
- kxvecs = new int[kmax3d];
- kyvecs = new int[kmax3d];
- kzvecs = new int[kmax3d];
-
- ug = new double[kmax3d];
- memory->create(eg,kmax3d,3,"ewald:eg");
- memory->create(vg,kmax3d,6,"ewald:vg");
-
- sfacrl = new double[kmax3d];
- sfacim = new double[kmax3d];
- sfacrl_all = new double[kmax3d];
- sfacim_all = new double[kmax3d];
-}
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::deallocate()
-{
- delete [] kxvecs;
- delete [] kyvecs;
- delete [] kzvecs;
-
- delete [] ug;
- memory->destroy(eg);
- memory->destroy(vg);
-
- delete [] sfacrl;
- delete [] sfacim;
- delete [] sfacrl_all;
- delete [] sfacim_all;
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void Ewald::slabcorr()
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int nlocal = atom->nlocal;
-
- double dipole = 0.0;
- for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
- // sum local contributions to get global dipole moment
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
- // need to make non-neutral systems and/or
- // per-atom energy translationally invariant
-
- double dipole_r2 = 0.0;
- if (eflag_atom || fabs(qsum) > SMALL) {
- for (int i = 0; i < nlocal; i++)
- dipole_r2 += q[i]*x[i][2]*x[i][2];
-
- // sum local contributions
-
- double tmp;
- MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2 = tmp;
- }
-
- // compute corrections
-
- const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
- qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) energy += qscale * e_slabcorr;
-
- // per-atom energy
-
- if (eflag_atom) {
- double efact = qscale * MY_2PI/volume;
- for (int i = 0; i < nlocal; i++)
- eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
- qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
- }
-
- // add on force corrections
-
- double ffact = qscale * (-4.0*MY_PI/volume);
- double **f = atom->f;
-
- for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
- memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double Ewald::memory_usage()
-{
- double bytes = 3 * kmax3d * sizeof(int);
- bytes += (1 + 3 + 6) * kmax3d * sizeof(double);
- bytes += 4 * kmax3d * sizeof(double);
- bytes += nmax*3 * sizeof(double);
- bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double);
- return bytes;
-}
-
-/* ----------------------------------------------------------------------
- group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- compute the Ewald total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
-{
- if (slabflag && triclinic)
- error->all(FLERR,"Cannot (yet) use K-space slab "
- "correction with compute group/group for triclinic systems");
-
- int i,k;
-
- if (!group_allocate_flag) {
- allocate_groups();
- group_allocate_flag = 1;
- }
-
- e2group = 0.0; //energy
- f2group[0] = 0.0; //force in x-direction
- f2group[1] = 0.0; //force in y-direction
- f2group[2] = 0.0; //force in z-direction
-
- // partial and total structure factors for groups A and B
-
- for (k = 0; k < kcount; k++) {
-
- // group A
-
- sfacrl_A[k] = 0.0;
- sfacim_A[k] = 0.0;
- sfacrl_A_all[k] = 0.0;
- sfacim_A_all[k] = 0;
-
- // group B
-
- sfacrl_B[k] = 0.0;
- sfacim_B[k] = 0.0;
- sfacrl_B_all[k] = 0.0;
- sfacim_B_all[k] = 0.0;
- }
-
- double *q = atom->q;
- int nlocal = atom->nlocal;
- int *mask = atom->mask;
-
- int kx,ky,kz;
- double cypz,sypz,exprl,expim;
-
- // partial structure factors for groups A and B on each processor
-
- for (k = 0; k < kcount; k++) {
- kx = kxvecs[k];
- ky = kyvecs[k];
- kz = kzvecs[k];
-
- for (i = 0; i < nlocal; i++) {
-
- if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
- if (AA_flag) continue;
-
- if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
- cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
- sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
- exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
- expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
-
- // group A
-
- if (mask[i] & groupbit_A) {
- sfacrl_A[k] += q[i]*exprl;
- sfacim_A[k] += q[i]*expim;
- }
-
- // group B
-
- if (mask[i] & groupbit_B) {
- sfacrl_B[k] += q[i]*exprl;
- sfacim_B[k] += q[i]*expim;
- }
- }
- }
- }
-
- // total structure factor by summing over procs
-
- MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
- MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
- MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
- MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
- const double qscale = force->qqrd2e * scale;
- double partial_group;
-
- // total group A <--> group B energy
- // self and boundary correction terms are in compute_group_group.cpp
-
- for (k = 0; k < kcount; k++) {
- partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] +
- sfacim_A_all[k]*sfacim_B_all[k];
- e2group += ug[k]*partial_group;
- }
-
- e2group *= qscale;
-
- // total group A <--> group B force
-
- for (k = 0; k < kcount; k++) {
- partial_group = sfacim_A_all[k]*sfacrl_B_all[k] -
- sfacrl_A_all[k]*sfacim_B_all[k];
- f2group[0] += eg[k][0]*partial_group;
- f2group[1] += eg[k][1]*partial_group;
- if (slabflag != 2) f2group[2] += eg[k][2]*partial_group;
- }
-
- f2group[0] *= qscale;
- f2group[1] *= qscale;
- f2group[2] *= qscale;
-
- // 2d slab correction
-
- if (slabflag == 1)
- slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int *mask = atom->mask;
- int nlocal = atom->nlocal;
-
- double qsum_A = 0.0;
- double qsum_B = 0.0;
- double dipole_A = 0.0;
- double dipole_B = 0.0;
- double dipole_r2_A = 0.0;
- double dipole_r2_B = 0.0;
-
- for (int i = 0; i < nlocal; i++) {
- if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
- if (AA_flag) continue;
-
- if (mask[i] & groupbit_A) {
- qsum_A += q[i];
- dipole_A += q[i]*x[i][2];
- dipole_r2_A += q[i]*x[i][2]*x[i][2];
- }
-
- if (mask[i] & groupbit_B) {
- qsum_B += q[i];
- dipole_B += q[i]*x[i][2];
- dipole_r2_B += q[i]*x[i][2]*x[i][2];
- }
- }
-
- // sum local contributions to get total charge and global dipole moment
- // for each group
-
- double tmp;
- MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum_A = tmp;
-
- MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum_B = tmp;
-
- MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_A = tmp;
-
- MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_B = tmp;
-
- MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2_A = tmp;
-
- MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2_B = tmp;
-
- // compute corrections
-
- const double qscale = force->qqrd2e * scale;
- const double efact = qscale * MY_2PI/volume;
-
- e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
- qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
-
- // add on force corrections
-
- const double ffact = qscale * (-4.0*MY_PI/volume);
- f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
-}
-
-/* ----------------------------------------------------------------------
- allocate group-group memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::allocate_groups()
-{
- // group A
-
- sfacrl_A = new double[kmax3d];
- sfacim_A = new double[kmax3d];
- sfacrl_A_all = new double[kmax3d];
- sfacim_A_all = new double[kmax3d];
-
- // group B
-
- sfacrl_B = new double[kmax3d];
- sfacim_B = new double[kmax3d];
- sfacrl_B_all = new double[kmax3d];
- sfacim_B_all = new double[kmax3d];
-}
-
-/* ----------------------------------------------------------------------
- deallocate group-group memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::deallocate_groups()
-{
- // group A
-
- delete [] sfacrl_A;
- delete [] sfacim_A;
- delete [] sfacrl_A_all;
- delete [] sfacim_A_all;
-
- // group B
-
- delete [] sfacrl_B;
- delete [] sfacim_B;
- delete [] sfacrl_B_all;
- delete [] sfacim_B_all;
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+ per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU)
+ group/group energy/force added by Stan Moore (BYU)
+ triclinic added by Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "string.h"
+#include "math.h"
+#include "ewald.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair.h"
+#include "domain.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.00001
+
+/* ---------------------------------------------------------------------- */
+
+Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+ if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command");
+
+ ewaldflag = 1;
+ group_group_enable = 1;
+ group_allocate_flag = 0;
+
+ accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+
+ kmax = 0;
+ kxvecs = kyvecs = kzvecs = NULL;
+ ug = NULL;
+ eg = vg = NULL;
+ sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
+
+ nmax = 0;
+ ek = NULL;
+ cs = sn = NULL;
+
+ kcount = 0;
+}
+
+/* ----------------------------------------------------------------------
+ free all memory
+------------------------------------------------------------------------- */
+
+Ewald::~Ewald()
+{
+ deallocate();
+ if (group_allocate_flag) deallocate_groups();
+ memory->destroy(ek);
+ memory->destroy3d_offset(cs,-kmax_created);
+ memory->destroy3d_offset(sn,-kmax_created);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Ewald::init()
+{
+ if (comm->me == 0) {
+ if (screen) fprintf(screen,"Ewald initialization ...\n");
+ if (logfile) fprintf(logfile,"Ewald initialization ...\n");
+ }
+
+ // error check
+
+ triclinic_check();
+ if (domain->dimension == 2)
+ error->all(FLERR,"Cannot use Ewald with 2d simulation");
+
+ if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+ if (slabflag == 0 && domain->nonperiodic > 0)
+ error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald");
+ if (slabflag) {
+ if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+ domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+ error->all(FLERR,"Incorrect boundaries with slab Ewald");
+ if (domain->triclinic)
+ error->all(FLERR,"Cannot (yet) use Ewald with triclinic box "
+ "and slab correction");
+ }
+
+ // extract short-range Coulombic cutoff from pair style
+
+ scale = 1.0;
+
+ pair_check();
+
+ int itmp;
+ double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+ if (p_cutoff == NULL)
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ double cutoff = *p_cutoff;
+
+ qsum = qsqsum = 0.0;
+ for (int i = 0; i < atom->nlocal; i++) {
+ qsum += atom->q[i];
+ qsqsum += atom->q[i]*atom->q[i];
+ }
+
+ double tmp;
+ MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsum = tmp;
+ MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsqsum = tmp;
+
+ if (qsqsum == 0.0)
+ error->all(FLERR,"Cannot use kspace solver on system with no charge");
+ if (fabs(qsum) > SMALL && comm->me == 0) {
+ char str[128];
+ sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+ error->warning(FLERR,str);
+ }
+
+ // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+ if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+ else accuracy = accuracy_relative * two_charge_force;
+
+ // setup K-space resolution
+
+ q2 = qsqsum * force->qqrd2e;
+ bigint natoms = atom->natoms;
+
+ triclinic = domain->triclinic;
+
+ // use xprd,yprd,zprd even if triclinic so grid size is the same
+ // adjust z dimension for 2d slab Ewald
+ // 3d Ewald just uses zprd since slab_volfactor = 1.0
+
+ double xprd = domain->xprd;
+ double yprd = domain->yprd;
+ double zprd = domain->zprd;
+ double zprd_slab = zprd*slab_volfactor;
+
+ // make initial g_ewald estimate
+ // based on desired accuracy and real space cutoff
+ // fluid-occupied volume used to estimate real-space error
+ // zprd used rather than zprd_slab
+
+ if (!gewaldflag) {
+ if (accuracy <= 0.0)
+ error->all(FLERR,"KSpace accuracy must be > 0");
+ g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+ if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
+ else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+ }
+
+ // setup Ewald coefficients so can print stats
+
+ setup();
+
+ // final RMS accuracy
+
+ double lprx = rms(kxmax_orig,xprd,natoms,q2);
+ double lpry = rms(kymax_orig,yprd,natoms,q2);
+ double lprz = rms(kzmax_orig,zprd_slab,natoms,q2);
+ double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+ double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
+ double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+ double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
+ double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
+
+ // stats
+
+ if (comm->me == 0) {
+ if (screen) {
+ fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
+ fprintf(screen," estimated absolute RMS force accuracy = %g\n",
+ estimated_accuracy);
+ fprintf(screen," estimated relative force accuracy = %g\n",
+ estimated_accuracy/two_charge_force);
+ fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n",
+ kcount,kmax,kmax3d);
+ fprintf(screen," kxmax kymax kzmax = %d %d %d\n",
+ kxmax,kymax,kzmax);
+ }
+ if (logfile) {
+ fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
+ fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
+ estimated_accuracy);
+ fprintf(logfile," estimated relative force accuracy = %g\n",
+ estimated_accuracy/two_charge_force);
+ fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n",
+ kcount,kmax,kmax3d);
+ fprintf(logfile," kxmax kymax kzmax = %d %d %d\n",
+ kxmax,kymax,kzmax);
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ adjust Ewald coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void Ewald::setup()
+{
+ // volume-dependent factors
+
+ double xprd = domain->xprd;
+ double yprd = domain->yprd;
+ double zprd = domain->zprd;
+
+ // adjustment of z dimension for 2d slab Ewald
+ // 3d Ewald just uses zprd since slab_volfactor = 1.0
+
+ double zprd_slab = zprd*slab_volfactor;
+ volume = xprd * yprd * zprd_slab;
+
+ unitk[0] = 2.0*MY_PI/xprd;
+ unitk[1] = 2.0*MY_PI/yprd;
+ unitk[2] = 2.0*MY_PI/zprd_slab;
+
+ int kmax_old = kmax;
+
+ if (kewaldflag == 0) {
+
+ // determine kmax
+ // function of current box size, accuracy, G_ewald (short-range cutoff)
+
+ bigint natoms = atom->natoms;
+ double err;
+ kxmax = 1;
+ kymax = 1;
+ kzmax = 1;
+
+ err = rms(kxmax,xprd,natoms,q2);
+ while (err > accuracy) {
+ kxmax++;
+ err = rms(kxmax,xprd,natoms,q2);
+ }
+
+ err = rms(kymax,yprd,natoms,q2);
+ while (err > accuracy) {
+ kymax++;
+ err = rms(kymax,yprd,natoms,q2);
+ }
+
+ err = rms(kzmax,zprd_slab,natoms,q2);
+ while (err > accuracy) {
+ kzmax++;
+ err = rms(kzmax,zprd_slab,natoms,q2);
+ }
+
+ kmax = MAX(kxmax,kymax);
+ kmax = MAX(kmax,kzmax);
+ kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+
+ double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
+ double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
+ double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
+ gsqmx = MAX(gsqxmx,gsqymx);
+ gsqmx = MAX(gsqmx,gsqzmx);
+
+ kxmax_orig = kxmax;
+ kymax_orig = kymax;
+ kzmax_orig = kzmax;
+
+ // scale lattice vectors for triclinic skew
+
+ if (triclinic) {
+ double tmp[3];
+ tmp[0] = kxmax/xprd;
+ tmp[1] = kymax/yprd;
+ tmp[2] = kzmax/zprd;
+ lamda2xT(&tmp[0],&tmp[0]);
+ kxmax = MAX(1,static_cast<int>(tmp[0]));
+ kymax = MAX(1,static_cast<int>(tmp[1]));
+ kzmax = MAX(1,static_cast<int>(tmp[2]));
+
+ kmax = MAX(kxmax,kymax);
+ kmax = MAX(kmax,kzmax);
+ kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+ }
+
+ } else {
+
+ kxmax = kx_ewald;
+ kymax = ky_ewald;
+ kzmax = kz_ewald;
+
+ kxmax_orig = kxmax;
+ kymax_orig = kymax;
+ kzmax_orig = kzmax;
+
+ kmax = MAX(kxmax,kymax);
+ kmax = MAX(kmax,kzmax);
+ kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+
+ double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
+ double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
+ double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
+ gsqmx = MAX(gsqxmx,gsqymx);
+ gsqmx = MAX(gsqmx,gsqzmx);
+ }
+
+ gsqmx *= 1.00001;
+
+ // if size has grown, reallocate k-dependent and nlocal-dependent arrays
+
+ if (kmax > kmax_old) {
+ deallocate();
+ allocate();
+ group_allocate_flag = 0;
+
+ memory->destroy(ek);
+ memory->destroy3d_offset(cs,-kmax_created);
+ memory->destroy3d_offset(sn,-kmax_created);
+ nmax = atom->nmax;
+ memory->create(ek,nmax,3,"ewald:ek");
+ memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+ memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+ kmax_created = kmax;
+ }
+
+ // pre-compute Ewald coefficients
+
+ if (triclinic == 0)
+ coeffs();
+ else
+ coeffs_triclinic();
+}
+
+/* ----------------------------------------------------------------------
+ compute RMS accuracy for a dimension
+------------------------------------------------------------------------- */
+
+double Ewald::rms(int km, double prd, bigint natoms, double q2)
+{
+ double value = 2.0*q2*g_ewald/prd *
+ sqrt(1.0/(MY_PI*km*natoms)) *
+ exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd));
+
+ return value;
+}
+
+/* ----------------------------------------------------------------------
+ compute the Ewald long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void Ewald::compute(int eflag, int vflag)
+{
+ int i,j,k;
+
+ // set energy/virial flags
+
+ if (eflag || vflag) ev_setup(eflag,vflag);
+ else evflag = evflag_atom = eflag_global = vflag_global =
+ eflag_atom = vflag_atom = 0;
+
+ // extend size of per-atom arrays if necessary
+
+ if (atom->nlocal > nmax) {
+ memory->destroy(ek);
+ memory->destroy3d_offset(cs,-kmax_created);
+ memory->destroy3d_offset(sn,-kmax_created);
+ nmax = atom->nmax;
+ memory->create(ek,nmax,3,"ewald:ek");
+ memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+ memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+ kmax_created = kmax;
+ }
+
+ // partial structure factors on each processor
+ // total structure factor by summing over procs
+
+ if (triclinic == 0)
+ eik_dot_r();
+ else
+ eik_dot_r_triclinic();
+
+ MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+ MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+ // K-space portion of electric field
+ // double loop over K-vectors and local atoms
+ // perform per-atom calculations if needed
+
+ double **f = atom->f;
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+
+ int kx,ky,kz;
+ double cypz,sypz,exprl,expim,partial,partial_peratom;
+
+ for (i = 0; i < nlocal; i++) {
+ ek[i][0] = 0.0;
+ ek[i][1] = 0.0;
+ ek[i][2] = 0.0;
+ }
+
+ for (k = 0; k < kcount; k++) {
+ kx = kxvecs[k];
+ ky = kyvecs[k];
+ kz = kzvecs[k];
+
+ for (i = 0; i < nlocal; i++) {
+ cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
+ sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+ exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
+ expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
+ partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
+ ek[i][0] += partial*eg[k][0];
+ ek[i][1] += partial*eg[k][1];
+ ek[i][2] += partial*eg[k][2];
+
+ if (evflag_atom) {
+ partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k];
+ if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom;
+ if (vflag_atom)
+ for (j = 0; j < 6; j++)
+ vatom[i][j] += ug[k]*vg[k][j]*partial_peratom;
+ }
+ }
+ }
+
+ // convert E-field to force
+
+ const double qscale = force->qqrd2e * scale;
+
+ for (i = 0; i < nlocal; i++) {
+ f[i][0] += qscale * q[i]*ek[i][0];
+ f[i][1] += qscale * q[i]*ek[i][1];
+ if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2];
+ }
+
+ // global energy
+
+ if (eflag_global) {
+ for (k = 0; k < kcount; k++)
+ energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
+ sfacim_all[k]*sfacim_all[k]);
+ energy -= g_ewald*qsqsum/MY_PIS +
+ MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+ energy *= qscale;
+ }
+
+ // global virial
+
+ if (vflag_global) {
+ double uk;
+ for (k = 0; k < kcount; k++) {
+ uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
+ for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j];
+ }
+ for (j = 0; j < 6; j++) virial[j] *= qscale;
+ }
+
+ // per-atom energy/virial
+ // energy includes self-energy correction
+
+ if (evflag_atom) {
+ if (eflag_atom) {
+ for (i = 0; i < nlocal; i++) {
+ eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+ (g_ewald*g_ewald*volume);
+ eatom[i] *= qscale;
+ }
+ }
+
+ if (vflag_atom)
+ for (i = 0; i < nlocal; i++)
+ for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;
+ }
+
+ // 2d slab correction
+
+ if (slabflag == 1) slabcorr();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Ewald::eik_dot_r()
+{
+ int i,k,l,m,n,ic;
+ double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
+ double sqk,clpm,slpm;
+
+ double **x = atom->x;
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+
+ n = 0;
+
+ // (k,0,0), (0,l,0), (0,0,m)
+
+ for (ic = 0; ic < 3; ic++) {
+ sqk = unitk[ic]*unitk[ic];
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ cs[0][ic][i] = 1.0;
+ sn[0][ic][i] = 0.0;
+ cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+ sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+ cs[-1][ic][i] = cs[1][ic][i];
+ sn[-1][ic][i] = -sn[1][ic][i];
+ cstr1 += q[i]*cs[1][ic][i];
+ sstr1 += q[i]*sn[1][ic][i];
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ }
+ }
+
+ for (m = 2; m <= kmax; m++) {
+ for (ic = 0; ic < 3; ic++) {
+ sqk = m*unitk[ic] * m*unitk[ic];
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
+ sn[m-1][ic][i]*sn[1][ic][i];
+ sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
+ cs[m-1][ic][i]*sn[1][ic][i];
+ cs[-m][ic][i] = cs[m][ic][i];
+ sn[-m][ic][i] = -sn[m][ic][i];
+ cstr1 += q[i]*cs[m][ic][i];
+ sstr1 += q[i]*sn[m][ic][i];
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ }
+ }
+ }
+
+ // 1 = (k,l,0), 2 = (k,-l,0)
+
+ for (k = 1; k <= kxmax; k++) {
+ for (l = 1; l <= kymax; l++) {
+ sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ cstr2 = 0.0;
+ sstr2 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+ sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+ cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+ sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ sfacrl[n] = cstr2;
+ sfacim[n++] = sstr2;
+ }
+ }
+ }
+
+ // 1 = (0,l,m), 2 = (0,l,-m)
+
+ for (l = 1; l <= kymax; l++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ cstr2 = 0.0;
+ sstr2 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+ sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+ cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+ sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ sfacrl[n] = cstr2;
+ sfacim[n++] = sstr2;
+ }
+ }
+ }
+
+ // 1 = (k,0,m), 2 = (k,0,-m)
+
+ for (k = 1; k <= kxmax; k++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ cstr2 = 0.0;
+ sstr2 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+ sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+ cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+ sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ sfacrl[n] = cstr2;
+ sfacim[n++] = sstr2;
+ }
+ }
+ }
+
+ // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+
+ for (k = 1; k <= kxmax; k++) {
+ for (l = 1; l <= kymax; l++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+ (m*unitk[2] * m*unitk[2]);
+ if (sqk <= gsqmx) {
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ cstr2 = 0.0;
+ sstr2 = 0.0;
+ cstr3 = 0.0;
+ sstr3 = 0.0;
+ cstr4 = 0.0;
+ sstr4 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+ slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+ cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+ sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+ clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+ slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+ cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+ sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+ clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+ slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+ cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+ sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+ clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+ slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+ cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+ sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n++] = sstr1;
+ sfacrl[n] = cstr2;
+ sfacim[n++] = sstr2;
+ sfacrl[n] = cstr3;
+ sfacim[n++] = sstr3;
+ sfacrl[n] = cstr4;
+ sfacim[n++] = sstr4;
+ }
+ }
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Ewald::eik_dot_r_triclinic()
+{
+ int i,k,l,m,n,ic;
+ double cstr1,sstr1;
+ double sqk,clpm,slpm;
+ // Triclinic path: fill the per-atom cs/sn tables for each axis, then store one charge-weighted cos/sin sum per k-vector.
+ double **x = atom->x;
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+
+ double unitk_lamda[3]; // scratch vector; x2lamdaT below overwrites it in place
+ // upper harmonic index per axis for the recurrence loop (held as double, compared with the int m)
+ double max_kvecs[3];
+ max_kvecs[0] = kxmax;
+ max_kvecs[1] = kymax;
+ max_kvecs[2] = kzmax;
+
+ // (k,0,0), (0,l,0), (0,0,m)
+ // seed harmonics 0 and +/-1 on each axis ic from cos/sin of (transformed unit vector . x[i])
+ for (ic = 0; ic < 3; ic++) {
+ unitk_lamda[0] = 0.0;
+ unitk_lamda[1] = 0.0;
+ unitk_lamda[2] = 0.0;
+ unitk_lamda[ic] = 2.0*MY_PI;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); // input and output alias; helper defined elsewhere, presumably maps to a reciprocal-space vector - TODO confirm
+ sqk = unitk_lamda[ic]*unitk_lamda[ic]; // only component ic enters the cutoff test
+ if (sqk <= gsqmx) {
+ for (i = 0; i < nlocal; i++) {
+ cs[0][ic][i] = 1.0;
+ sn[0][ic][i] = 0.0;
+ cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
+ sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
+ cs[-1][ic][i] = cs[1][ic][i]; // negative harmonic: cosine is even ...
+ sn[-1][ic][i] = -sn[1][ic][i]; // ... and sine is odd
+ }
+ }
+ }
+ // higher harmonics by angle addition from harmonic m-1 and harmonic 1; NOTE(review): cs[1]/sn[1] are read here even if the cutoff test above skipped them
+ for (ic = 0; ic < 3; ic++) {
+ for (m = 2; m <= max_kvecs[ic]; m++) {
+ unitk_lamda[0] = 0.0;
+ unitk_lamda[1] = 0.0;
+ unitk_lamda[2] = 0.0;
+ unitk_lamda[ic] = 2.0*MY_PI*m;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); // NOTE(review): unitk_lamda is not read again inside this loop body
+ sqk = unitk_lamda[ic]*unitk_lamda[ic]; // NOTE(review): this sqk is never tested or read later in the function
+ for (i = 0; i < nlocal; i++) {
+ cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
+ sn[m-1][ic][i]*sn[1][ic][i];
+ sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
+ cs[m-1][ic][i]*sn[1][ic][i];
+ cs[-m][ic][i] = cs[m][ic][i];
+ sn[-m][ic][i] = -sn[m][ic][i];
+ }
+ }
+ }
+ // for every stored k-vector, combine the three per-axis factors and sum over the local atoms
+ for (n = 0; n < kcount; n++) {
+ k = kxvecs[n];
+ l = kyvecs[n];
+ m = kzvecs[n];
+ cstr1 = 0.0;
+ sstr1 = 0.0;
+ for (i = 0; i < nlocal; i++) {
+ clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; // cosine of the summed y and z phases
+ slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; // sine of the summed y and z phases
+ cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+ sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+ }
+ sfacrl[n] = cstr1;
+ sfacim[n] = sstr1;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ pre-compute coefficients for each Ewald K-vector
+------------------------------------------------------------------------- */
+
+void Ewald::coeffs()
+{
+ int k,l,m;
+ double sqk,vterm;
+ // Rebuild the k-vector table from scratch: integer indices (kxvecs/kyvecs/kzvecs) plus the ug, eg and vg factors.
+ double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+ double preu = 4.0*MY_PI/volume;
+ // Only one vector of each +/- pair is stored: k > 0, or k == 0 and l > 0, or k == l == 0 and m > 0.
+ kcount = 0;
+
+ // (k,0,0), (0,l,0), (0,0,m)
+ // a single loop up to kmax serves all three axes; each axis applies its own sqk <= gsqmx test
+ for (m = 1; m <= kmax; m++) {
+ sqk = (m*unitk[0]) * (m*unitk[0]);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = m;
+ kyvecs[kcount] = 0;
+ kzvecs[kcount] = 0;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; // scalar factor per k-vector
+ eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; // eg = 2 * (k-vector component) * ug
+ eg[kcount][1] = 0.0;
+ eg[kcount][2] = 0.0;
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); // vg slots 0..2 hold the xx, yy, zz terms
+ vg[kcount][1] = 1.0;
+ vg[kcount][2] = 1.0;
+ vg[kcount][3] = 0.0; // vg slots 3..5 hold the xy, xz, yz cross terms
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ sqk = (m*unitk[1]) * (m*unitk[1]);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = m;
+ kzvecs[kcount] = 0;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount];
+ eg[kcount][2] = 0.0;
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m);
+ vg[kcount][2] = 1.0;
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ sqk = (m*unitk[2]) * (m*unitk[2]);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = 0;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 0.0;
+ eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0;
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ }
+
+ // 1 = (k,l,0), 2 = (k,-l,0)
+
+ for (k = 1; k <= kxmax; k++) {
+ for (l = 1; l <= kymax; l++) {
+ sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = 0;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = 0.0;
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0;
+ vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ // mirrored partner (k,-l,0): sqk and vterm are reused, only the terms odd in l change sign
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = -l;
+ kzvecs[kcount] = 0;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = 0.0;
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0;
+ vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ }
+ }
+
+ // 1 = (0,l,m), 2 = (0,l,-m)
+
+ for (l = 1; l <= kymax; l++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ // mirrored partner (0,l,-m): only the terms odd in m change sign
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = -m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ }
+ }
+ }
+
+ // 1 = (k,0,m), 2 = (k,0,-m)
+
+ for (k = 1; k <= kxmax; k++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = 0;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = 0.0;
+ eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0;
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ // mirrored partner (k,0,-m): only the terms odd in m change sign
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = 0;
+ kzvecs[kcount] = -m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = 0.0;
+ eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0;
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ }
+ }
+
+ // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+ // the four sign variants share sqk and vterm and are appended back to back in the order listed above
+ for (k = 1; k <= kxmax; k++) {
+ for (l = 1; l <= kymax; l++) {
+ for (m = 1; m <= kzmax; m++) {
+ sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) +
+ (unitk[2]*m) * (unitk[2]*m);
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ // variant 2: (k,-l,m)
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = -l;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ // variant 3: (k,l,-m)
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = -m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ // variant 4: (k,-l,-m)
+ kxvecs[kcount] = k;
+ kyvecs[kcount] = -l;
+ kzvecs[kcount] = -m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+ eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+ eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+ vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+ vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+ vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+ vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+ vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+ vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+ kcount++;
+ }
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ pre-compute coefficients for each Ewald K-vector for a triclinic
+ system
+------------------------------------------------------------------------- */
+
+void Ewald::coeffs_triclinic()
+{
+ int k,l,m;
+ double sqk,vterm;
+ // Triclinic variant: rebuild the k-vector table (kxvecs/kyvecs/kzvecs, ug, eg, vg) from transformed vectors.
+ double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+ double preu = 4.0*MY_PI/volume;
+
+ double unitk_lamda[3]; // scratch: set to 2*pi*(k,l,m), then overwritten in place by x2lamdaT
+
+ kcount = 0;
+
+ // (k,l,m) with k >= 1; l and m run over both signs and zero, so no mirrored copies are appended here
+
+ for (k = 1; k <= kxmax; k++) {
+ for (l = -kymax; l <= kymax; l++) {
+ for (m = -kzmax; m <= kzmax; m++) {
+ unitk_lamda[0] = 2.0*MY_PI*k;
+ unitk_lamda[1] = 2.0*MY_PI*l;
+ unitk_lamda[2] = 2.0*MY_PI*m;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); // input and output alias; helper defined elsewhere - exact mapping to be confirmed
+ sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] +
+ unitk_lamda[2]*unitk_lamda[2];
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = k; // the integer indices are stored, not the transformed components
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount];
+ eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
+ eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; // vg slots 0..2: xx, yy, zz
+ vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
+ vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+ vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; // vg slots 3..5: xy, xz, yz
+ vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2];
+ vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
+ kcount++;
+ }
+ }
+ }
+ }
+
+ // (0,l,m) with l >= 1 and m of either sign or zero
+
+ for (l = 1; l <= kymax; l++) {
+ for (m = -kzmax; m <= kzmax; m++) {
+ unitk_lamda[0] = 0.0;
+ unitk_lamda[1] = 2.0*MY_PI*l;
+ unitk_lamda[2] = 2.0*MY_PI*m;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+ sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; // component 0 is left out of the length here
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = l;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
+ eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
+ vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
+ kcount++;
+ }
+ }
+ }
+
+ // (0,0,m)
+ // NOTE(review): the bound is kmax, while eik_dot_r_triclinic builds z harmonics only up to kzmax - confirm no accepted m exceeds kzmax
+ for (m = 1; m <= kmax; m++) {
+ unitk_lamda[0] = 0.0;
+ unitk_lamda[1] = 0.0;
+ unitk_lamda[2] = 2.0*MY_PI*m;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+ sqk = unitk_lamda[2]*unitk_lamda[2];
+ if (sqk <= gsqmx) {
+ kxvecs[kcount] = 0;
+ kyvecs[kcount] = 0;
+ kzvecs[kcount] = m;
+ ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+ eg[kcount][0] = 0.0;
+ eg[kcount][1] = 0.0;
+ eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
+ vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+ vg[kcount][0] = 1.0;
+ vg[kcount][1] = 1.0;
+ vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+ vg[kcount][3] = 0.0;
+ vg[kcount][4] = 0.0;
+ vg[kcount][5] = 0.0;
+ kcount++;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ allocate memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::allocate()
+{
+  // structure-factor buffers (real/imaginary, plain and *_all variants)
+  sfacrl = new double[kmax3d];
+  sfacim = new double[kmax3d];
+  sfacrl_all = new double[kmax3d];
+  sfacim_all = new double[kmax3d];
+  // per-k-vector coefficients: ug is a scalar, eg has 3 columns, vg has 6
+  ug = new double[kmax3d];
+  memory->create(eg,kmax3d,3,"ewald:eg");
+  memory->create(vg,kmax3d,6,"ewald:vg");
+  kxvecs = new int[kmax3d];   // integer components of each k-vector
+  kyvecs = new int[kmax3d];
+  kzvecs = new int[kmax3d];
+}
+
+/* ----------------------------------------------------------------------
+ deallocate memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::deallocate()
+{
+  // structure-factor buffers
+  delete [] sfacrl_all;
+  delete [] sfacim_all;
+  delete [] sfacrl;
+  delete [] sfacim;
+  // coefficient tables; eg and vg are released through the memory class
+  memory->destroy(vg);
+  memory->destroy(eg);
+  delete [] ug;
+  delete [] kzvecs;   // integer k-vector components
+  delete [] kyvecs;
+  delete [] kxvecs;
+}
+
+/* ----------------------------------------------------------------------
+ Slab-geometry correction term to dampen inter-slab interactions between
+ periodically repeating slabs. Yields good approximation to 2D Ewald if
+ adequate empty space is left between repeating slabs (J. Chem. Phys.
+ 111, 3155). Slabs defined here to be parallel to the xy plane. Also
+ extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void Ewald::slabcorr()
+{
+  double * const charge = atom->q;
+  double ** const pos = atom->x;
+  const double zlen = domain->zprd;
+  const int natoms_local = atom->nlocal;
+
+  // this rank's share of the z dipole moment, sum_i q_i z_i
+  double mu_local = 0.0;
+  for (int a = 0; a < natoms_local; ++a) {
+    mu_local += charge[a]*pos[a][2];
+  }
+
+  // global dipole moment: add up the shares of all ranks
+  double mu_z;
+  MPI_Allreduce(&mu_local,&mu_z,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // second moment sum_i q_i z_i^2, computed only for per-atom energies
+  // or when the net charge exceeds SMALL in magnitude
+  double mu_z2 = 0.0;
+  const bool need_second_moment = eflag_atom || fabs(qsum) > SMALL;
+  if (need_second_moment) {
+    for (int a = 0; a < natoms_local; ++a) {
+      const double z = pos[a][2];
+      mu_z2 += charge[a]*z*z;
+    }
+    double reduced;
+    MPI_Allreduce(&mu_z2,&reduced,1,MPI_DOUBLE,MPI_SUM,world);
+    mu_z2 = reduced;
+  }
+
+  // energy correction, scaled by qscale before it is added to the total
+  const double qscale = force->qqrd2e * scale;
+  const double e_slabcorr = MY_2PI*(mu_z*mu_z -
+    qsum*mu_z2 - qsum*qsum*zlen*zlen/12.0)/volume;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom energy contribution
+  if (eflag_atom) {
+    const double efact = qscale * MY_2PI/volume;
+    for (int a = 0; a < natoms_local; ++a) {
+      const double z = pos[a][2];
+      eatom[a] += efact * charge[a]*(z*mu_z - 0.5*(mu_z2 +
+        qsum*z*z) - qsum*zlen*zlen/12.0);
+    }
+  }
+
+  // z force correction on every local atom
+  double ** const frc = atom->f;
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+
+  for (int a = 0; a < natoms_local; ++a) {
+    frc[a][2] += ffact * charge[a]*(mu_z - qsum*pos[a][2]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+ memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double Ewald::memory_usage()
+{
+  const double kvec_index_bytes = 3 * kmax3d * sizeof(int);               // kxvecs, kyvecs, kzvecs
+  const double kvec_coeff_bytes = (1 + 3 + 6) * kmax3d * sizeof(double);  // ug, eg (3 columns), vg (6 columns)
+  const double sfac_bytes = 4 * kmax3d * sizeof(double);                  // sfacrl, sfacim and their *_all twins
+  const double per_atom_vec_bytes = nmax*3 * sizeof(double);              // 3 doubles per atom slot
+  const double cs_sn_bytes = 2 * (2*kmax+1)*3*nmax * sizeof(double);      // presumably the cs and sn tables - confirm
+  return (((kvec_index_bytes + kvec_coeff_bytes) + sfac_bytes) + per_atom_vec_bytes) + cs_sn_bytes;
+}
+
+/* ----------------------------------------------------------------------
+ group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ compute the Ewald total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
+{
+ if (slabflag && triclinic)
+ error->all(FLERR,"Cannot (yet) use K-space slab "
+ "correction with compute group/group for triclinic systems");
+ // Results go to e2group (energy) and f2group[0..2] (force) for the atoms selected by groupbit_A and groupbit_B.
+ int i,k;
+ // groupbit_A/groupbit_B are tested against atom->mask; a nonzero AA_flag skips atoms that are not in both groups.
+ if (!group_allocate_flag) {
+ allocate_groups();
+ group_allocate_flag = 1;
+ }
+
+ e2group = 0.0; //energy
+ f2group[0] = 0.0; //force in x-direction
+ f2group[1] = 0.0; //force in y-direction
+ f2group[2] = 0.0; //force in z-direction
+
+ // partial and total structure factors for groups A and B
+
+ for (k = 0; k < kcount; k++) {
+
+ // group A
+
+ sfacrl_A[k] = 0.0;
+ sfacim_A[k] = 0.0;
+ sfacrl_A_all[k] = 0.0;
+ sfacim_A_all[k] = 0.0;
+
+ // group B
+
+ sfacrl_B[k] = 0.0;
+ sfacim_B[k] = 0.0;
+ sfacrl_B_all[k] = 0.0;
+ sfacim_B_all[k] = 0.0;
+ }
+
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+ int *mask = atom->mask;
+
+ int kx,ky,kz;
+ double cypz,sypz,exprl,expim;
+
+ // partial structure factors for groups A and B on each processor
+
+ for (k = 0; k < kcount; k++) {
+ kx = kxvecs[k];
+ ky = kyvecs[k];
+ kz = kzvecs[k];
+
+ for (i = 0; i < nlocal; i++) {
+
+ if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
+ if (AA_flag) continue;
+
+ if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
+ // phase factor of atom i for this k-vector, assembled by angle addition from the cs/sn tables (read here, not rebuilt)
+ cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
+ sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+ exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
+ expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
+
+ // group A
+
+ if (mask[i] & groupbit_A) {
+ sfacrl_A[k] += q[i]*exprl;
+ sfacim_A[k] += q[i]*expim;
+ }
+
+ // group B
+
+ if (mask[i] & groupbit_B) {
+ sfacrl_B[k] += q[i]*exprl;
+ sfacim_B[k] += q[i]*expim;
+ }
+ }
+ }
+ }
+
+ // total structure factor by summing over procs
+
+ MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+ MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+ MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+ MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+ const double qscale = force->qqrd2e * scale;
+ double partial_group;
+
+ // total group A <--> group B energy
+ // self and boundary correction terms are in compute_group_group.cpp
+ // per k-vector: real part of S_A * conj(S_B), weighted by ug[k]
+ for (k = 0; k < kcount; k++) {
+ partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] +
+ sfacim_A_all[k]*sfacim_B_all[k];
+ e2group += ug[k]*partial_group;
+ }
+
+ e2group *= qscale;
+
+ // total group A <--> group B force
+ // per k-vector: imaginary part of S_A * conj(S_B), weighted by eg[k]; the z term is skipped when slabflag == 2
+ for (k = 0; k < kcount; k++) {
+ partial_group = sfacim_A_all[k]*sfacrl_B_all[k] -
+ sfacrl_A_all[k]*sfacim_B_all[k];
+ f2group[0] += eg[k][0]*partial_group;
+ f2group[1] += eg[k][1]*partial_group;
+ if (slabflag != 2) f2group[2] += eg[k][2]*partial_group;
+ }
+
+ f2group[0] *= qscale;
+ f2group[1] *= qscale;
+ f2group[2] *= qscale;
+
+ // 2d slab correction
+
+ if (slabflag == 1)
+ slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
+}
+
+/* ----------------------------------------------------------------------
+ Slab-geometry correction term to dampen inter-slab interactions between
+ periodically repeating slabs. Yields good approximation to 2D Ewald if
+ adequate empty space is left between repeating slabs (J. Chem. Phys.
+ 111, 3155). Slabs defined here to be parallel to the xy plane. Also
+ extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  double * const charge = atom->q;
+  double ** const pos = atom->x;
+  int * const grpmask = atom->mask;
+  const double zlen = domain->zprd;
+  const int natoms_local = atom->nlocal;
+
+  // per-group local sums: net charge, sum of q*z and sum of q*z^2
+
+  double qsum_A = 0.0, dipole_A = 0.0, dipole_r2_A = 0.0;
+  double qsum_B = 0.0, dipole_B = 0.0, dipole_r2_B = 0.0;
+
+  for (int a = 0; a < natoms_local; ++a) {
+    const bool in_A = (grpmask[a] & groupbit_A) != 0;
+    const bool in_B = (grpmask[a] & groupbit_B) != 0;
+
+    // with AA_flag set, only atoms that belong to both groups count
+
+    if (AA_flag && !(in_A && in_B)) continue;
+
+    const double qa = charge[a];
+    const double z = pos[a][2];
+
+    if (in_A) {
+      qsum_A += qa;
+      dipole_A += qa*z;
+      dipole_r2_A += qa*z*z;
+    }
+
+    if (in_B) {
+      qsum_B += qa;
+      dipole_B += qa*z;
+      dipole_r2_B += qa*z*z;
+    }
+  }
+
+  // replace each local sum by its total over all ranks
+  // (one scalar reduction per quantity, issued in a fixed order)
+
+  double total;
+  MPI_Allreduce(&qsum_A,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_A = total;
+  MPI_Allreduce(&qsum_B,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_B = total;
+
+  MPI_Allreduce(&dipole_A,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_A = total;
+  MPI_Allreduce(&dipole_B,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_B = total;
+
+  MPI_Allreduce(&dipole_r2_A,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_A = total;
+  MPI_Allreduce(&dipole_r2_B,&total,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_B = total;
+
+  // energy correction added to the group-group energy
+
+  const double qscale = force->qqrd2e * scale;
+  const double efact = qscale * MY_2PI/volume;
+  const double cross_term = 0.5*(qsum_A*dipole_r2_B + qsum_B*dipole_r2_A);
+  const double charge_term = qsum_A*qsum_B*zlen*zlen/12.0;
+  e2group += efact * (dipole_A*dipole_B - cross_term - charge_term);
+
+  // force correction along z
+
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
+}
+
+/* ----------------------------------------------------------------------
+ allocate group-group memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::allocate_groups()
+{
+  // per-rank partial structure factors, one slot per possible k-vector
+
+  sfacrl_A = new double[kmax3d];
+  sfacrl_B = new double[kmax3d];
+  sfacim_A = new double[kmax3d];
+  sfacim_B = new double[kmax3d];
+
+  // matching *_all buffers for groups A and B
+
+  sfacrl_A_all = new double[kmax3d];
+  sfacrl_B_all = new double[kmax3d];
+  sfacim_A_all = new double[kmax3d];
+  sfacim_B_all = new double[kmax3d];
+}
+
+/* ----------------------------------------------------------------------
+ deallocate group-group memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::deallocate_groups()
+{
+  // *_all buffers for groups A and B
+
+  delete [] sfacrl_A_all;
+  delete [] sfacrl_B_all;
+  delete [] sfacim_A_all;
+  delete [] sfacim_B_all;
+
+  // per-rank partial structure factors
+
+  delete [] sfacrl_A;
+  delete [] sfacrl_B;
+  delete [] sfacim_A;
+  delete [] sfacim_B;
+}
diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp
index f623c3a5df..ba88e40f14 100644
--- a/src/KSPACE/ewald_disp.cpp
+++ b/src/KSPACE/ewald_disp.cpp
@@ -1,1474 +1,1475 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "ewald_disp.h"
-#include "math_vector.h"
-#include "math_const.h"
-#include "math_special.h"
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "pair.h"
-#include "domain.h"
-#include "memory.h"
-#include "error.h"
-#include "update.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-using namespace MathSpecial;
-
-#define SMALL 0.00001
-
-enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h
-
-//#define DEBUG
-
-/* ---------------------------------------------------------------------- */
-
-EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
- if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command");
-
- ewaldflag = dispersionflag = dipoleflag = 1;
- accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
- memset(function, 0, EWALD_NORDER*sizeof(int));
- kenergy = kvirial = NULL;
- cek_local = cek_global = NULL;
- ekr_local = NULL;
- hvec = NULL;
- kvec = NULL;
- B = NULL;
- first_output = 0;
- energy_self_peratom = NULL;
- virial_self_peratom = NULL;
- nmax = 0;
- q2 = 0;
- b2 = 0;
- M2 = 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-EwaldDisp::~EwaldDisp()
-{
- deallocate();
- deallocate_peratom();
- delete [] ekr_local;
- delete [] B;
-}
-
-/* --------------------------------------------------------------------- */
-
-void EwaldDisp::init()
-{
- nkvec = nkvec_max = nevec = nevec_max = 0;
- nfunctions = nsums = sums = 0;
- nbox = -1;
- bytes = 0.0;
-
- if (!comm->me) {
- if (screen) fprintf(screen,"EwaldDisp initialization ...\n");
- if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n");
- }
-
- triclinic_check();
- if (domain->dimension == 2)
- error->all(FLERR,"Cannot use EwaldDisp with 2d simulation");
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp");
- if (slabflag == 1) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab EwaldDisp");
- }
-
- scale = 1.0;
- mumurd2e = force->qqrd2e;
- dielectric = force->dielectric;
-
- int tmp;
- Pair *pair = force->pair;
- int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
- double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
- if (!(ptr||cutoff))
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- int ewald_order = ptr ? *((int *) ptr) : 1<<1;
- int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
- memset(function, 0, EWALD_NFUNCS*sizeof(int));
- for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order
- if (ewald_order&(1<all(FLERR,
- "Unsupported mixing rule in kspace_style ewald/disp");
- default:
- error->all(FLERR,"Unsupported order in kspace_style ewald/disp");
- }
- nfunctions += function[k] = 1;
- nsums += n[k];
- }
-
- if (!gewaldflag) g_ewald = 0.0;
- pair->init(); // so B is defined
- init_coeffs();
- init_coeff_sums();
-
- double qsum, qsqsum, bsbsum;
- qsum = qsqsum = bsbsum = 0.0;
- if (function[0]) {
- qsum = sum[0].x;
- qsqsum = sum[0].x2;
- }
-
- // turn off coulombic if no charge
-
- if (function[0] && qsqsum == 0.0) {
- function[0] = 0;
- nfunctions -= 1;
- nsums -= 1;
- }
-
- if (function[1]) bsbsum = sum[1].x2;
- if (function[2]) bsbsum = sum[2].x2;
-
- if (function[3]) M2 = sum[9].x2;
-
- if (function[3] && strcmp(update->unit_style,"electron") == 0)
- error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
-
- if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0)
- error->all(FLERR,"Cannot use Ewald/disp solver "
- "on system with no charge, dipole, or LJ particles");
- if (fabs(qsum) > SMALL && comm->me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
-
- if (!function[1] && !function[2])
- dispersionflag = 0;
-
- if (!function[3])
- dipoleflag = 0;
-
- pair_check();
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- // setup K-space resolution
-
- q2 = qsqsum * force->qqrd2e / force->dielectric;
- M2 *= mumurd2e / force->dielectric;
- b2 = bsbsum; //Are these units right?
- bigint natoms = atom->natoms;
-
- if (!gewaldflag) {
- if (function[0]) {
- g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
- if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
- else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
- }
- else if (function[1] || function[2]) {
- //Try Newton Solver
- //Use old method to get guess
- g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
-
- double g_ewald_new =
- NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
- if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
- else error->warning(FLERR,"Ewald/disp Newton solver failed, "
- "using old method to estimate g_ewald");
- } else if (function[3]) {
- //Try Newton Solver
- //Use old method to get guess
- g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
- double g_ewald_new =
- NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2);
- if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
- else error->warning(FLERR,"Ewald/disp Newton solver failed, "
- "using old method to estimate g_ewald");
- }
- }
-
- if (!comm->me) {
- if (screen) fprintf(screen, " G vector = %g\n", g_ewald);
- if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald);
- }
-
- g_ewald_6 = g_ewald;
- deallocate_peratom();
- peratom_allocate_flag = 0;
-}
-
-/* ----------------------------------------------------------------------
- adjust EwaldDisp coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void EwaldDisp::setup()
-{
- volume = shape_det(domain->h)*slab_volfactor;
- memcpy(unit, domain->h_inv, sizeof(shape));
- shape_scalar_mult(unit, 2.0*MY_PI);
- unit[2] /= slab_volfactor;
-
- // int nbox_old = nbox, nkvec_old = nkvec;
-
- if (accuracy >= 1) {
- nbox = 0;
- error->all(FLERR,"KSpace accuracy too low");
- }
-
- bigint natoms = atom->natoms;
- double err;
- int kxmax = 1;
- int kymax = 1;
- int kzmax = 1;
- err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
- while (err > accuracy) {
- kxmax++;
- err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
- }
- err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
- while (err > accuracy) {
- kymax++;
- err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
- }
- err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
- while (err > accuracy) {
- kzmax++;
- err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
- }
- nbox = MAX(kxmax,kymax);
- nbox = MAX(nbox,kzmax);
- double gsqxmx = unit[0]*unit[0]*kxmax*kxmax;
- double gsqymx = unit[1]*unit[1]*kymax*kymax;
- double gsqzmx = unit[2]*unit[2]*kzmax*kzmax;
- gsqmx = MAX(gsqxmx,gsqymx);
- gsqmx = MAX(gsqmx,gsqzmx);
- gsqmx *= 1.00001;
-
- reallocate();
- coefficients();
- init_coeffs();
- init_coeff_sums();
- init_self();
-
- if (!(first_output||comm->me)) {
- first_output = 1;
- if (screen) fprintf(screen,
- " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
- if (logfile) fprintf(logfile,
- " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
- }
-}
-
-/* ----------------------------------------------------------------------
-   RMS accuracy estimate for one dimension
-   km = number of k-vectors, prd = box length in that dimension,
-   q2/b2/M2 = weights of the Coulombic, Lennard-Jones and dipole terms
-   returns the sum of the three contributions
-------------------------------------------------------------------------- */
-
-double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2)
-{
-  const double g2 = g_ewald*g_ewald;
-  const double g7 = g2*g2*g2*g_ewald;
-
-  // Gaussian factor shared by the Coulombic and Lennard-Jones terms
-  const double gauss = exp(-MY_PI*MY_PI*km*km/(g2*prd*prd));
-
-  // Coulombic
-  const double coul = 2.0*q2*g_ewald/prd *
-    sqrt(1.0/(MY_PI*km*natoms)) *
-    gauss;
-
-  // Lennard-Jones
-  const double lj = 4.0*b2*g7/3.0 *
-    sqrt(1.0/(MY_PI*natoms)) *
-    (gauss *
-     (MY_PI*km/(g_ewald*prd) + 1));
-
-  // dipole
-  const double dip = 8.0*MY_PI*M2/volume*g_ewald *
-    sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) *
-    exp(-pow(MY_PI*km/(g_ewald*prd),2.0));
-
-  double total = 0.0;
-  total += coul;
-  total += lj;
-  total += dip;
-  return total;
-}
-
-void EwaldDisp::reallocate() // count the k-vectors inside the gsqmx cutoff, grow the per-vector arrays if needed, then fill hvec/kvec
-{
- int ix, iy, iz;
- int nkvec_max = nkvec; // local holding the count from before this call; NOTE(review): init() assigns an nkvec_max declared elsewhere, which this local hides - confirm that is intended
- vector h;
-
- nkvec = 0;
- int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; // one flag per (ix,iy,iz) with ix in [0,nbox] and iy,iz in [-nbox,nbox]
- int *flag = kflag;
-
- for (ix=0; ix<=nbox; ++ix)
- for (iy=-nbox; iy<=nbox; ++iy)
- for (iz=-nbox; iz<=nbox; ++iz)
- if (!(ix||iy||iz)) *(flag++) = 0; // the zero vector is never selected
- else if ((!ix)&&(iy<0)) *(flag++) = 0;
- else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry
- else {
- h[0] = unit[0]*ix;
- h[1] = unit[5]*ix+unit[1]*iy;
- h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz;
- if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; // <= binds tighter than =, so the flag stores the 0/1 result of the cutoff test
- }
-
- if (nkvec>nkvec_max) { // more vectors than before this call: replace every per-vector array
- deallocate(); // free memory
- hvec = new hvector[nkvec]; // hvec
- bytes += (nkvec-nkvec_max)*sizeof(hvector);
- kvec = new kvector[nkvec]; // kvec
- bytes += (nkvec-nkvec_max)*sizeof(kvector);
- kenergy = new double[nkvec*nfunctions]; // kenergy
- bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double);
- kvirial = new double[6*nkvec*nfunctions]; // kvirial
- bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double);
- cek_local = new complex[nkvec*nsums]; // cek_local
- bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
- cek_global = new complex[nkvec*nsums]; // cek_global
- bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
- nkvec_max = nkvec; // writes the local declared above, which is not read again
- }
-
- flag = kflag; // create index and
- kvector *k = kvec; // wave vectors
- hvector *hi = hvec;
- for (ix=0; ix<=nbox; ++ix) // same traversal order as the counting pass, so flags line up
- for (iy=-nbox; iy<=nbox; ++iy)
- for (iz=-nbox; iz<=nbox; ++iz)
- if (*(flag++)) {
- hi->x = unit[0]*ix;
- hi->y = unit[5]*ix+unit[1]*iy;
- (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz;
- k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } // indices are stored shifted by nbox
-
- delete [] kflag;
-}
-
-
-/* ----------------------------------------------------------------------
-   resize the atom-count dependent storage: the per-atom self-term
-   tables (only when per-atom tallies are active) and ekr_local
-------------------------------------------------------------------------- */
-
-void EwaldDisp::reallocate_atoms()
-{
-  // per-atom tables are rebuilt once nlocal exceeds the recorded size
-  const bool tally_peratom = eflag_atom || vflag_atom;
-  if (tally_peratom && atom->nlocal > nmax) {
-    deallocate_peratom();
-    allocate_peratom();
-    nmax = atom->nmax;
-  }
-
-  // ekr_local holds 2*nbox+1 entries per atom slot; it only ever grows
-  nevec = atom->nmax*(2*nbox+1);
-  if (nevec > nevec_max) {
-    delete [] ekr_local;
-    ekr_local = new cvector[nevec];
-    bytes += (nevec-nevec_max)*sizeof(cvector);
-    nevec_max = nevec;
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   create the per-atom self energy and self virial tables,
-   atom->nmax rows by EWALD_NFUNCS columns each
-------------------------------------------------------------------------- */
-
-void EwaldDisp::allocate_peratom()
-{
-  const int nrows = atom->nmax;
-
-  memory->create(energy_self_peratom,nrows,EWALD_NFUNCS,
-                 "ewald/n:energy_self_peratom");
-  memory->create(virial_self_peratom,nrows,EWALD_NFUNCS,
-                 "ewald/n:virial_self_peratom");
-}
-
-
-void EwaldDisp::deallocate_peratom() // free memory: releases both per-atom self-term tables
-{
- memory->destroy(energy_self_peratom); // counterpart of the memory->create() calls in allocate_peratom()
- memory->destroy(virial_self_peratom);
-}
-
-
-/* ----------------------------------------------------------------------
-   free the six per-k-vector arrays and reset their pointers to NULL
-------------------------------------------------------------------------- */
-
-void EwaldDisp::deallocate()
-{
-  delete [] hvec;
-  delete [] kvec;
-  delete [] kenergy;
-  delete [] kvirial;
-  delete [] cek_local;
-  delete [] cek_global;
-
-  // leave no dangling pointers behind, so a repeated call is harmless
-  hvec = NULL;
-  kvec = NULL;
-  kenergy = kvirial = NULL;
-  cek_local = cek_global = NULL;
-}
-
-
-void EwaldDisp::coefficients()
-{
- vector h;
- hvector *hi = hvec, *nh;
- double eta2 = 0.25/(g_ewald*g_ewald);
- double b1, b2, expb2, h1, h2, c1, c2;
- double *ke = kenergy, *kv = kvirial;
- int func0 = function[0], func12 = function[1]||function[2],
- func3 = function[3];
-
- for (nh = (hi = hvec)+nkvec; hintypes;
-
- if (function[1]) { // geometric 1/r^6
- double **b = (double **) force->pair->extract("B",tmp);
- delete [] B;
- B = new double[n+1];
- bytes += (n+1)*sizeof(double);
- for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
- }
- if (function[2]) { // arithmetic 1/r^6
- double **epsilon = (double **) force->pair->extract("epsilon",tmp);
- double **sigma = (double **) force->pair->extract("sigma",tmp);
- double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
- double c[7] = {
- 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
-
- if (!(epsilon&&sigma))
- error->all(
- FLERR,"Epsilon or sigma reference not set by pair style in ewald/n");
- for (int i=0; i<=n; ++i) {
- eps_i = sqrt(epsilon[i][i]);
- sigma_i = sigma[i][i];
- sigma_n = 1.0;
- for (int j=0; j<7; ++j) {
- *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i;
- }
- }
- }
-}
-
-void EwaldDisp::init_coeff_sums()
-{
- if (sums) return; // calculated only once
- sums = 1;
-
- Sum sum_local[EWALD_MAX_NSUMS];
-
- memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum));
- if (function[0]) { // 1/r
- double *q = atom->q, *qn = q+atom->nlocal;
- for (double *i=q; itype, *ntype = type+atom->nlocal;
- for (int *i=type; itype, *ntype = type+atom->nlocal;
- for (int *i=type; imu) { // dipole
- double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal;
- for (double *i = mu; i < nmu; i += 4)
- sum_local[9].x2 += i[3]*i[3];
- }
- MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world);
-}
-
-
-/* ----------------------------------------------------------------------
-   global self energy and self virial, one entry per interaction type
-   (0 = 1/r, 1 = geometric 1/r^6, 2 = arithmetic 1/r^6, 3 = dipole);
-   entries of inactive types stay zero
-------------------------------------------------------------------------- */
-
-void EwaldDisp::init_self()
-{
-  const double g = g_ewald;
-  const double gsq = g*g;
-  const double gcube = g*gsq;
-  const double qscale = force->qqrd2e * scale;
-
-  for (int n = 0; n < EWALD_NFUNCS; ++n) {
-    energy_self[n] = 0.0;
-    virial_self[n] = 0.0;
-  }
-
-  // 1/r
-  if (function[0]) {
-    const double pref = -0.5*MY_PI*qscale/(gsq*volume);
-    virial_self[0] = pref*sum[0].x*sum[0].x;
-    energy_self[0] = sum[0].x2*qscale*g/MY_PIS-virial_self[0];
-  }
-
-  // geometric 1/r^6
-  if (function[1]) {
-    const double pref = MY_PI*MY_PIS*gcube/(6.0*volume);
-    virial_self[1] = pref*sum[1].x*sum[1].x;
-    energy_self[1] = -sum[1].x2*gcube*gcube/12.0+virial_self[1];
-  }
-
-  // arithmetic 1/r^6: sums 2..8 are paired symmetrically around sum[5]
-  if (function[2]) {
-    const double pref = MY_PI*MY_PIS*gcube/(48.0*volume);
-    const double paired = sum[2].x*sum[8].x+
-      sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x;
-    virial_self[2] = pref*paired;
-    energy_self[2] = -sum[2].x2*gcube*gcube/3.0+virial_self[2];
-  }
-
-  // dipole: the virial part is set to zero here ("in surface")
-  if (function[3]) {
-    virial_self[3] = 0;
-    energy_self[3] = sum[9].x2*mumurd2e*2.0*gcube/3.0/MY_PIS-virial_self[3];
-  }
-}
-
-
-void EwaldDisp::init_self_peratom() // fill the per-atom self energy/virial tables for the nlocal local atoms
-{
- if (!(vflag_atom || eflag_atom)) return; // nothing to do unless a per-atom tally is requested
-
- double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
- const double qscale = force->qqrd2e * scale;
- double *energy = energy_self_peratom[0]; // row 0; walked below with stride EWALD_NFUNCS (assumes the rows are contiguous - TODO confirm against memory->create)
- double *virial = virial_self_peratom[0];
- int nlocal = atom->nlocal;
-
- memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); // clears nlocal rows of EWALD_NFUNCS doubles
- memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double));
-
- if (function[0]) { // 1/r
- double *ei = energy;
- double *vi = virial;
- double ce = qscale*g1/MY_PIS;
- double cv = -0.5*MY_PI*qscale/(g2*volume);
- double *qi = atom->q, *qn = qi + nlocal;
- for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { // one atom per pass, column 0 of each row
- double q = *qi;
- *vi = cv*q*sum[0].x;
- *ei = ce*q*q-vi[0];
- }
- }
- if (function[1]) { // geometric 1/r^6
- double *ei = energy+1; // column 1 of each row
- double *vi = virial+1;
- double ce = -g3*g3/12.0;
- double cv = MY_PI*MY_PIS*g3/(6.0*volume);
- int *typei = atom->type, *typen = typei + atom->nlocal;
- for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
- double b = B[*typei]; // B is indexed directly by atom type in this branch
- *vi = cv*b*sum[1].x;
- *ei = ce*b*b+vi[0];
- }
- }
- if (function[2]) { // arithmetic 1/r^6
- double *bi;
- double *ei = energy+2; // column 2 of each row
- double *vi = virial+2;
- double ce = -g3*g3/3.0;
- double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume);
- int *typei = atom->type, *typen = typei + atom->nlocal;
- for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
- bi = B+7*typei[0]+7; // one past the 7 coefficients stored for this type
- for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; // bi walks backwards: sum[2..8] pair with entries 6..0; *vi starts at 0 from the memset
-
- /* PJV 20120225:
- should this be this instead? above implies an inverse dependence
- seems to be the above way in original; i recall having tested
- arithmetic mixing in the conception phase, but an extra test would
- be prudent (pattern repeats in multiple functions below)
-
- bi = B+7*typei[0];
- for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0];
-
- */
-
- *ei = ce*bi[0]*bi[6]+vi[0]; // after 7 decrements bi is back at B+7*type, so this multiplies the first and last coefficient
- }
- }
- if (function[3]&&atom->mu) { // dipole
- double *ei = energy+3; // column 3 of each row
- double *vi = virial+3;
- double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; // mu is read with a stride of 4 doubles per atom
- double ce = mumurd2e*2.0*g3/3.0/MY_PIS;
- for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
- *vi = 0; // in surface
- *ei = ce*imu[3]*imu[3]-vi[0]; // uses the 4th entry of each mu row (presumably the dipole magnitude - TODO confirm)
- }
- }
-}
-
-
-/* ----------------------------------------------------------------------
-   long-range EwaldDisp force, energy and virial for the current step
-   eflag/vflag select which tallies are accumulated
-   returns immediately when nbox is zero
-------------------------------------------------------------------------- */
-
-void EwaldDisp::compute(int eflag, int vflag)
-{
-  if (nbox == 0) return;
-
-  // energy/virial flags: either let ev_setup() decide or clear them all
-
-  if (!eflag && !vflag) {
-    evflag = 0;
-    eflag_global = 0;
-    vflag_global = 0;
-    eflag_atom = 0;
-    vflag_atom = 0;
-  } else ev_setup(eflag,vflag);
-
-  // first request for per-atom tallies: create the per-atom tables
-
-  const bool want_peratom = eflag_atom || vflag_atom;
-  if (want_peratom && !peratom_allocate_flag) {
-    allocate_peratom();
-    peratom_allocate_flag = 1;
-    nmax = atom->nmax;
-  }
-
-  reallocate_atoms();
-  init_self_peratom();
-
-  compute_ek();
-  compute_force();
-
-  // compute_surface() is deliberately not invoked:
-  // conducting metal (tinfoil) boundary conditions are assumed
-
-  compute_energy();
-  compute_energy_peratom();
-
-  compute_virial();
-  compute_virial_dipole();
-  compute_virial_peratom();
-}
-
-
-void EwaldDisp::compute_ek()
-{
- cvector *ekr = ekr_local;
- int lbytes = (2*nbox+1)*sizeof(cvector);
- hvector *h = NULL;
- kvector *k, *nk = kvec+nkvec;
- cvector *z = new cvector[2*nbox+1];
- cvector z1, *zx, *zy, *zz, *zn = z+2*nbox;
- complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL;
- vector mui;
- double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0;
- double bi = 0.0, ci[7];
- double *mu = atom->mu ? atom->mu[0] : NULL;
- int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic;
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- memset(cek_local, 0, n*sizeof(complex)); // reset sums
- while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0]
- if (tri) { // triclinic z[1]
- C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]);
- C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]);
- C_ANGLE(z1.z, x[2]*unit[2]); x += 3;
- }
- else { // orthogonal z[1]
- C_ANGLE(z1.x, *(x++)*unit[0]);
- C_ANGLE(z1.y, *(x++)*unit[1]);
- C_ANGLE(z1.z, *(x++)*unit[2]);
- }
- for (; zzx, zz->x, z1.x); // 3D k-vector
- C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y);
- C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z);
- }
- kx = ky = -1;
- cek = cek_local;
- if (func[0]) qi = *(q++);
- if (func[1]) bi = B[*type];
- if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double));
- if (func[3]) {
- memcpy(mui, mu, sizeof(vector));
- mu += 4;
- h = hvec;
- }
- for (k=kvec; ky) { // based on order in
- if (kx!=k->x) cx = z[kx = k->x].x; // reallocate
- C_RMULT(zxy, z[ky = k->y].y, cx);
- }
- C_RMULT(zxyz, z[k->z].z, zxy);
- if (func[0]) {
- cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi;
- }
- if (func[1]) {
- cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi;
- }
- if (func[2]) for (i=0; i<7; ++i) {
- cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i];
- }
- if (func[3]) {
- register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h;
- cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk;
- }
- }
- ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes);
- ++type;
- }
- MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world);
-
- delete [] z;
-}
-
-
-void EwaldDisp::compute_force()
-{
- kvector *k;
- hvector *h, *nh;
- cvector *z = ekr_local;
- vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL;
- complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
- complex *cek_coul;
- double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL;
- double *mu = atom->mu ? atom->mu[0] : NULL;
- const double qscale = force->qqrd2e * scale;
- double *ke, c[EWALD_NFUNCS] = {
- 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
- double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
- int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
- int func[EWALD_NFUNCS];
-
- if (atom->torque) t = atom->torque[0];
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr =
- for (; fy) { // based on order in
- if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
- C_RMULT(zxy, z[ky = k->y].y, zx);
- }
- C_CRMULT(zc, z[k->z].z, zxy);
- if (func[0]) { // 1/r
- register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re);
- if (func[3]) cek_coul = cek;
- ++cek;
- sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im;
- }
- if (func[1]) { // geometric 1/r^6
- register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
- sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im;
- }
- if (func[2]) { // arithmetic 1/r^6
- register double im, c = *(ke++);
- for (i=2; i<9; ++i) {
- im = c*(zc.im*cek->re+cek->im*zc.re); ++cek;
- sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im;
- }
- }
- if (func[3]) { // dipole
- register double im = *(ke)*(zc.im*cek->re+
- cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
- register double im2 = *(ke)*(zc.re*cek->re-
- cek->im*zc.im);
- sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
- t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque
- t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
- t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
- if (func[0]) { // charge-dipole
- register double qi = *(q)*c[0];
- im = - *(ke)*(zc.re*cek_coul->re -
- cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
- im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi;
- sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
-
- im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im);
- im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re);
- t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque
- t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
- t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
- }
- ++cek;
- ke++;
- }
- }
- if (func[0]) { // 1/r
- register double qi = *(q++)*c[0];
- f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi;
- }
- if (func[1]) { // geometric 1/r^6
- register double bi = B[*type]*c[1];
- f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi;
- }
- if (func[2]) { // arithmetic 1/r^6
- register double *bi = B+7*type[0]+7;
- for (i=2; i<9; ++i) {
- register double c2 = (--bi)[0]*c[2];
- f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2;
- }
- }
- if (func[3]) { // dipole
- f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2];
- }
- z = (cvector *) ((char *) z+lbytes);
- ++type;
- t += 3;
- }
-}
-
-
-/* ----------------------------------------------------------------------
-   surface (total dipole) contribution to the dipole self terms
-   not called when conducting metal (tinfoil) boundary conditions are
-   assumed: dielectric --> infinity makes every term here zero
-------------------------------------------------------------------------- */
-
-void EwaldDisp::compute_surface()
-{
-  if (!function[3] || !atom->mu) return;
-
-  const int nlocal = atom->nlocal;
-  double *mu = atom->mu[0];
-
-  // local sum of the dipole vectors; mu is stored with 4 doubles per atom
-
-  vector sum_local = VECTOR_NULL, sum_total;
-  memset(sum_local, 0, sizeof(vector));
-  for (int j = 0; j < nlocal; ++j) {
-    const double *muj = mu+4*j;
-    sum_local[0] += muj[0];
-    sum_local[1] += muj[1];
-    sum_local[2] += muj[2];
-  }
-  MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world);
-
-  // global terms
-
-  virial_self[3] =
-    mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume);
-  energy_self[3] -= virial_self[3];
-
-  if (!vflag_atom && !eflag_atom) return;
-
-  // per-atom terms live in column 3 of each EWALD_NFUNCS-wide row
-
-  const double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume;
-  double *ecol = energy_self_peratom[0]+3;
-  double *vcol = virial_self_peratom[0]+3;
-  for (int j = 0; j < nlocal; ++j) {
-    const double *muj = mu+4*j;
-    const int row = j*EWALD_NFUNCS;
-    vcol[row] = cv*(muj[0]*sum_total[0]+muj[1]*sum_total[1]+muj[2]*sum_total[2]);
-    ecol[row] -= vcol[row];
-  }
-}
-
-
-void EwaldDisp::compute_energy()
-{
- energy = 0.0;
- if (!eflag_global) return;
-
- complex *cek = cek_global;
- complex *cek_coul;
- double *ke = kenergy;
- const double qscale = force->qqrd2e * scale;
- double c[EWALD_NFUNCS] = {
- 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
- double sum[EWALD_NFUNCS];
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums
- for (int k=0; kre*cek->re+cek->im*cek->im);
- if (func[3]) cek_coul = cek;
- ++cek;
- }
- if (func[1]) { // geometric 1/r^6
- sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
- if (func[2]) { // arithmetic 1/r^6
- register double r =
- (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
- (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
- (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
- 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
- sum[2] += *(ke++)*r;
- }
- if (func[3]) { // dipole
- sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im);
- if (func[0]) { // charge-dipole
- sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
- }
- ke++;
- ++cek;
- }
- }
- for (int k=0; kq;
- double *eatomj = eatom;
- double *mu = atom->mu ? atom->mu[0] : NULL;
- const double qscale = force->qqrd2e * scale;
- double *ke = kenergy;
- double c[EWALD_NFUNCS] = {
- 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
- int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- for (int j = 0; j < atom->nlocal; j++, ++eatomj) {
- k = kvec;
- kx = ky = -1;
- ke = kenergy;
- cek = cek_global;
- memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double));
- if (func[3]) {
- register double di = c[3];
- mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
- mu++;
- }
- for (nh = (h = hvec)+nkvec; hy) { // based on order in
- if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
- C_RMULT(zxy, z[ky = k->y].y, zx);
- }
- C_CRMULT(zc, z[k->z].z, zxy);
- if (func[0]) { // 1/r
- sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im);
- if (func[3]) cek_coul = cek;
- ++cek;
- }
- if (func[1]) { // geometric 1/r^6
- sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
- if (func[2]) { // arithmetic 1/r^6
- register double im, c = *(ke++);
- for (i=2; i<9; ++i) {
- im = c*(cek->re*zc.re - cek->im*zc.im); ++cek;
- sum[i] += im;
- }
- }
- if (func[3]) { // dipole
- double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
- sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk;
- if (func[0]) { // charge-dipole
- register double qj = *(q)*c[0];
- sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
- sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj;
- }
- ++cek;
- ke++;
- }
- }
-
- if (func[0]) { // 1/r
- register double qj = *(q++)*c[0];
- *eatomj += sum[0]*qj - energy_self_peratom[j][0];
- }
- if (func[1]) { // geometric 1/r^6
- register double bj = B[*type]*c[1];
- *eatomj += sum[1]*bj - energy_self_peratom[j][1];
- }
- if (func[2]) { // arithmetic 1/r^6
- register double *bj = B+7*type[0]+7;
- for (i=2; i<9; ++i) {
- register double c2 = (--bj)[0]*c[2];
- *eatomj += 0.5*sum[i]*c2;
- }
- *eatomj -= energy_self_peratom[j][2];
- }
- if (func[3]) { // dipole
- *eatomj += sum[9] - energy_self_peratom[j][3];
- }
- z = (cvector *) ((char *) z+lbytes);
- ++type;
- }
-}
-
-
-#define swap(a, b) { register double t = a; a= b; b = t; } // exchange two double lvalues through a temporary; each argument is expanded twice, so pass only side-effect-free lvalues
-
-void EwaldDisp::compute_virial()
-{
- memset(virial, 0, sizeof(shape));
- if (!vflag_global) return;
-
- complex *cek = cek_global;
- complex *cek_coul;
- double *kv = kvirial;
- const double qscale = force->qqrd2e * scale;
- double c[EWALD_NFUNCS] = {
- 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
- shape sum[EWALD_NFUNCS];
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- memset(sum, 0, EWALD_NFUNCS*sizeof(shape));
- for (int k=0; kre*cek->re+cek->im*cek->im;
- if (func[3]) cek_coul = cek;
- ++cek;
- sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r;
- sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r;
- }
- if (func[1]) { // geometric 1/r^6
- register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
- sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r;
- sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r;
- }
- if (func[2]) { // arithmetic 1/r^6
- register double r =
- (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
- (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
- (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
- 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
- sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r;
- sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r;
- }
- if (func[3]) {
- register double r = cek->re*cek->re+cek->im*cek->im;
- sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
- sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
- if (func[0]) { // charge-dipole
- kv -= 6;
- register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
- sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
- sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
- }
- ++cek;
- }
- }
- for (int k=0; kmu ? atom->mu[0] : NULL;
- double *vatomj = NULL;
- if (vflag_atom && vatom) vatomj = vatom[0];
- const double qscale = force->qqrd2e * scale;
- double *ke, c[EWALD_NFUNCS] = {
- 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
- double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
- int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- memset(&sum[0], 0, 6*sizeof(double));
- memset(&sum_total[0], 0, 6*sizeof(double));
- for (int j = 0; j < atom->nlocal; j++) {
- k = kvec;
- kx = ky = -1;
- ke = kenergy;
- cek = cek_global;
- memset(&sum[0], 0, 6*sizeof(double));
- if (func[3]) {
- register double di = c[3];
- mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
- mu++;
- }
- for (nh = (h = hvec)+nkvec; hy) { // based on order in
- if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
- C_RMULT(zxy, z[ky = k->y].y, zx);
- }
- C_CRMULT(zc, z[k->z].z, zxy);
- double im = 0.0;
- if (func[0]) { // 1/r
- ke++;
- if (func[3]) cek_coul = cek;
- ++cek;
- }
- if (func[1]) { // geometric 1/r^6
- ke++;
- ++cek;
- }
- if (func[2]) { // arithmetic 1/r^6
- ke++;
- for (i=2; i<9; ++i) {
- ++cek;
- }
- }
- if (func[3]) { // dipole
- im = *(ke)*(zc.re*cek->re - cek->im*zc.im);
- if (func[0]) { // charge-dipole
- im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re);
- }
- sum[0] -= mui[0]*h->x*im;
- sum[1] -= mui[1]*h->y*im;
- sum[2] -= mui[2]*h->z*im;
- sum[3] -= mui[0]*h->y*im;
- sum[4] -= mui[0]*h->z*im;
- sum[5] -= mui[1]*h->z*im;
- ++cek;
- ke++;
- }
- }
-
- if (vflag_global)
- for (int n = 0; n < 6; n++)
- sum_total[n] -= sum[n];
-
- if (vflag_atom)
- for (int n = 0; n < 6; n++)
- vatomj[n] -= sum[n];
-
- z = (cvector *) ((char *) z+lbytes);
- ++type;
- if (vflag_atom) vatomj += 6;
- }
-
- if (vflag_global) {
- MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world);
- for (int n = 0; n < 6; n++)
- virial[n] += sum[n];
- }
-
-}
-
-void EwaldDisp::compute_virial_peratom()
-{
- if (!vflag_atom) return;
-
- kvector *k;
- hvector *h, *nh;
- cvector *z = ekr_local;
- vector mui = VECTOR_NULL;
- complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
- complex *cek_coul;
- double *kv;
- double *q = atom->q;
- double *vatomj = vatom ? vatom[0] : NULL;
- double *mu = atom->mu ? atom->mu[0] : NULL;
- const double qscale = force->qqrd2e * scale;
- double c[EWALD_NFUNCS] = {
- 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
- 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
- shape sum[EWALD_MAX_NSUMS];
- int func[EWALD_NFUNCS];
-
- memcpy(func, function, EWALD_NFUNCS*sizeof(int));
- int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
- for (int j = 0; j < atom->nlocal; j++) {
- k = kvec;
- kx = ky = -1;
- kv = kvirial;
- cek = cek_global;
- memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape));
- if (func[3]) {
- register double di = c[3];
- mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
- mu++;
- }
- for (nh = (h = hvec)+nkvec; hy) { // based on order in
- if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
- C_RMULT(zxy, z[ky = k->y].y, zx);
- }
- C_CRMULT(zc, z[k->z].z, zxy);
- if (func[0]) { // 1/r
- if (func[3]) cek_coul = cek;
- register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
- sum[0][0] += *(kv++)*r;
- sum[0][1] += *(kv++)*r;
- sum[0][2] += *(kv++)*r;
- sum[0][3] += *(kv++)*r;
- sum[0][4] += *(kv++)*r;
- sum[0][5] += *(kv++)*r;
- }
- if (func[1]) { // geometric 1/r^6
- register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
- sum[1][0] += *(kv++)*r;
- sum[1][1] += *(kv++)*r;
- sum[1][2] += *(kv++)*r;
- sum[1][3] += *(kv++)*r;
- sum[1][4] += *(kv++)*r;
- sum[1][5] += *(kv++)*r;
- }
- if (func[2]) { // arithmetic 1/r^6
- register double r;
- for (i=2; i<9; ++i) {
- r = cek->re*zc.re - cek->im*zc.im; ++cek;
- sum[i][0] += *(kv++)*r;
- sum[i][1] += *(kv++)*r;
- sum[i][2] += *(kv++)*r;
- sum[i][3] += *(kv++)*r;
- sum[i][4] += *(kv++)*r;
- sum[i][5] += *(kv++)*r;
- kv -= 6;
- }
- kv += 6;
- }
- if (func[3]) { // dipole
- double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
- register double
- r = (cek->re*zc.re - cek->im*zc.im)*muk;
- sum[9][0] += *(kv++)*r;
- sum[9][1] += *(kv++)*r;
- sum[9][2] += *(kv++)*r;
- sum[9][3] += *(kv++)*r;
- sum[9][4] += *(kv++)*r;
- sum[9][5] += *(kv++)*r;
- if (func[0]) { // charge-dipole
- kv -= 6;
- register double qj = *(q)*c[0];
- r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
- r += -(cek->re*zc.im + cek->im*zc.re)*qj;
- sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r;
- sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r;
- }
- ++cek;
- }
- }
-
- if (func[0]) { // 1/r
- register double qi = *(q++)*c[0];
- for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi;
- }
- if (func[1]) { // geometric 1/r^6
- register double bi = B[*type]*c[1];
- for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi;
- }
- if (func[2]) { // arithmetic 1/r^6
- register double *bj = B+7*type[0]+7;
- for (i=2; i<9; ++i) {
- register double c2 = (--bj)[0]*c[2];
- for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2;
- }
- }
- if (func[3]) { // dipole
- for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n];
- }
-
- for (int k=0; kq;
- double **x = atom->x;
- double zprd = domain->zprd;
- int nlocal = atom->nlocal;
-
- double qsum = 0.0;
- if (function[0]) qsum = sum[0].x;
-
- double dipole = 0.0;
- for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
- if (function[3] && atom->mu) {
- double **mu = atom->mu;
- for (int i = 0; i < nlocal; i++) dipole += mu[i][2];
- }
-
- // sum local contributions to get global dipole moment
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
- // need to make non-neutral systems and/or
- // per-atom energy translationally invariant
-
- double dipole_r2 = 0.0;
- if (eflag_atom || fabs(qsum) > SMALL) {
-
- if (function[3] && atom->mu)
- error->all(FLERR,"Cannot (yet) use kspace slab correction with "
- "long-range dipoles and non-neutral systems or per-atom energy");
-
- for (int i = 0; i < nlocal; i++)
- dipole_r2 += q[i]*x[i][2]*x[i][2];
-
- // sum local contributions
-
- double tmp;
- MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2 = tmp;
- }
-
- // compute corrections
-
- const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
- qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) energy += qscale * e_slabcorr;
-
- // per-atom energy
-
- if (eflag_atom) {
- double efact = qscale * MY_2PI/volume;
- for (int i = 0; i < nlocal; i++)
- eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
- qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
- }
-
- // add on force corrections
-
- double ffact = qscale * (-4.0*MY_PI/volume);
- double **f = atom->f;
-
- for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-
- // add on torque corrections
-
- if (function[3] && atom->mu && atom->torque) {
- double **mu = atom->mu;
- double **torque = atom->torque;
- for (int i = 0; i < nlocal; i++) {
- torque[i][0] += ffact * dipole_all * mu[i][1];
- torque[i][1] += -ffact * dipole_all * mu[i][0];
- }
- }
-}
-
-/* ----------------------------------------------------------------------
-   Newton iteration used to find g_ewald for LJ systems
-   x = starting guess; remaining arguments are forwarded to f()/derivf()
-   returns the converged root, or -1 if the iterate turns negative or
-   NaN, or if no convergence is reached within the iteration limit
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::NewtonSolve(double x, double Rc,
-                              bigint natoms, double vol, double b2)
-{
-  const int maxit = 10000;     // iteration limit
-  const double tol = 0.00001;  // convergence threshold on the step size
-
-  int iter = 0;
-  while (iter < maxit) {
-    const double step = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2);
-    x -= step;
-
-    if (fabs(step) < tol) return x;
-
-    // x != x holds only for NaN
-    if (x < 0 || x != x) return -1;
-
-    ++iter;
-  }
-
-  return -1;
-}
-
-/* ----------------------------------------------------------------------
-   f(x) = estimated error at splitting parameter x minus the requested
-   accuracy; the LJ form is used when function[1] or function[2] is
-   set, the dipole form otherwise
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
-{
-  const double a = Rc*x;
-
-  // LJ
-
-  if (function[1] || function[2]) {
-    const double scale_lj =
-      4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a);
-    const double series =
-      6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a;
-    return scale_lj * series - accuracy;
-  }
-
-  // dipole
-
-  const double rg2 = a*a;
-  const double rg4 = rg2*rg2;
-  const double rg6 = rg4*rg2;
-  const double Cc = 4.0*rg4 + 6.0*rg2 + 3.0;
-  const double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0;
-
-  const double scale_dip =
-    b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms));
-  const double mix =
-    sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc);
-  return (scale_dip * mix * exp(-rg2)) - accuracy;
-}
-
-/* ----------------------------------------------------------------------
-   forward-difference estimate of f'(x) with a fixed step of 1e-6
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::derivf(double x, double Rc,
-                         bigint natoms, double vol, double b2)
-{
-  const double h = 0.000001;
-  const double ahead = f(x + h,Rc,natoms,vol,b2);
-  const double here = f(x,Rc,natoms,vol,b2);
-  return (ahead - here) / h;
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "ewald_disp.h"
+#include "math_vector.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair.h"
+#include "domain.h"
+#include "memory.h"
+#include "error.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define SMALL 0.00001
+
+enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h
+
+//#define DEBUG
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   constructor: takes exactly one argument, the relative accuracy
+   (its absolute value is stored); every owned pointer starts out NULL
+   so deallocate() and the destructor are safe before any allocation
+------------------------------------------------------------------------- */
+
+EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command");
+
+  ewaldflag = dispersionflag = dipoleflag = 1;
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+
+  // clear function[] over EWALD_NFUNCS entries, the same extent init()
+  // and every func[] copy in this file use; the previous EWALD_NORDER
+  // extent did not match the way the array is sized everywhere else
+  memset(function, 0, EWALD_NFUNCS*sizeof(int));
+
+  // storage owned by this class, allocated later on demand
+  kenergy = kvirial = NULL;
+  cek_local = cek_global = NULL;
+  ekr_local = NULL;
+  hvec = NULL;
+  kvec = NULL;
+  B = NULL;
+  energy_self_peratom = NULL;
+  virial_self_peratom = NULL;
+
+  first_output = 0;
+  nmax = 0;
+
+  // weights handed to rms() by setup()
+  q2 = 0;
+  b2 = 0;
+  M2 = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+EwaldDisp::~EwaldDisp()
+{
+  delete [] B;                  // per-type coefficient table
+  delete [] ekr_local;          // per-atom cvector buffer
+  deallocate_peratom();         // per-atom self energy/virial tables
+  deallocate();                 // k-space arrays
+}
+
+/* --------------------------------------------------------------------- */
+
+void EwaldDisp::init()
+{
+ nkvec = nkvec_max = nevec = nevec_max = 0;
+ nfunctions = nsums = sums = 0;
+ nbox = -1;
+ bytes = 0.0;
+
+ if (!comm->me) {
+ if (screen) fprintf(screen,"EwaldDisp initialization ...\n");
+ if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n");
+ }
+
+ triclinic_check();
+ if (domain->dimension == 2)
+ error->all(FLERR,"Cannot use EwaldDisp with 2d simulation");
+ if (slabflag == 0 && domain->nonperiodic > 0)
+ error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp");
+ if (slabflag == 1) {
+ if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+ domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+ error->all(FLERR,"Incorrect boundaries with slab EwaldDisp");
+ }
+
+ scale = 1.0;
+ mumurd2e = force->qqrd2e;
+ dielectric = force->dielectric;
+
+ int tmp;
+ Pair *pair = force->pair;
+ int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
+ double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
+ if (!(ptr||cutoff))
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ int ewald_order = ptr ? *((int *) ptr) : 1<<1;
+ int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
+ memset(function, 0, EWALD_NFUNCS*sizeof(int));
+ for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order
+ if (ewald_order&(1<all(FLERR,
+ "Unsupported mixing rule in kspace_style ewald/disp");
+ default:
+ error->all(FLERR,"Unsupported order in kspace_style ewald/disp");
+ }
+ nfunctions += function[k] = 1;
+ nsums += n[k];
+ }
+
+ if (!gewaldflag) g_ewald = 0.0;
+ pair->init(); // so B is defined
+ init_coeffs();
+ init_coeff_sums();
+
+ double qsum, qsqsum, bsbsum;
+ qsum = qsqsum = bsbsum = 0.0;
+ if (function[0]) {
+ qsum = sum[0].x;
+ qsqsum = sum[0].x2;
+ }
+
+ // turn off coulombic if no charge
+
+ if (function[0] && qsqsum == 0.0) {
+ function[0] = 0;
+ nfunctions -= 1;
+ nsums -= 1;
+ }
+
+ if (function[1]) bsbsum = sum[1].x2;
+ if (function[2]) bsbsum = sum[2].x2;
+
+ if (function[3]) M2 = sum[9].x2;
+
+ if (function[3] && strcmp(update->unit_style,"electron") == 0)
+ error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
+
+ if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0)
+ error->all(FLERR,"Cannot use Ewald/disp solver "
+ "on system with no charge, dipole, or LJ particles");
+ if (fabs(qsum) > SMALL && comm->me == 0) {
+ char str[128];
+ sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+ error->warning(FLERR,str);
+ }
+
+ if (!function[1] && !function[2])
+ dispersionflag = 0;
+
+ if (!function[3])
+ dipoleflag = 0;
+
+ pair_check();
+
+ // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+ if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+ else accuracy = accuracy_relative * two_charge_force;
+
+ // setup K-space resolution
+
+ q2 = qsqsum * force->qqrd2e;
+ M2 *= mumurd2e;
+ b2 = bsbsum; //Are these units right?
+ bigint natoms = atom->natoms;
+
+ if (!gewaldflag) {
+ if (function[0]) {
+ g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
+ if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
+ else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
+ }
+ else if (function[1] || function[2]) {
+ //Try Newton Solver
+ //Use old method to get guess
+ g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
+
+ double g_ewald_new =
+ NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
+ if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
+ else error->warning(FLERR,"Ewald/disp Newton solver failed, "
+ "using old method to estimate g_ewald");
+ } else if (function[3]) {
+ //Try Newton Solver
+ //Use old method to get guess
+ g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
+ double g_ewald_new =
+ NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2);
+ if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
+ else error->warning(FLERR,"Ewald/disp Newton solver failed, "
+ "using old method to estimate g_ewald");
+ }
+ }
+
+ if (!comm->me) {
+ if (screen) fprintf(screen, " G vector = %g\n", g_ewald);
+ if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald);
+ }
+
+ g_ewald_6 = g_ewald;
+ deallocate_peratom();
+ peratom_allocate_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+ adjust EwaldDisp coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void EwaldDisp::setup()
+{
+  // reciprocal cell: 2*pi*h_inv, with the third entry divided by
+  // slab_volfactor; the volume is scaled by the same factor
+  memcpy(unit, domain->h_inv, sizeof(shape));
+  shape_scalar_mult(unit, 2.0*MY_PI);
+  unit[2] /= slab_volfactor;
+  volume = shape_det(domain->h)*slab_volfactor;
+
+  if (accuracy >= 1) {
+    nbox = 0;
+    error->all(FLERR,"KSpace accuracy too low");
+  }
+
+  // along each box edge, raise the k-vector count until rms() no longer
+  // exceeds the requested accuracy
+  const bigint natoms = atom->natoms;
+  const double xprd = domain->h[0];
+  const double yprd = domain->h[1];
+  const double zprd_slab = domain->h[2]*slab_volfactor;
+
+  int kxmax = 1;
+  while (rms(kxmax,xprd,natoms,q2,b2,M2) > accuracy) ++kxmax;
+
+  int kymax = 1;
+  while (rms(kymax,yprd,natoms,q2,b2,M2) > accuracy) ++kymax;
+
+  int kzmax = 1;
+  while (rms(kzmax,zprd_slab,natoms,q2,b2,M2) > accuracy) ++kzmax;
+
+  nbox = MAX(kxmax,kymax);
+  nbox = MAX(nbox,kzmax);
+
+  // cutoff on |k|^2: largest of the three axis extremes, scaled by 1.00001
+  const double gsqxmx = unit[0]*unit[0]*kxmax*kxmax;
+  const double gsqymx = unit[1]*unit[1]*kymax*kymax;
+  const double gsqzmx = unit[2]*unit[2]*kzmax*kzmax;
+  gsqmx = MAX(gsqxmx,gsqymx);
+  gsqmx = MAX(gsqmx,gsqzmx);
+  gsqmx *= 1.00001;
+
+  // rebuild the k-vector tables and the quantities derived from them
+  reallocate();
+  coefficients();
+  init_coeffs();
+  init_coeff_sums();
+  init_self();
+
+  // the first call on the process with comm->me == 0 reports the sizes
+  if (!first_output && !comm->me) {
+    first_output = 1;
+    if (screen)
+      fprintf(screen, " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
+    if (logfile)
+      fprintf(logfile, " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
+  }
+}
+
+/* ----------------------------------------------------------------------
+ compute RMS accuracy for a dimension
+------------------------------------------------------------------------- */
+
+double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2)
+{
+  // error estimate for km vectors along an edge of length prd; reads the
+  // members g_ewald and volume in addition to its arguments
+  const double g2 = g_ewald*g_ewald;
+  const double g7 = g2*g2*g2*g_ewald;
+
+  // exponential factor common to the Coulombic and Lennard-Jones terms
+  const double damp = exp(-MY_PI*MY_PI*km*km/(g2*prd*prd));
+
+  const double coulombic = 2.0*q2*g_ewald/prd *
+    sqrt(1.0/(MY_PI*km*natoms)) *
+    damp;
+
+  const double lennard_jones = 4.0*b2*g7/3.0 *
+    sqrt(1.0/(MY_PI*natoms)) *
+    (damp *
+     (MY_PI*km/(g_ewald*prd) + 1));
+
+  const double dipole = 8.0*MY_PI*M2/volume*g_ewald *
+    sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) *
+    exp(-pow(MY_PI*km/(g_ewald*prd),2.0));
+
+  double total = 0.0;
+  total += coulombic;
+  total += lennard_jones;
+  total += dipole;
+  return total;
+}
+
+void EwaldDisp::reallocate() // recount the k-vectors with |k|^2 <= gsqmx and rebuild hvec/kvec
+{
+ int ix, iy, iz;
+ int nkvec_max = nkvec; // NOTE(review): local copy of the previous count; init() also assigns an nkvec_max, which this appears to shadow -- confirm intent
+ vector h;
+
+ nkvec = 0;
+ int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; // one flag per (ix,iy,iz) visited below
+ int *flag = kflag;
+
+ for (ix=0; ix<=nbox; ++ix) // first pass: flag and count accepted vectors
+ for (iy=-nbox; iy<=nbox; ++iy)
+ for (iz=-nbox; iz<=nbox; ++iz)
+ if (!(ix||iy||iz)) *(flag++) = 0; // skip k = 0
+ else if ((!ix)&&(iy<0)) *(flag++) = 0;
+ else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry
+ else {
+ h[0] = unit[0]*ix;
+ h[1] = unit[5]*ix+unit[1]*iy;
+ h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz;
+ if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; // flag stores the comparison result
+ }
+
+ if (nkvec>nkvec_max) { // more vectors than before the recount
+ deallocate(); // free memory
+ hvec = new hvector[nkvec]; // hvec
+ bytes += (nkvec-nkvec_max)*sizeof(hvector);
+ kvec = new kvector[nkvec]; // kvec
+ bytes += (nkvec-nkvec_max)*sizeof(kvector);
+ kenergy = new double[nkvec*nfunctions]; // kenergy
+ bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double);
+ kvirial = new double[6*nkvec*nfunctions]; // kvirial
+ bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double);
+ cek_local = new complex[nkvec*nsums]; // cek_local
+ bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
+ cek_global = new complex[nkvec*nsums]; // cek_global
+ bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
+ nkvec_max = nkvec; // writes the local declared above
+ }
+
+ flag = kflag; // create index and
+ kvector *k = kvec; // wave vectors
+ hvector *hi = hvec;
+ for (ix=0; ix<=nbox; ++ix) // second pass: same traversal order as the first
+ for (iy=-nbox; iy<=nbox; ++iy)
+ for (iz=-nbox; iz<=nbox; ++iz)
+ if (*(flag++)) {
+ hi->x = unit[0]*ix;
+ hi->y = unit[5]*ix+unit[1]*iy;
+ (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz;
+ k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } // indices stored offset by nbox
+
+ delete [] kflag; // scratch flags are not kept
+}
+
+
+void EwaldDisp::reallocate_atoms()
+{
+  if ((eflag_atom || vflag_atom) && atom->nlocal > nmax) {
+    deallocate_peratom();               // resize the per-atom self tables
+    allocate_peratom();
+    nmax = atom->nmax;
+  }
+  nevec = atom->nmax*(2*nbox+1);        // 2*nbox+1 entries per atom slot
+  if (nevec > nevec_max) {              // buffer only ever grows
+    delete [] ekr_local;
+    ekr_local = new cvector[nevec];
+    bytes += (nevec-nevec_max)*sizeof(cvector);
+    nevec_max = nevec;
+  }
+}
+
+
+void EwaldDisp::allocate_peratom()  // atom->nmax x EWALD_NFUNCS tables
+{
+  memory->create(virial_self_peratom,atom->nmax,EWALD_NFUNCS,
+                 "ewald/n:virial_self_peratom");
+  memory->create(energy_self_peratom,atom->nmax,EWALD_NFUNCS,
+                 "ewald/n:energy_self_peratom");
+}
+
+
+void EwaldDisp::deallocate_peratom()  // release the per-atom self tables
+{
+  memory->destroy(virial_self_peratom);
+  memory->destroy(energy_self_peratom);
+}
+
+
+void EwaldDisp::deallocate()  // release the k-space arrays and null them
+{
+  delete [] cek_global; delete [] cek_local;
+  cek_global = cek_local = NULL;
+  delete [] kvirial; delete [] kenergy;
+  kvirial = kenergy = NULL;
+  delete [] kvec; kvec = NULL;
+  delete [] hvec; hvec = NULL;
+}
+
+
+void EwaldDisp::coefficients()
+{
+ vector h;
+ hvector *hi = hvec, *nh;
+ double eta2 = 0.25/(g_ewald*g_ewald);
+ double b1, b2, expb2, h1, h2, c1, c2;
+ double *ke = kenergy, *kv = kvirial;
+ int func0 = function[0], func12 = function[1]||function[2],
+ func3 = function[3];
+
+ for (nh = (hi = hvec)+nkvec; hintypes;
+
+ if (function[1]) { // geometric 1/r^6
+ double **b = (double **) force->pair->extract("B",tmp);
+ delete [] B;
+ B = new double[n+1];
+ bytes += (n+1)*sizeof(double);
+ for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
+ }
+ if (function[2]) { // arithmetic 1/r^6
+ double **epsilon = (double **) force->pair->extract("epsilon",tmp);
+ double **sigma = (double **) force->pair->extract("sigma",tmp);
+ double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
+ double c[7] = {
+ 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
+
+ if (!(epsilon&&sigma))
+ error->all(
+ FLERR,"Epsilon or sigma reference not set by pair style in ewald/n");
+ for (int i=0; i<=n; ++i) {
+ eps_i = sqrt(epsilon[i][i]);
+ sigma_i = sigma[i][i];
+ sigma_n = 1.0;
+ for (int j=0; j<7; ++j) {
+ *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i;
+ }
+ }
+ }
+}
+
+void EwaldDisp::init_coeff_sums()
+{
+ if (sums) return; // calculated only once
+ sums = 1;
+
+ Sum sum_local[EWALD_MAX_NSUMS];
+
+ memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum));
+ if (function[0]) { // 1/r
+ double *q = atom->q, *qn = q+atom->nlocal;
+ for (double *i=q; itype, *ntype = type+atom->nlocal;
+ for (int *i=type; itype, *ntype = type+atom->nlocal;
+ for (int *i=type; imu) { // dipole
+ double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal;
+ for (double *i = mu; i < nmu; i += 4)
+ sum_local[9].x2 += i[3]*i[3];
+ }
+ MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world);
+}
+
+
+void EwaldDisp::init_self()      // global self energy/virial from sum[]
+{
+  const double g = g_ewald, gsq = g*g, gcube = g*gsq;
+  const double qscale = force->qqrd2e * scale;
+  memset(virial_self, 0, EWALD_NFUNCS*sizeof(double));
+  memset(energy_self, 0, EWALD_NFUNCS*sizeof(double));
+
+  if (function[0]) {                              // 1/r
+    virial_self[0] = -0.5*MY_PI*qscale/(gsq*volume)*sum[0].x*sum[0].x;
+    energy_self[0] = sum[0].x2*qscale*g/MY_PIS-virial_self[0];
+  }
+  if (function[1]) {                              // geometric 1/r^6
+    virial_self[1] = MY_PI*MY_PIS*gcube/(6.0*volume)*sum[1].x*sum[1].x;
+    energy_self[1] = -sum[1].x2*gcube*gcube/12.0+virial_self[1];
+  }
+  if (function[2]) {                              // arithmetic 1/r^6
+    const double cross = sum[2].x*sum[8].x+
+      sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x;
+    virial_self[2] = MY_PI*MY_PIS*gcube/(48.0*volume)*cross;
+    energy_self[2] = -sum[2].x2*gcube*gcube/3.0+virial_self[2];
+  }
+  if (function[3]) {                              // dipole
+    virial_self[3] = 0;                           // in surface
+    energy_self[3] = sum[9].x2*mumurd2e*2.0*gcube/3.0/MY_PIS-virial_self[3];
+  }
+}
+
+
+void EwaldDisp::init_self_peratom() // fill the per-atom self energy/virial tables for the local atoms
+{
+ if (!(vflag_atom || eflag_atom)) return; // only needed when per-atom tallies are requested
+
+ double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
+ const double qscale = force->qqrd2e * scale;
+ double *energy = energy_self_peratom[0]; // walked below as a flat array, EWALD_NFUNCS entries per atom
+ double *virial = virial_self_peratom[0];
+ int nlocal = atom->nlocal;
+
+ memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double));
+ memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double));
+
+ if (function[0]) { // 1/r
+ double *ei = energy; // entry 0 of each atom's row
+ double *vi = virial;
+ double ce = qscale*g1/MY_PIS;
+ double cv = -0.5*MY_PI*qscale/(g2*volume);
+ double *qi = atom->q, *qn = qi + nlocal;
+ for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+ double q = *qi;
+ *vi = cv*q*sum[0].x;
+ *ei = ce*q*q-vi[0];
+ }
+ }
+ if (function[1]) { // geometric 1/r^6
+ double *ei = energy+1; // entry 1 of each atom's row
+ double *vi = virial+1;
+ double ce = -g3*g3/12.0;
+ double cv = MY_PI*MY_PIS*g3/(6.0*volume);
+ int *typei = atom->type, *typen = typei + atom->nlocal;
+ for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+ double b = B[*typei]; // one coefficient per atom type
+ *vi = cv*b*sum[1].x;
+ *ei = ce*b*b+vi[0];
+ }
+ }
+ if (function[2]) { // arithmetic 1/r^6
+ double *bi;
+ double *ei = energy+2; // entry 2 of each atom's row
+ double *vi = virial+2;
+ double ce = -g3*g3/3.0;
+ double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume);
+ int *typei = atom->type, *typen = typei + atom->nlocal;
+ for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+ bi = B+7*typei[0]+7; // one past this type's 7 coefficients
+ for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; // coefficients consumed in reverse order
+
+ /* PJV 20120225:
+ should this be this instead? above implies an inverse dependence
+ seems to be the above way in original; i recall having tested
+ arithmetic mixing in the conception phase, but an extra test would
+ be prudent (pattern repeats in multiple functions below)
+
+ bi = B+7*typei[0];
+ for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0];
+
+ */
+
+ *ei = ce*bi[0]*bi[6]+vi[0]; // after 7 decrements bi is back at B+7*type
+ }
+ }
+ if (function[3]&&atom->mu) { // dipole
+ double *ei = energy+3; // entry 3 of each atom's row
+ double *vi = virial+3;
+ double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; // 4 doubles per atom
+ double ce = mumurd2e*2.0*g3/3.0/MY_PIS;
+ for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+ *vi = 0; // in surface
+ *ei = ce*imu[3]*imu[3]-vi[0]; // imu[3] is presumably the dipole magnitude -- confirm against atom->mu layout
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------
+ compute the EwaldDisp long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void EwaldDisp::compute(int eflag, int vflag)
+{
+  if (nbox == 0) return;               // nothing to sum when nbox is zero
+
+  // establish the energy/virial tally flags for this call
+  if (!eflag && !vflag)
+    evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
+  else ev_setup(eflag,vflag);
+
+  // first request for per-atom tallies: create the per-atom self tables
+  if ((eflag_atom || vflag_atom) && !peratom_allocate_flag) {
+    allocate_peratom();
+    peratom_allocate_flag = 1;
+    nmax = atom->nmax;
+  }
+
+  reallocate_atoms();
+  init_self_peratom();
+  compute_ek();
+  compute_force();
+  //compute_surface(); // assume conducting metal (tinfoil) boundary conditions
+  compute_energy();
+  compute_energy_peratom();
+  compute_virial();
+  compute_virial_dipole();
+  compute_virial_peratom();
+}
+
+
+void EwaldDisp::compute_ek()
+{
+ cvector *ekr = ekr_local;
+ int lbytes = (2*nbox+1)*sizeof(cvector);
+ hvector *h = NULL;
+ kvector *k, *nk = kvec+nkvec;
+ cvector *z = new cvector[2*nbox+1];
+ cvector z1, *zx, *zy, *zz, *zn = z+2*nbox;
+ complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL;
+ vector mui;
+ double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0;
+ double bi = 0.0, ci[7];
+ double *mu = atom->mu ? atom->mu[0] : NULL;
+ int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic;
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ memset(cek_local, 0, n*sizeof(complex)); // reset sums
+ while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0]
+ if (tri) { // triclinic z[1]
+ C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]);
+ C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]);
+ C_ANGLE(z1.z, x[2]*unit[2]); x += 3;
+ }
+ else { // orthogonal z[1]
+ C_ANGLE(z1.x, *(x++)*unit[0]);
+ C_ANGLE(z1.y, *(x++)*unit[1]);
+ C_ANGLE(z1.z, *(x++)*unit[2]);
+ }
+ for (; zzx, zz->x, z1.x); // 3D k-vector
+ C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y);
+ C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z);
+ }
+ kx = ky = -1;
+ cek = cek_local;
+ if (func[0]) qi = *(q++);
+ if (func[1]) bi = B[*type];
+ if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double));
+ if (func[3]) {
+ memcpy(mui, mu, sizeof(vector));
+ mu += 4;
+ h = hvec;
+ }
+ for (k=kvec; ky) { // based on order in
+ if (kx!=k->x) cx = z[kx = k->x].x; // reallocate
+ C_RMULT(zxy, z[ky = k->y].y, cx);
+ }
+ C_RMULT(zxyz, z[k->z].z, zxy);
+ if (func[0]) {
+ cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi;
+ }
+ if (func[1]) {
+ cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi;
+ }
+ if (func[2]) for (i=0; i<7; ++i) {
+ cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i];
+ }
+ if (func[3]) {
+ register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h;
+ cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk;
+ }
+ }
+ ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes);
+ ++type;
+ }
+ MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world);
+
+ delete [] z;
+}
+
+
+void EwaldDisp::compute_force()
+{
+ kvector *k;
+ hvector *h, *nh;
+ cvector *z = ekr_local;
+ vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL;
+ complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+ complex *cek_coul;
+ double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL;
+ double *mu = atom->mu ? atom->mu[0] : NULL;
+ const double qscale = force->qqrd2e * scale;
+ double *ke, c[EWALD_NFUNCS] = {
+ 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
+ double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
+ int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+ int func[EWALD_NFUNCS];
+
+ if (atom->torque) t = atom->torque[0];
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr =
+ for (; fy) { // based on order in
+ if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
+ C_RMULT(zxy, z[ky = k->y].y, zx);
+ }
+ C_CRMULT(zc, z[k->z].z, zxy);
+ if (func[0]) { // 1/r
+ register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re);
+ if (func[3]) cek_coul = cek;
+ ++cek;
+ sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im;
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
+ sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double im, c = *(ke++);
+ for (i=2; i<9; ++i) {
+ im = c*(zc.im*cek->re+cek->im*zc.re); ++cek;
+ sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im;
+ }
+ }
+ if (func[3]) { // dipole
+ register double im = *(ke)*(zc.im*cek->re+
+ cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+ register double im2 = *(ke)*(zc.re*cek->re-
+ cek->im*zc.im);
+ sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
+ t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque
+ t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
+ t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
+ if (func[0]) { // charge-dipole
+ register double qi = *(q)*c[0];
+ im = - *(ke)*(zc.re*cek_coul->re -
+ cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+ im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi;
+ sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
+
+ im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im);
+ im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re);
+ t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque
+ t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
+ t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
+ }
+ ++cek;
+ ke++;
+ }
+ }
+ if (func[0]) { // 1/r
+ register double qi = *(q++)*c[0];
+ f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi;
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double bi = B[*type]*c[1];
+ f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double *bi = B+7*type[0]+7;
+ for (i=2; i<9; ++i) {
+ register double c2 = (--bi)[0]*c[2];
+ f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2;
+ }
+ }
+ if (func[3]) { // dipole
+ f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2];
+ }
+ z = (cvector *) ((char *) z+lbytes);
+ ++type;
+ t += 3;
+ }
+}
+
+
+void EwaldDisp::compute_surface()
+{
+  // Not called from compute(): conducting metal (tinfoil) boundary conditions
+  // are assumed, i.e. the dielectric at the boundary --> infinity, which
+  // makes all the terms here zero.
+  if (!function[3] || !atom->mu) return;
+
+  const int nlocal = atom->nlocal;
+  double *mu = atom->mu[0];              // 4 doubles per atom
+  vector sum_local = VECTOR_NULL, sum_total;
+  memset(sum_local, 0, sizeof(vector));
+  // total dipole vector: local sum, then reduced over all processes
+  for (int a = 0; a < nlocal; ++a) {
+    const double *m = mu+4*a;
+    sum_local[0] += m[0];
+    sum_local[1] += m[1];
+    sum_local[2] += m[2];
+  }
+  MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world);
+
+  virial_self[3] =
+    mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume);
+  energy_self[3] -= virial_self[3];
+
+  if (!vflag_atom && !eflag_atom) return;
+
+  // per-atom share, stored in entry 3 of each atom's row
+  const double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume;
+  double *ei = energy_self_peratom[0]+3;
+  double *vi = virial_self_peratom[0]+3;
+  const double *mend = mu+4*nlocal;
+  for (double *m = mu; m < mend; m += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) {
+    *vi = cv*(m[0]*sum_total[0]+m[1]*sum_total[1]+m[2]*sum_total[2]);
+    *ei -= *vi;
+  }
+}
+
+
+void EwaldDisp::compute_energy()
+{
+ energy = 0.0;
+ if (!eflag_global) return;
+
+ complex *cek = cek_global;
+ complex *cek_coul;
+ double *ke = kenergy;
+ const double qscale = force->qqrd2e * scale;
+ double c[EWALD_NFUNCS] = {
+ 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+ double sum[EWALD_NFUNCS];
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums
+ for (int k=0; kre*cek->re+cek->im*cek->im);
+ if (func[3]) cek_coul = cek;
+ ++cek;
+ }
+ if (func[1]) { // geometric 1/r^6
+ sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
+ if (func[2]) { // arithmetic 1/r^6
+ register double r =
+ (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
+ (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
+ (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
+ 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
+ sum[2] += *(ke++)*r;
+ }
+ if (func[3]) { // dipole
+ sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im);
+ if (func[0]) { // charge-dipole
+ sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
+ }
+ ke++;
+ ++cek;
+ }
+ }
+ for (int k=0; kq;
+ double *eatomj = eatom;
+ double *mu = atom->mu ? atom->mu[0] : NULL;
+ const double qscale = force->qqrd2e * scale;
+ double *ke = kenergy;
+ double c[EWALD_NFUNCS] = {
+ 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+ int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ for (int j = 0; j < atom->nlocal; j++, ++eatomj) {
+ k = kvec;
+ kx = ky = -1;
+ ke = kenergy;
+ cek = cek_global;
+ memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double));
+ if (func[3]) {
+ register double di = c[3];
+ mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
+ mu++;
+ }
+ for (nh = (h = hvec)+nkvec; hy) { // based on order in
+ if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
+ C_RMULT(zxy, z[ky = k->y].y, zx);
+ }
+ C_CRMULT(zc, z[k->z].z, zxy);
+ if (func[0]) { // 1/r
+ sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im);
+ if (func[3]) cek_coul = cek;
+ ++cek;
+ }
+ if (func[1]) { // geometric 1/r^6
+ sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
+ if (func[2]) { // arithmetic 1/r^6
+ register double im, c = *(ke++);
+ for (i=2; i<9; ++i) {
+ im = c*(cek->re*zc.re - cek->im*zc.im); ++cek;
+ sum[i] += im;
+ }
+ }
+ if (func[3]) { // dipole
+ double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+ sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk;
+ if (func[0]) { // charge-dipole
+ register double qj = *(q)*c[0];
+ sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
+ sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj;
+ }
+ ++cek;
+ ke++;
+ }
+ }
+
+ if (func[0]) { // 1/r
+ register double qj = *(q++)*c[0];
+ *eatomj += sum[0]*qj - energy_self_peratom[j][0];
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double bj = B[*type]*c[1];
+ *eatomj += sum[1]*bj - energy_self_peratom[j][1];
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double *bj = B+7*type[0]+7;
+ for (i=2; i<9; ++i) {
+ register double c2 = (--bj)[0]*c[2];
+ *eatomj += 0.5*sum[i]*c2;
+ }
+ *eatomj -= energy_self_peratom[j][2];
+ }
+ if (func[3]) { // dipole
+ *eatomj += sum[9] - energy_self_peratom[j][3];
+ }
+ z = (cvector *) ((char *) z+lbytes);
+ ++type;
+ }
+}
+
+
+#define swap(a, b) { register double t = a; a= b; b = t; }
+
+void EwaldDisp::compute_virial()
+{
+ memset(virial, 0, sizeof(shape));
+ if (!vflag_global) return;
+
+ complex *cek = cek_global;
+ complex *cek_coul;
+ double *kv = kvirial;
+ const double qscale = force->qqrd2e * scale;
+ double c[EWALD_NFUNCS] = {
+ 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+ shape sum[EWALD_NFUNCS];
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ memset(sum, 0, EWALD_NFUNCS*sizeof(shape));
+ for (int k=0; kre*cek->re+cek->im*cek->im;
+ if (func[3]) cek_coul = cek;
+ ++cek;
+ sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r;
+ sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r;
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
+ sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r;
+ sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double r =
+ (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
+ (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
+ (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
+ 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
+ sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r;
+ sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r;
+ }
+ if (func[3]) {
+ register double r = cek->re*cek->re+cek->im*cek->im;
+ sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
+ sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
+ if (func[0]) { // charge-dipole
+ kv -= 6;
+ register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
+ sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
+ sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
+ }
+ ++cek;
+ }
+ }
+ for (int k=0; kmu ? atom->mu[0] : NULL;
+ double *vatomj = NULL;
+ if (vflag_atom && vatom) vatomj = vatom[0];
+ const double qscale = force->qqrd2e * scale;
+ double *ke, c[EWALD_NFUNCS] = {
+ 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
+ double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
+ int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ memset(&sum[0], 0, 6*sizeof(double));
+ memset(&sum_total[0], 0, 6*sizeof(double));
+ for (int j = 0; j < atom->nlocal; j++) {
+ k = kvec;
+ kx = ky = -1;
+ ke = kenergy;
+ cek = cek_global;
+ memset(&sum[0], 0, 6*sizeof(double));
+ if (func[3]) {
+ register double di = c[3];
+ mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
+ mu++;
+ }
+ for (nh = (h = hvec)+nkvec; hy) { // based on order in
+ if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
+ C_RMULT(zxy, z[ky = k->y].y, zx);
+ }
+ C_CRMULT(zc, z[k->z].z, zxy);
+ double im = 0.0;
+ if (func[0]) { // 1/r
+ ke++;
+ if (func[3]) cek_coul = cek;
+ ++cek;
+ }
+ if (func[1]) { // geometric 1/r^6
+ ke++;
+ ++cek;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ ke++;
+ for (i=2; i<9; ++i) {
+ ++cek;
+ }
+ }
+ if (func[3]) { // dipole
+ im = *(ke)*(zc.re*cek->re - cek->im*zc.im);
+ if (func[0]) { // charge-dipole
+ im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re);
+ }
+ sum[0] -= mui[0]*h->x*im;
+ sum[1] -= mui[1]*h->y*im;
+ sum[2] -= mui[2]*h->z*im;
+ sum[3] -= mui[0]*h->y*im;
+ sum[4] -= mui[0]*h->z*im;
+ sum[5] -= mui[1]*h->z*im;
+ ++cek;
+ ke++;
+ }
+ }
+
+ if (vflag_global)
+ for (int n = 0; n < 6; n++)
+ sum_total[n] -= sum[n];
+
+ if (vflag_atom)
+ for (int n = 0; n < 6; n++)
+ vatomj[n] -= sum[n];
+
+ z = (cvector *) ((char *) z+lbytes);
+ ++type;
+ if (vflag_atom) vatomj += 6;
+ }
+
+ if (vflag_global) {
+ MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world);
+ for (int n = 0; n < 6; n++)
+ virial[n] += sum[n];
+ }
+
+}
+
+void EwaldDisp::compute_virial_peratom()
+{
+ if (!vflag_atom) return;
+
+ kvector *k;
+ hvector *h, *nh;
+ cvector *z = ekr_local;
+ vector mui = VECTOR_NULL;
+ complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+ complex *cek_coul;
+ double *kv;
+ double *q = atom->q;
+ double *vatomj = vatom ? vatom[0] : NULL;
+ double *mu = atom->mu ? atom->mu[0] : NULL;
+ const double qscale = force->qqrd2e * scale;
+ double c[EWALD_NFUNCS] = {
+ 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+ 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+ shape sum[EWALD_MAX_NSUMS];
+ int func[EWALD_NFUNCS];
+
+ memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+ int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+ for (int j = 0; j < atom->nlocal; j++) {
+ k = kvec;
+ kx = ky = -1;
+ kv = kvirial;
+ cek = cek_global;
+ memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape));
+ if (func[3]) {
+ register double di = c[3];
+ mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
+ mu++;
+ }
+ for (nh = (h = hvec)+nkvec; hy) { // based on order in
+ if (kx!=k->x) zx = z[kx = k->x].x; // reallocate
+ C_RMULT(zxy, z[ky = k->y].y, zx);
+ }
+ C_CRMULT(zc, z[k->z].z, zxy);
+ if (func[0]) { // 1/r
+ if (func[3]) cek_coul = cek;
+ register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
+ sum[0][0] += *(kv++)*r;
+ sum[0][1] += *(kv++)*r;
+ sum[0][2] += *(kv++)*r;
+ sum[0][3] += *(kv++)*r;
+ sum[0][4] += *(kv++)*r;
+ sum[0][5] += *(kv++)*r;
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
+ sum[1][0] += *(kv++)*r;
+ sum[1][1] += *(kv++)*r;
+ sum[1][2] += *(kv++)*r;
+ sum[1][3] += *(kv++)*r;
+ sum[1][4] += *(kv++)*r;
+ sum[1][5] += *(kv++)*r;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double r;
+ for (i=2; i<9; ++i) {
+ r = cek->re*zc.re - cek->im*zc.im; ++cek;
+ sum[i][0] += *(kv++)*r;
+ sum[i][1] += *(kv++)*r;
+ sum[i][2] += *(kv++)*r;
+ sum[i][3] += *(kv++)*r;
+ sum[i][4] += *(kv++)*r;
+ sum[i][5] += *(kv++)*r;
+ kv -= 6;
+ }
+ kv += 6;
+ }
+ if (func[3]) { // dipole
+ double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+ register double
+ r = (cek->re*zc.re - cek->im*zc.im)*muk;
+ sum[9][0] += *(kv++)*r;
+ sum[9][1] += *(kv++)*r;
+ sum[9][2] += *(kv++)*r;
+ sum[9][3] += *(kv++)*r;
+ sum[9][4] += *(kv++)*r;
+ sum[9][5] += *(kv++)*r;
+ if (func[0]) { // charge-dipole
+ kv -= 6;
+ register double qj = *(q)*c[0];
+ r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
+ r += -(cek->re*zc.im + cek->im*zc.re)*qj;
+ sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r;
+ sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r;
+ }
+ ++cek;
+ }
+ }
+
+ if (func[0]) { // 1/r
+ register double qi = *(q++)*c[0];
+ for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi;
+ }
+ if (func[1]) { // geometric 1/r^6
+ register double bi = B[*type]*c[1];
+ for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi;
+ }
+ if (func[2]) { // arithmetic 1/r^6
+ register double *bj = B+7*type[0]+7;
+ for (i=2; i<9; ++i) {
+ register double c2 = (--bj)[0]*c[2];
+ for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2;
+ }
+ }
+ if (func[3]) { // dipole
+ for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n];
+ }
+
+ for (int k=0; kq;
+ double **x = atom->x;
+ double zprd = domain->zprd;
+ int nlocal = atom->nlocal;
+
+ double qsum = 0.0;
+ if (function[0]) qsum = sum[0].x;
+
+ double dipole = 0.0;
+ for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
+
+ if (function[3] && atom->mu) {
+ double **mu = atom->mu;
+ for (int i = 0; i < nlocal; i++) dipole += mu[i][2];
+ }
+
+ // sum local contributions to get global dipole moment
+
+ double dipole_all;
+ MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+ // need to make non-neutral systems and/or
+ // per-atom energy translationally invariant
+
+ double dipole_r2 = 0.0;
+ if (eflag_atom || fabs(qsum) > SMALL) {
+
+ if (function[3] && atom->mu)
+ error->all(FLERR,"Cannot (yet) use kspace slab correction with "
+ "long-range dipoles and non-neutral systems or per-atom energy");
+
+ for (int i = 0; i < nlocal; i++)
+ dipole_r2 += q[i]*x[i][2]*x[i][2];
+
+ // sum local contributions
+
+ double tmp;
+ MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ dipole_r2 = tmp;
+ }
+
+ // compute corrections
+
+ const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+ qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+ const double qscale = force->qqrd2e * scale;
+
+ if (eflag_global) energy += qscale * e_slabcorr;
+
+ // per-atom energy
+
+ if (eflag_atom) {
+ double efact = qscale * MY_2PI/volume;
+ for (int i = 0; i < nlocal; i++)
+ eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+ qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+ }
+
+ // add on force corrections
+
+ double ffact = qscale * (-4.0*MY_PI/volume);
+ double **f = atom->f;
+
+ for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
+
+ // add on torque corrections
+
+ if (function[3] && atom->mu && atom->torque) {
+ double **mu = atom->mu;
+ double **torque = atom->torque;
+ for (int i = 0; i < nlocal; i++) {
+ torque[i][0] += ffact * dipole_all * mu[i][1];
+ torque[i][1] += -ffact * dipole_all * mu[i][0];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ Newton solver used to find g_ewald for LJ systems
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::NewtonSolve(double x, double Rc,
+ bigint natoms, double vol, double b2)
+{
+ double dx,tol;
+ int maxit;
+
+ maxit = 10000; //Maximum number of iterations
+ tol = 0.00001; //Convergence tolerance
+
+ //Begin algorithm
+
+ for (int i = 0; i < maxit; i++) {
+ dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2);
+ x = x - dx; //Update x
+ if (fabs(dx) < tol) return x;
+ if (x < 0 || x != x) // solver failed
+ return -1;
+ }
+ return -1;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate f(x)
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
+{
+ double a = Rc*x;
+ double f = 0.0;
+
+ if (function[1] || function[2]) { // LJ
+ f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
+ (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
+ } else { // dipole
+ double rg2 = a*a;
+ double rg4 = rg2*rg2;
+ double rg6 = rg4*rg2;
+ double Cc = 4.0*rg4 + 6.0*rg2 + 3.0;
+ double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0;
+ f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
+ sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
+ exp(-rg2)) - accuracy;
+ }
+
+ return f;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate numerical derivative f'(x)
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::derivf(double x, double Rc,
+ bigint natoms, double vol, double b2)
+{
+ double h = 0.000001; //Derivative step-size
+ return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h;
+}
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index c7dd91e083..a99d5bb1fb 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -191,7 +191,7 @@ void MSM::init()
qsum = tmp;
MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
qsqsum = tmp;
- q2 = qsqsum * force->qqrd2e / force->dielectric;
+ q2 = qsqsum * force->qqrd2e;
if (qsqsum == 0.0)
error->all(FLERR,"Cannot use kspace solver on system with no charge");
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 9b94ecdae1..9c5db42ad8 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -1,3501 +1,3501 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
- per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
- analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
- triclinic added by Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm.h"
-#include "atom.h"
-#include "comm.h"
-#include "commgrid.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-#include "math_const.h"
-#include "math_special.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-using namespace MathSpecial;
-
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-enum{REVERSE_RHO};
-enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF 1.0f
-#else
-#define ZEROF 0.0
-#define ONEF 1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
- if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
-
- pppmflag = 1;
- group_group_enable = 1;
-
- accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
- nfactors = 3;
- factors = new int[nfactors];
- factors[0] = 2;
- factors[1] = 3;
- factors[2] = 5;
-
- MPI_Comm_rank(world,&me);
- MPI_Comm_size(world,&nprocs);
-
- density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
- density_fft = NULL;
- u_brick = NULL;
- v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
- greensfn = NULL;
- work1 = work2 = NULL;
- vg = NULL;
- fkx = fky = fkz = NULL;
-
- sf_precoeff1 = sf_precoeff2 = sf_precoeff3 =
- sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
-
- density_A_brick = density_B_brick = NULL;
- density_A_fft = density_B_fft = NULL;
-
- gf_b = NULL;
- rho1d = rho_coeff = drho1d = drho_coeff = NULL;
-
- fft1 = fft2 = NULL;
- remap = NULL;
- cg = NULL;
- cg_peratom = NULL;
-
- nmax = 0;
- part2grid = NULL;
-
- peratom_allocate_flag = 0;
- group_allocate_flag = 0;
-
- // define acons coefficients for estimation of kspace errors
- // see JCP 109, pg 7698 for derivation of coefficients
- // higher order coefficients may be computed if needed
-
- memory->create(acons,8,7,"pppm:acons");
- acons[1][0] = 2.0 / 3.0;
- acons[2][0] = 1.0 / 50.0;
- acons[2][1] = 5.0 / 294.0;
- acons[3][0] = 1.0 / 588.0;
- acons[3][1] = 7.0 / 1440.0;
- acons[3][2] = 21.0 / 3872.0;
- acons[4][0] = 1.0 / 4320.0;
- acons[4][1] = 3.0 / 1936.0;
- acons[4][2] = 7601.0 / 2271360.0;
- acons[4][3] = 143.0 / 28800.0;
- acons[5][0] = 1.0 / 23232.0;
- acons[5][1] = 7601.0 / 13628160.0;
- acons[5][2] = 143.0 / 69120.0;
- acons[5][3] = 517231.0 / 106536960.0;
- acons[5][4] = 106640677.0 / 11737571328.0;
- acons[6][0] = 691.0 / 68140800.0;
- acons[6][1] = 13.0 / 57600.0;
- acons[6][2] = 47021.0 / 35512320.0;
- acons[6][3] = 9694607.0 / 2095994880.0;
- acons[6][4] = 733191589.0 / 59609088000.0;
- acons[6][5] = 326190917.0 / 11700633600.0;
- acons[7][0] = 1.0 / 345600.0;
- acons[7][1] = 3617.0 / 35512320.0;
- acons[7][2] = 745739.0 / 838397952.0;
- acons[7][3] = 56399353.0 / 12773376000.0;
- acons[7][4] = 25091609.0 / 1560084480.0;
- acons[7][5] = 1755948832039.0 / 36229939200000.0;
- acons[7][6] = 4887769399.0 / 37838389248.0;
-}
-
-/* ----------------------------------------------------------------------
- free all memory
-------------------------------------------------------------------------- */
-
-PPPM::~PPPM()
-{
- delete [] factors;
- deallocate();
- if (peratom_allocate_flag) deallocate_peratom();
- if (group_allocate_flag) deallocate_groups();
- memory->destroy(part2grid);
- memory->destroy(acons);
-}
-
-/* ----------------------------------------------------------------------
- called once before run
-------------------------------------------------------------------------- */
-
-void PPPM::init()
-{
- if (me == 0) {
- if (screen) fprintf(screen,"PPPM initialization ...\n");
- if (logfile) fprintf(logfile,"PPPM initialization ...\n");
- }
-
- // error check
-
- triclinic_check();
- if (domain->triclinic && differentiation_flag == 1)
- error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
- "and kspace_modify diff ad");
- if (domain->triclinic && slabflag)
- error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
- "slab correction");
- if (domain->dimension == 2) error->all(FLERR,
- "Cannot use PPPM with 2d simulation");
-
- if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
- if (slabflag) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab PPPM");
- }
-
- if (order < 2 || order > MAXORDER) {
- char str[128];
- sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
- error->all(FLERR,str);
- }
-
- // extract short-range Coulombic cutoff from pair style
-
- triclinic = domain->triclinic;
- scale = 1.0;
-
- pair_check();
-
- int itmp = 0;
- double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
- if (p_cutoff == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- cutoff = *p_cutoff;
-
- // if kspace is TIP4P, extract TIP4P params from pair style
- // bond/angle are not yet init(), so insure equilibrium request is valid
-
- qdist = 0.0;
-
- if (tip4pflag) {
- double *p_qdist = (double *) force->pair->extract("qdist",itmp);
- int *p_typeO = (int *) force->pair->extract("typeO",itmp);
- int *p_typeH = (int *) force->pair->extract("typeH",itmp);
- int *p_typeA = (int *) force->pair->extract("typeA",itmp);
- int *p_typeB = (int *) force->pair->extract("typeB",itmp);
- if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- qdist = *p_qdist;
- typeO = *p_typeO;
- typeH = *p_typeH;
- int typeA = *p_typeA;
- int typeB = *p_typeB;
-
- if (force->angle == NULL || force->bond == NULL ||
- force->angle->setflag == NULL || force->bond->setflag == NULL)
- error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
- if (typeA < 1 || typeA > atom->nangletypes ||
- force->angle->setflag[typeA] == 0)
- error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
- if (typeB < 1 || typeB > atom->nbondtypes ||
- force->bond->setflag[typeB] == 0)
- error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
- double theta = force->angle->equilibrium_angle(typeA);
- double blen = force->bond->equilibrium_distance(typeB);
- alpha = qdist / (cos(0.5*theta) * blen);
- if (domain->triclinic)
- error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
- }
-
- // compute qsum & qsqsum and warn if not charge-neutral
-
- qsum = qsqsum = 0.0;
- for (int i = 0; i < atom->nlocal; i++) {
- qsum += atom->q[i];
- qsqsum += atom->q[i]*atom->q[i];
- }
-
- double tmp;
- MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum = tmp;
- MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsqsum = tmp;
- q2 = qsqsum * force->qqrd2e / force->dielectric;
-
- if (qsqsum == 0.0)
- error->all(FLERR,"Cannot use kspace solver on system with no charge");
- if (fabs(qsum) > SMALL && me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- // free all arrays previously allocated
-
- deallocate();
- if (peratom_allocate_flag) deallocate_peratom();
- if (group_allocate_flag) deallocate_groups();
-
- // setup FFT grid resolution and g_ewald
- // normally one iteration thru while loop is all that is required
- // if grid stencil does not extend beyond neighbor proc
- // or overlap is allowed, then done
- // else reduce order and try again
-
- int (*procneigh)[2] = comm->procneigh;
-
- CommGrid *cgtmp = NULL;
- int iteration = 0;
-
- while (order >= minorder) {
- if (iteration && me == 0)
- error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
- "beyond nearest neighbor processor");
-
- if (stagger_flag && !differentiation_flag) compute_gf_denom();
- set_grid_global();
- set_grid_local();
- if (overlap_allowed) break;
-
- cgtmp = new CommGrid(lmp,world,1,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- cgtmp->ghost_notify();
- if (!cgtmp->ghost_overlap()) break;
- delete cgtmp;
-
- order--;
- iteration++;
- }
-
- if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
- if (!overlap_allowed && cgtmp->ghost_overlap())
- error->all(FLERR,"PPPM grid stencil extends "
- "beyond nearest neighbor processor");
- if (cgtmp) delete cgtmp;
-
- // adjust g_ewald
-
- if (!gewaldflag) adjust_gewald();
-
- // calculate the final accuracy
-
- double estimated_accuracy = final_accuracy();
-
- // print stats
-
- int ngrid_max,nfft_both_max;
- MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
- if (me == 0) {
-
-#ifdef FFT_SINGLE
- const char fft_prec[] = "single";
-#else
- const char fft_prec[] = "double";
-#endif
-
- if (screen) {
- fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
- fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
- fprintf(screen," stencil order = %d\n",order);
- fprintf(screen," estimated absolute RMS force accuracy = %g\n",
- estimated_accuracy);
- fprintf(screen," estimated relative force accuracy = %g\n",
- estimated_accuracy/two_charge_force);
- fprintf(screen," using %s precision FFTs\n",fft_prec);
- fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
- ngrid_max,nfft_both_max);
- }
- if (logfile) {
- fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
- fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
- fprintf(logfile," stencil order = %d\n",order);
- fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
- estimated_accuracy);
- fprintf(logfile," estimated relative force accuracy = %g\n",
- estimated_accuracy/two_charge_force);
- fprintf(logfile," using %s precision FFTs\n",fft_prec);
- fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
- ngrid_max,nfft_both_max);
- }
- }
-
- // allocate K-space dependent memory
- // don't invoke allocate peratom() or group(), will be allocated when needed
-
- allocate();
- cg->ghost_notify();
- cg->setup();
-
- // pre-compute Green's function denomiator expansion
- // pre-compute 1d charge distribution coefficients
-
- compute_gf_denom();
- if (differentiation_flag == 1) compute_sf_precoeff();
- compute_rho_coeff();
-}
-
-/* ----------------------------------------------------------------------
- adjust PPPM coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void PPPM::setup()
-{
- if (triclinic) {
- setup_triclinic();
- return;
- }
-
- int i,j,k,n;
- double *prd;
-
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPM
- // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- delxinv = nx_pppm/xprd;
- delyinv = ny_pppm/yprd;
- delzinv = nz_pppm/zprd_slab;
-
- delvolinv = delxinv*delyinv*delzinv;
-
- double unitkx = (MY_2PI/xprd);
- double unitky = (MY_2PI/yprd);
- double unitkz = (MY_2PI/zprd_slab);
-
- // fkx,fky,fkz for my FFT grid pts
-
- double per;
-
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- per = i - nx_pppm*(2*i/nx_pppm);
- fkx[i] = unitkx*per;
- }
-
- for (i = nylo_fft; i <= nyhi_fft; i++) {
- per = i - ny_pppm*(2*i/ny_pppm);
- fky[i] = unitky*per;
- }
-
- for (i = nzlo_fft; i <= nzhi_fft; i++) {
- per = i - nz_pppm*(2*i/nz_pppm);
- fkz[i] = unitkz*per;
- }
-
- // virial coefficients
-
- double sqk,vterm;
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++) {
- for (j = nylo_fft; j <= nyhi_fft; j++) {
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
- if (sqk == 0.0) {
- vg[n][0] = 0.0;
- vg[n][1] = 0.0;
- vg[n][2] = 0.0;
- vg[n][3] = 0.0;
- vg[n][4] = 0.0;
- vg[n][5] = 0.0;
- } else {
- vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
- vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
- vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
- vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
- vg[n][3] = vterm*fkx[i]*fky[j];
- vg[n][4] = vterm*fkx[i]*fkz[k];
- vg[n][5] = vterm*fky[j]*fkz[k];
- }
- n++;
- }
- }
- }
-
- if (differentiation_flag == 1) compute_gf_ad();
- else compute_gf_ik();
-}
-
-/* ----------------------------------------------------------------------
- adjust PPPM coeffs, called initially and whenever volume has changed
- for a triclinic system
-------------------------------------------------------------------------- */
-
-void PPPM::setup_triclinic()
-{
- int i,j,k,n;
- double *prd;
-
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPM
- // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
- prd = domain->prd;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- // use lamda (0-1) coordinates
-
- delxinv = nx_pppm;
- delyinv = ny_pppm;
- delzinv = nz_pppm;
- delvolinv = delxinv*delyinv*delzinv/volume;
-
- // fkx,fky,fkz for my FFT grid pts
-
- double per_i,per_j,per_k;
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++) {
- per_k = k - nz_pppm*(2*k/nz_pppm);
- for (j = nylo_fft; j <= nyhi_fft; j++) {
- per_j = j - ny_pppm*(2*j/ny_pppm);
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- per_i = i - nx_pppm*(2*i/nx_pppm);
-
- double unitk_lamda[3];
- unitk_lamda[0] = 2.0*MY_PI*per_i;
- unitk_lamda[1] = 2.0*MY_PI*per_j;
- unitk_lamda[2] = 2.0*MY_PI*per_k;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
- fkx[n] = unitk_lamda[0];
- fky[n] = unitk_lamda[1];
- fkz[n] = unitk_lamda[2];
- n++;
- }
- }
- }
-
- // virial coefficients
-
- double sqk,vterm;
-
- for (n = 0; n < nfft; n++) {
- sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
- if (sqk == 0.0) {
- vg[n][0] = 0.0;
- vg[n][1] = 0.0;
- vg[n][2] = 0.0;
- vg[n][3] = 0.0;
- vg[n][4] = 0.0;
- vg[n][5] = 0.0;
- } else {
- vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
- vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
- vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
- vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
- vg[n][3] = vterm*fkx[n]*fky[n];
- vg[n][4] = vterm*fkx[n]*fkz[n];
- vg[n][5] = vterm*fky[n]*fkz[n];
- }
- }
-
- compute_gf_ik_triclinic();
-}
-
-/* ----------------------------------------------------------------------
- reset local grid arrays and communication stencils
- called by fix balance b/c it changed sizes of processor sub-domains
-------------------------------------------------------------------------- */
-
-void PPPM::setup_grid()
-{
- // free all arrays previously allocated
-
- deallocate();
- if (peratom_allocate_flag) deallocate_peratom();
- if (group_allocate_flag) deallocate_groups();
-
- // reset portion of global grid that each proc owns
-
- set_grid_local();
-
- // reallocate K-space dependent memory
- // check if grid communication is now overlapping if not allowed
- // don't invoke allocate peratom() or group(), will be allocated when needed
-
- allocate();
-
- cg->ghost_notify();
- if (overlap_allowed == 0 && cg->ghost_overlap())
- error->all(FLERR,"PPPM grid stencil extends "
- "beyond nearest neighbor processor");
- cg->setup();
-
- // pre-compute Green's function denomiator expansion
- // pre-compute 1d charge distribution coefficients
-
- compute_gf_denom();
- if (differentiation_flag == 1) compute_sf_precoeff();
- compute_rho_coeff();
-
- // pre-compute volume-dependent coeffs
-
- setup();
-}
-
-/* ----------------------------------------------------------------------
- compute the PPPM long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void PPPM::compute(int eflag, int vflag)
-{
- int i,j;
-
- // set energy/virial flags
- // invoke allocate_peratom() if needed for first time
-
- if (eflag || vflag) ev_setup(eflag,vflag);
- else evflag = evflag_atom = eflag_global = vflag_global =
- eflag_atom = vflag_atom = 0;
-
- if (evflag_atom && !peratom_allocate_flag) {
- allocate_peratom();
- cg_peratom->ghost_notify();
- cg_peratom->setup();
- }
-
- // convert atoms from box to lamda coords
-
- if (triclinic == 0) boxlo = domain->boxlo;
- else {
- boxlo = domain->boxlo_lamda;
- domain->x2lamda(atom->nlocal);
- }
-
- // extend size of per-atom arrays if necessary
-
- if (atom->nlocal > nmax) {
- memory->destroy(part2grid);
- nmax = atom->nmax;
- memory->create(part2grid,nmax,3,"pppm:part2grid");
- }
-
- // find grid points for all my particles
- // map my particle charge onto my local 3d density grid
-
- particle_map();
- make_rho();
-
- // all procs communicate density values from their ghost cells
- // to fully sum contribution in their 3d bricks
- // remap from 3d decomposition to FFT decomposition
-
- cg->reverse_comm(this,REVERSE_RHO);
- brick2fft();
-
- // compute potential gradient on my FFT grid and
- // portion of e_long on this proc's FFT grid
- // return gradients (electric fields) in 3d brick decomposition
- // also performs per-atom calculations via poisson_peratom()
-
- poisson();
-
- // all procs communicate E-field values
- // to fill ghost cells surrounding their 3d bricks
-
- if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
- else cg->forward_comm(this,FORWARD_IK);
-
- // extra per-atom energy/virial communication
-
- if (evflag_atom) {
- if (differentiation_flag == 1 && vflag_atom)
- cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
- else if (differentiation_flag == 0)
- cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
- }
-
- // calculate the force on my particles
-
- fieldforce();
-
- // extra per-atom energy/virial communication
-
- if (evflag_atom) fieldforce_peratom();
-
- // sum global energy across procs and add in volume-dependent term
-
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) {
- double energy_all;
- MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
- energy = energy_all;
-
- energy *= 0.5*volume;
- energy -= g_ewald*qsqsum/MY_PIS +
- MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
- energy *= qscale;
- }
-
- // sum global virial across procs
-
- if (vflag_global) {
- double virial_all[6];
- MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
- for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
- }
-
- // per-atom energy/virial
- // energy includes self-energy correction
- // notal accounts for TIP4P tallying eatom/vatom for ghost atoms
-
- if (evflag_atom) {
- double *q = atom->q;
- int nlocal = atom->nlocal;
- int ntotal = nlocal;
- if (tip4pflag) ntotal += atom->nghost;
-
- if (eflag_atom) {
- for (i = 0; i < nlocal; i++) {
- eatom[i] *= 0.5;
- eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
- (g_ewald*g_ewald*volume);
- eatom[i] *= qscale;
- }
- for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
- }
-
- if (vflag_atom) {
- for (i = 0; i < ntotal; i++)
- for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
- }
- }
-
- // 2d slab correction
-
- if (slabflag == 1) slabcorr();
-
- // convert atoms back from lamda to box coords
-
- if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPM::allocate()
-{
- memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_brick");
-
- memory->create(density_fft,nfft_both,"pppm:density_fft");
- memory->create(greensfn,nfft_both,"pppm:greensfn");
- memory->create(work1,2*nfft_both,"pppm:work1");
- memory->create(work2,2*nfft_both,"pppm:work2");
- memory->create(vg,nfft_both,6,"pppm:vg");
-
- if (triclinic == 0) {
- memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
- memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
- memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
- } else {
- memory->create(fkx,nfft_both,"pppm:fkx");
- memory->create(fky,nfft_both,"pppm:fky");
- memory->create(fkz,nfft_both,"pppm:fkz");
- }
-
- if (differentiation_flag == 1) {
- memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:u_brick");
-
- memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
- memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
- memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
- memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
- memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
- memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
-
- } else {
- memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdx_brick");
- memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdy_brick");
- memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdz_brick");
- }
-
- // summation coeffs
-
- order_allocated = order;
- if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
- memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
- memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
- memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
- memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
- "pppm:drho_coeff");
-
- // create 2 FFTs and a Remap
- // 1st FFT keeps data in FFT decompostion
- // 2nd FFT returns data in 3d brick decomposition
- // remap takes data from 3d brick to FFT decomposition
-
- int tmp;
-
- fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 0,0,&tmp);
-
- fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- 0,0,&tmp);
-
- remap = new Remap(lmp,world,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 1,0,0,FFT_PRECISION);
-
- // create ghost grid object for rho and electric field communication
-
- int (*procneigh)[2] = comm->procneigh;
-
- if (differentiation_flag == 1)
- cg = new CommGrid(lmp,world,1,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg = new CommGrid(lmp,world,3,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-}
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPM::deallocate()
-{
- memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
-
- if (differentiation_flag == 1) {
- memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy(sf_precoeff1);
- memory->destroy(sf_precoeff2);
- memory->destroy(sf_precoeff3);
- memory->destroy(sf_precoeff4);
- memory->destroy(sf_precoeff5);
- memory->destroy(sf_precoeff6);
- } else {
- memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
- }
-
- memory->destroy(density_fft);
- memory->destroy(greensfn);
- memory->destroy(work1);
- memory->destroy(work2);
- memory->destroy(vg);
-
- if (triclinic == 0) {
- memory->destroy1d_offset(fkx,nxlo_fft);
- memory->destroy1d_offset(fky,nylo_fft);
- memory->destroy1d_offset(fkz,nzlo_fft);
- } else {
- memory->destroy(fkx);
- memory->destroy(fky);
- memory->destroy(fkz);
- }
-
- memory->destroy(gf_b);
- if (stagger_flag) gf_b = NULL;
- memory->destroy2d_offset(rho1d,-order_allocated/2);
- memory->destroy2d_offset(drho1d,-order_allocated/2);
- memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
- memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
-
- delete fft1;
- delete fft2;
- delete remap;
- delete cg;
-}
-
-/* ----------------------------------------------------------------------
- allocate per-atom memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPM::allocate_peratom()
-{
- peratom_allocate_flag = 1;
-
- if (differentiation_flag != 1)
- memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:u_brick");
-
- memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v0_brick");
-
- memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v1_brick");
- memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v2_brick");
- memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v3_brick");
- memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v4_brick");
- memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v5_brick");
-
- // create ghost grid object for rho and electric field communication
-
- int (*procneigh)[2] = comm->procneigh;
-
- if (differentiation_flag == 1)
- cg_peratom =
- new CommGrid(lmp,world,6,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_peratom =
- new CommGrid(lmp,world,7,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-}
-
-/* ----------------------------------------------------------------------
- deallocate per-atom memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPM::deallocate_peratom()
-{
- peratom_allocate_flag = 0;
-
- memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
-
- if (differentiation_flag != 1)
- memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
-
- delete cg_peratom;
-}
-
-/* ----------------------------------------------------------------------
- set global size of PPPM grid = nx,ny,nz_pppm
- used for charge accumulation, FFTs, and electric field interpolation
-------------------------------------------------------------------------- */
-
-void PPPM::set_grid_global()
-{
- // use xprd,yprd,zprd (even if triclinic, and then scale later)
- // adjust z dimension for 2d slab PPPM
- // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
-
- // make initial g_ewald estimate
- // based on desired accuracy and real space cutoff
- // fluid-occupied volume used to estimate real-space error
- // zprd used rather than zprd_slab
-
- double h;
- bigint natoms = atom->natoms;
-
- if (!gewaldflag) {
- if (accuracy <= 0.0)
- error->all(FLERR,"KSpace accuracy must be > 0");
- g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
- if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
- else g_ewald = sqrt(-log(g_ewald)) / cutoff;
- }
-
- // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
- // nz_pppm uses extended zprd_slab instead of zprd
- // reduce it until accuracy target is met
-
- if (!gridflag) {
-
- if (differentiation_flag == 1 || stagger_flag) {
-
- h = h_x = h_y = h_z = 4.0/g_ewald;
- int count = 0;
- while (1) {
-
- // set grid dimension
- nx_pppm = static_cast<int> (xprd/h_x);
- ny_pppm = static_cast<int> (yprd/h_y);
- nz_pppm = static_cast<int> (zprd_slab/h_z);
-
- if (nx_pppm <= 1) nx_pppm = 2;
- if (ny_pppm <= 1) ny_pppm = 2;
- if (nz_pppm <= 1) nz_pppm = 2;
-
- //set local grid dimension
- int npey_fft,npez_fft;
- if (nz_pppm >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft = 0;
- nxhi_fft = nx_pppm - 1;
- nylo_fft = me_y*ny_pppm/npey_fft;
- nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
- nzlo_fft = me_z*nz_pppm/npez_fft;
- nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
- double df_kspace = compute_df_kspace();
-
- count++;
-
- // break loop if the accuracy has been reached or
- // too many loops have been performed
-
- if (df_kspace <= accuracy) break;
- if (count > 500) error->all(FLERR, "Could not compute grid size");
- h *= 0.95;
- h_x = h_y = h_z = h;
- }
-
- } else {
-
- double err;
- h_x = h_y = h_z = 1.0/g_ewald;
-
- nx_pppm = static_cast<int> (xprd/h_x) + 1;
- ny_pppm = static_cast<int> (yprd/h_y) + 1;
- nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
-
- err = estimate_ik_error(h_x,xprd,natoms);
- while (err > accuracy) {
- err = estimate_ik_error(h_x,xprd,natoms);
- nx_pppm++;
- h_x = xprd/nx_pppm;
- }
-
- err = estimate_ik_error(h_y,yprd,natoms);
- while (err > accuracy) {
- err = estimate_ik_error(h_y,yprd,natoms);
- ny_pppm++;
- h_y = yprd/ny_pppm;
- }
-
- err = estimate_ik_error(h_z,zprd_slab,natoms);
- while (err > accuracy) {
- err = estimate_ik_error(h_z,zprd_slab,natoms);
- nz_pppm++;
- h_z = zprd_slab/nz_pppm;
- }
- }
-
- // scale grid for triclinic skew
-
- if (triclinic) {
- double tmp[3];
- tmp[0] = nx_pppm/xprd;
- tmp[1] = ny_pppm/yprd;
- tmp[2] = nz_pppm/zprd;
- lamda2xT(&tmp[0],&tmp[0]);
- nx_pppm = static_cast<int>(tmp[0]) + 1;
- ny_pppm = static_cast<int>(tmp[1]) + 1;
- nz_pppm = static_cast<int>(tmp[2]) + 1;
- }
- }
-
- // boost grid size until it is factorable
-
- while (!factorable(nx_pppm)) nx_pppm++;
- while (!factorable(ny_pppm)) ny_pppm++;
- while (!factorable(nz_pppm)) nz_pppm++;
-
- if (triclinic == 0) {
- h_x = xprd/nx_pppm;
- h_y = yprd/ny_pppm;
- h_z = zprd_slab/nz_pppm;
- } else {
- double tmp[3];
- tmp[0] = nx_pppm;
- tmp[1] = ny_pppm;
- tmp[2] = nz_pppm;
- x2lamdaT(&tmp[0],&tmp[0]);
- h_x = 1.0/tmp[0];
- h_y = 1.0/tmp[1];
- h_z = 1.0/tmp[2];
- }
-
- if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
- error->all(FLERR,"PPPM grid is too large");
-}
-
-/* ----------------------------------------------------------------------
- check if all factors of n are in list of factors
- return 1 if yes, 0 if no
-------------------------------------------------------------------------- */
-
-int PPPM::factorable(int n)
-{
- int i;
-
- while (n > 1) {
- for (i = 0; i < nfactors; i++) {
- if (n % factors[i] == 0) {
- n /= factors[i];
- break;
- }
- }
- if (i == nfactors) return 0;
- }
-
- return 1;
-}
-
-/* ----------------------------------------------------------------------
- compute estimated kspace force error
-------------------------------------------------------------------------- */
-
-double PPPM::compute_df_kspace()
-{
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
- double df_kspace = 0.0;
- if (differentiation_flag == 1 || stagger_flag) {
- double qopt = compute_qopt();
- df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
- } else {
- double lprx = estimate_ik_error(h_x,xprd,natoms);
- double lpry = estimate_ik_error(h_y,yprd,natoms);
- double lprz = estimate_ik_error(h_z,zprd_slab,natoms);
- df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
- }
- return df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- compute qopt
-------------------------------------------------------------------------- */
-
-double PPPM::compute_qopt()
-{
- double qopt = 0.0;
- double *prd = domain->prd;
-
- const double xprd = prd[0];
- const double yprd = prd[1];
- const double zprd = prd[2];
- const double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- const double unitkx = (MY_2PI/xprd);
- const double unitky = (MY_2PI/yprd);
- const double unitkz = (MY_2PI/zprd_slab);
-
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double u1, u2, sqk;
- double sum1,sum2,sum3,sum4,dot2;
-
- int k,l,m,nx,ny,nz;
- const int twoorder = 2*order;
-
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- const int mper = m - nz_pppm*(2*m/nz_pppm);
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- const int lper = l - ny_pppm*(2*l/ny_pppm);
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- const int kper = k - nx_pppm*(2*k/nx_pppm);
-
- sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
-
- if (sqk != 0.0) {
-
- sum1 = 0.0;
- sum2 = 0.0;
- sum3 = 0.0;
- sum4 = 0.0;
- for (nx = -2; nx <= 2; nx++) {
- qx = unitkx*(kper+nx_pppm*nx);
- sx = exp(-0.25*square(qx/g_ewald));
- argx = 0.5*qx*xprd/nx_pppm;
- wx = powsinxx(argx,twoorder);
- qx *= qx;
-
- for (ny = -2; ny <= 2; ny++) {
- qy = unitky*(lper+ny_pppm*ny);
- sy = exp(-0.25*square(qy/g_ewald));
- argy = 0.5*qy*yprd/ny_pppm;
- wy = powsinxx(argy,twoorder);
- qy *= qy;
-
- for (nz = -2; nz <= 2; nz++) {
- qz = unitkz*(mper+nz_pppm*nz);
- sz = exp(-0.25*square(qz/g_ewald));
- argz = 0.5*qz*zprd_slab/nz_pppm;
- wz = powsinxx(argz,twoorder);
- qz *= qz;
-
- dot2 = qx+qy+qz;
- u1 = sx*sy*sz;
- u2 = wx*wy*wz;
- sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
- sum2 += u1 * u2 * MY_4PI;
- sum3 += u2;
- sum4 += dot2*u2;
- }
- }
- }
- sum2 *= sum2;
- qopt += sum1 - sum2/(sum3*sum4);
- }
- }
- }
- }
- double qopt_all;
- MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
- return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
- estimate kspace force error for ik method
-------------------------------------------------------------------------- */
-
-double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
-{
- double sum = 0.0;
- for (int m = 0; m < order; m++)
- sum += acons[order][m] * pow(h*g_ewald,2.0*m);
- double value = q2 * pow(h*g_ewald,(double)order) *
- sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd);
-
- return value;
-}
-
-/* ----------------------------------------------------------------------
- adjust the g_ewald parameter to near its optimal value
- using a Newton-Raphson solver
-------------------------------------------------------------------------- */
-
-void PPPM::adjust_gewald()
-{
- double dx;
-
- for (int i = 0; i < LARGE; i++) {
- dx = newton_raphson_f() / derivf();
- g_ewald -= dx;
- if (fabs(newton_raphson_f()) < SMALL) return;
- }
-
- char str[128];
- sprintf(str, "Could not compute g_ewald");
- error->all(FLERR, str);
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x) using Newton-Raphson solver
- ------------------------------------------------------------------------- */
-
-double PPPM::newton_raphson_f()
-{
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- bigint natoms = atom->natoms;
-
- double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
- sqrt(natoms*cutoff*xprd*yprd*zprd);
-
- double df_kspace = compute_df_kspace();
-
- return df_rspace - df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
- ------------------------------------------------------------------------- */
-
-double PPPM::derivf()
-{
- double h = 0.000001; //Derivative step-size
- double df,f1,f2,g_ewald_old;
-
- f1 = newton_raphson_f();
- g_ewald_old = g_ewald;
- g_ewald += h;
- f2 = newton_raphson_f();
- g_ewald = g_ewald_old;
- df = (f2 - f1)/h;
-
- return df;
-}
-
-/* ----------------------------------------------------------------------
- Calculate the final estimate of the accuracy
-------------------------------------------------------------------------- */
-
-double PPPM::final_accuracy()
-{
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
-
- double df_kspace = compute_df_kspace();
- double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
- double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
- double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
- double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
- df_table*df_table);
-
- return estimated_accuracy;
-}
-
-/* ----------------------------------------------------------------------
- set local subset of PPPM/FFT grid that I own
- n xyz lo/hi in = 3d brick that I own (inclusive)
- n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
- n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
-------------------------------------------------------------------------- */
-
-void PPPM::set_grid_local()
-{
- // global indices of PPPM grid range from 0 to N-1
- // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that I own without ghost cells
- // for slab PPPM, assign z grid as if it were not extended
-
- nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
- nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
-
- nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
- nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
-
- nzlo_in = static_cast<int>
- (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
- nzhi_in = static_cast<int>
- (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
-
- // nlower,nupper = stencil size for mapping particles to PPPM grid
-
- nlower = -(order-1)/2;
- nupper = order/2;
-
- // shift values for particle <-> grid mapping
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- if (order % 2) shift = OFFSET + 0.5;
- else shift = OFFSET;
- if (order % 2) shiftone = 0.0;
- else shiftone = 0.5;
-
- // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that my particles can contribute charge to
- // effectively nlo_in,nhi_in + ghost cells
- // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
- // position a particle in my box can be at
- // dist[3] = particle position bound = subbox + skin/2.0 + qdist
- // qdist = offset due to TIP4P fictitious charge
- // convert to triclinic if necessary
- // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
- // for slab PPPM, assign z grid as if it were not extended
-
- double *prd,*sublo,*subhi;
-
- if (triclinic == 0) {
- prd = domain->prd;
- boxlo = domain->boxlo;
- sublo = domain->sublo;
- subhi = domain->subhi;
- } else {
- prd = domain->prd_lamda;
- boxlo = domain->boxlo_lamda;
- sublo = domain->sublo_lamda;
- subhi = domain->subhi_lamda;
- }
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double dist[3];
- double cuthalf = 0.5*neighbor->skin + qdist;
- if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
- else kspacebbox(cuthalf,&dist[0]);
-
- int nlo,nhi;
-
- nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nxlo_out = nlo + nlower;
- nxhi_out = nhi + nupper;
-
- nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nylo_out = nlo + nlower;
- nyhi_out = nhi + nupper;
-
- nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nzlo_out = nlo + nlower;
- nzhi_out = nhi + nupper;
-
- if (stagger_flag) {
- nxhi_out++;
- nyhi_out++;
- nzhi_out++;
- }
-
- // for slab PPPM, change the grid boundary for processors at +z end
- // to include the empty volume between periodically repeating slabs
- // for slab PPPM, want charge data communicated from -z proc to +z proc,
- // but not vice versa, also want field data communicated from +z proc to
- // -z proc, but not vice versa
- // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
- // also insure no other procs use ghost cells beyond +z limit
-
- if (slabflag == 1) {
- if (comm->myloc[2] == comm->procgrid[2]-1)
- nzhi_in = nzhi_out = nz_pppm - 1;
- nzhi_out = MIN(nzhi_out,nz_pppm-1);
- }
-
- // decomposition of FFT mesh
- // global indices range from 0 to N-1
- // proc owns entire x-dimension, clumps of columns in y,z dimensions
- // npey_fft,npez_fft = # of procs in y,z dims
- // if nprocs is small enough, proc can own 1 or more entire xy planes,
- // else proc owns 2d sub-blocks of yz plane
- // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
- // nlo_fft,nhi_fft = lower/upper limit of the section
- // of the global FFT mesh that I own
-
- int npey_fft,npez_fft;
- if (nz_pppm >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft = 0;
- nxhi_fft = nx_pppm - 1;
- nylo_fft = me_y*ny_pppm/npey_fft;
- nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
- nzlo_fft = me_z*nz_pppm/npez_fft;
- nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
- // PPPM grid pts owned by this proc, including ghosts
-
- ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
-
- // FFT grids owned by this proc, without ghosts
- // nfft = FFT points in FFT decomposition on this proc
- // nfft_brick = FFT points in 3d brick-decomposition on this proc
- // nfft_both = greater of 2 values
-
- nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
- (nzhi_fft-nzlo_fft+1);
- int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
- (nzhi_in-nzlo_in+1);
- nfft_both = MAX(nfft,nfft_brick);
-}
-
-/* ----------------------------------------------------------------------
- pre-compute Green's function denominator expansion coeffs, Gamma(2n)
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_denom()
-{
- int k,l,m;
-
- for (l = 1; l < order; l++) gf_b[l] = 0.0;
- gf_b[0] = 1.0;
-
- for (m = 1; m < order; m++) {
- for (l = m; l > 0; l--)
- gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
- gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
- }
-
- bigint ifact = 1;
- for (k = 1; k < 2*order; k++) ifact *= k;
- double gaminv = 1.0/ifact;
- for (l = 0; l < order; l++) gf_b[l] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
- pre-compute modified (Hockney-Eastwood) Coulomb Green's function
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ik()
-{
- const double * const prd = domain->prd;
-
- const double xprd = prd[0];
- const double yprd = prd[1];
- const double zprd = prd[2];
- const double zprd_slab = zprd*slab_volfactor;
- const double unitkx = (MY_2PI/xprd);
- const double unitky = (MY_2PI/yprd);
- const double unitkz = (MY_2PI/zprd_slab);
-
- double snx,sny,snz;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double sum1,dot1,dot2;
- double numerator,denominator;
- double sqk;
-
- int k,l,m,n,nx,ny,nz,kper,lper,mper;
-
- const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
- pow(-log(EPS_HOC),0.25));
- const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
- pow(-log(EPS_HOC),0.25));
- const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
- pow(-log(EPS_HOC),0.25));
- const int twoorder = 2*order;
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
- snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
- sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
- snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
-
- sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
-
- if (sqk != 0.0) {
- numerator = 12.5663706/sqk;
- denominator = gf_denom(snx,sny,snz);
- sum1 = 0.0;
-
- for (nx = -nbx; nx <= nbx; nx++) {
- qx = unitkx*(kper+nx_pppm*nx);
- sx = exp(-0.25*square(qx/g_ewald));
- argx = 0.5*qx*xprd/nx_pppm;
- wx = powsinxx(argx,twoorder);
-
- for (ny = -nby; ny <= nby; ny++) {
- qy = unitky*(lper+ny_pppm*ny);
- sy = exp(-0.25*square(qy/g_ewald));
- argy = 0.5*qy*yprd/ny_pppm;
- wy = powsinxx(argy,twoorder);
-
- for (nz = -nbz; nz <= nbz; nz++) {
- qz = unitkz*(mper+nz_pppm*nz);
- sz = exp(-0.25*square(qz/g_ewald));
- argz = 0.5*qz*zprd_slab/nz_pppm;
- wz = powsinxx(argz,twoorder);
-
- dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
- dot2 = qx*qx+qy*qy+qz*qz;
- sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
- }
- }
- }
- greensfn[n++] = numerator*sum1/denominator;
- } else greensfn[n++] = 0.0;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- pre-compute modified (Hockney-Eastwood) Coulomb Green's function
- for a triclinic system
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ik_triclinic()
-{
- double snx,sny,snz;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double sum1,dot1,dot2;
- double numerator,denominator;
- double sqk;
-
- int k,l,m,n,nx,ny,nz,kper,lper,mper;
-
- double tmp[3];
- tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25);
- tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25);
- tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25);
- lamda2xT(&tmp[0],&tmp[0]);
- const int nbx = static_cast<int> (tmp[0]);
- const int nby = static_cast<int> (tmp[1]);
- const int nbz = static_cast<int> (tmp[2]);
-
- const int twoorder = 2*order;
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
- snz = square(sin(MY_PI*mper/nz_pppm));
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
- sny = square(sin(MY_PI*lper/ny_pppm));
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
- snx = square(sin(MY_PI*kper/nx_pppm));
-
- double unitk_lamda[3];
- unitk_lamda[0] = 2.0*MY_PI*kper;
- unitk_lamda[1] = 2.0*MY_PI*lper;
- unitk_lamda[2] = 2.0*MY_PI*mper;
- x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-
- sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
-
- if (sqk != 0.0) {
- numerator = 12.5663706/sqk;
- denominator = gf_denom(snx,sny,snz);
- sum1 = 0.0;
-
- for (nx = -nbx; nx <= nbx; nx++) {
- argx = MY_PI*kper/nx_pppm + MY_PI*nx;
- wx = powsinxx(argx,twoorder);
-
- for (ny = -nby; ny <= nby; ny++) {
- argy = MY_PI*lper/ny_pppm + MY_PI*ny;
- wy = powsinxx(argy,twoorder);
-
- for (nz = -nbz; nz <= nbz; nz++) {
- argz = MY_PI*mper/nz_pppm + MY_PI*nz;
- wz = powsinxx(argz,twoorder);
-
- double b[3];
- b[0] = 2.0*MY_PI*nx_pppm*nx;
- b[1] = 2.0*MY_PI*ny_pppm*ny;
- b[2] = 2.0*MY_PI*nz_pppm*nz;
- x2lamdaT(&b[0],&b[0]);
-
- qx = unitk_lamda[0]+b[0];
- sx = exp(-0.25*square(qx/g_ewald));
-
- qy = unitk_lamda[1]+b[1];
- sy = exp(-0.25*square(qy/g_ewald));
-
- qz = unitk_lamda[2]+b[2];
- sz = exp(-0.25*square(qz/g_ewald));
-
- dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;
- dot2 = qx*qx+qy*qy+qz*qz;
- sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
- }
- }
- }
- greensfn[n++] = numerator*sum1/denominator;
- } else greensfn[n++] = 0.0;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- compute optimized Green's function for energy calculation
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ad()
-{
- const double * const prd = domain->prd;
-
- const double xprd = prd[0];
- const double yprd = prd[1];
- const double zprd = prd[2];
- const double zprd_slab = zprd*slab_volfactor;
- const double unitkx = (MY_2PI/xprd);
- const double unitky = (MY_2PI/yprd);
- const double unitkz = (MY_2PI/zprd_slab);
-
- double snx,sny,snz,sqk;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double numerator,denominator;
- int k,l,m,n,kper,lper,mper;
-
- const int twoorder = 2*order;
-
- for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0;
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
- qz = unitkz*mper;
- snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
- sz = exp(-0.25*square(qz/g_ewald));
- argz = 0.5*qz*zprd_slab/nz_pppm;
- wz = powsinxx(argz,twoorder);
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
- qy = unitky*lper;
- sny = square(sin(0.5*qy*yprd/ny_pppm));
- sy = exp(-0.25*square(qy/g_ewald));
- argy = 0.5*qy*yprd/ny_pppm;
- wy = powsinxx(argy,twoorder);
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
- qx = unitkx*kper;
- snx = square(sin(0.5*qx*xprd/nx_pppm));
- sx = exp(-0.25*square(qx/g_ewald));
- argx = 0.5*qx*xprd/nx_pppm;
- wx = powsinxx(argx,twoorder);
-
- sqk = qx*qx + qy*qy + qz*qz;
-
- if (sqk != 0.0) {
- numerator = MY_4PI/sqk;
- denominator = gf_denom(snx,sny,snz);
- greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
- sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
- sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
- sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
- sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
- sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
- sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
- n++;
- } else {
- greensfn[n] = 0.0;
- sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
- sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
- sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
- sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
- sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
- sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
- n++;
- }
- }
- }
- }
-
- // compute the coefficients for the self-force correction
-
- double prex, prey, prez;
- prex = prey = prez = MY_PI/volume;
- prex *= nx_pppm/xprd;
- prey *= ny_pppm/yprd;
- prez *= nz_pppm/zprd_slab;
- sf_coeff[0] *= prex;
- sf_coeff[1] *= prex*2;
- sf_coeff[2] *= prey;
- sf_coeff[3] *= prey*2;
- sf_coeff[4] *= prez;
- sf_coeff[5] *= prez*2;
-
- // communicate values with other procs
-
- double tmp[6];
- MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
- for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
-}
-
-/* ----------------------------------------------------------------------
- compute self force coefficients for ad-differentiation scheme
-------------------------------------------------------------------------- */
-
-void PPPM::compute_sf_precoeff()
-{
- int i,k,l,m,n;
- int nx,ny,nz,kper,lper,mper;
- double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
- double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
- double u0,u1,u2,u3,u4,u5,u6;
- double sum1,sum2,sum3,sum4,sum5,sum6;
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
-
- sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
- for (i = 0; i < 5; i++) {
-
- qx0 = MY_2PI*(kper+nx_pppm*(i-2));
- qx1 = MY_2PI*(kper+nx_pppm*(i-1));
- qx2 = MY_2PI*(kper+nx_pppm*(i ));
- wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
- wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
- wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
-
- qy0 = MY_2PI*(lper+ny_pppm*(i-2));
- qy1 = MY_2PI*(lper+ny_pppm*(i-1));
- qy2 = MY_2PI*(lper+ny_pppm*(i ));
- wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
- wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
- wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
-
- qz0 = MY_2PI*(mper+nz_pppm*(i-2));
- qz1 = MY_2PI*(mper+nz_pppm*(i-1));
- qz2 = MY_2PI*(mper+nz_pppm*(i ));
-
- wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
- wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
- wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
- }
-
- for (nx = 0; nx < 5; nx++) {
- for (ny = 0; ny < 5; ny++) {
- for (nz = 0; nz < 5; nz++) {
- u0 = wx0[nx]*wy0[ny]*wz0[nz];
- u1 = wx1[nx]*wy0[ny]*wz0[nz];
- u2 = wx2[nx]*wy0[ny]*wz0[nz];
- u3 = wx0[nx]*wy1[ny]*wz0[nz];
- u4 = wx0[nx]*wy2[ny]*wz0[nz];
- u5 = wx0[nx]*wy0[ny]*wz1[nz];
- u6 = wx0[nx]*wy0[ny]*wz2[nz];
-
- sum1 += u0*u1;
- sum2 += u0*u2;
- sum3 += u0*u3;
- sum4 += u0*u4;
- sum5 += u0*u5;
- sum6 += u0*u6;
- }
- }
- }
-
- // store values
-
- sf_precoeff1[n] = sum1;
- sf_precoeff2[n] = sum2;
- sf_precoeff3[n] = sum3;
- sf_precoeff4[n] = sum4;
- sf_precoeff5[n] = sum5;
- sf_precoeff6[n++] = sum6;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- find center grid pt for each of my particles
- check that full stencil for the particle will fit in my 3d brick
- store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPM::particle_map()
-{
- int nx,ny,nz;
-
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- int flag = 0;
- for (int i = 0; i < nlocal; i++) {
-
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // current particle coord can be outside global and local box
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
- ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
- nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
- part2grid[i][0] = nx;
- part2grid[i][1] = ny;
- part2grid[i][2] = nz;
-
- // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
- if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
- ny+nlower < nylo_out || ny+nupper > nyhi_out ||
- nz+nlower < nzlo_out || nz+nupper > nzhi_out)
- flag = 1;
- }
-
- if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid
-------------------------------------------------------------------------- */
-
-void PPPM::make_rho()
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density array
-
- memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- z0 = delvolinv * q[i];
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- y0 = z0*rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- x0 = y0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- density_brick[mz][my][mx] += x0*rho1d[0][l];
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- remap density from 3d brick decomposition to FFT decomposition
-------------------------------------------------------------------------- */
-
-void PPPM::brick2fft()
-{
- int n,ix,iy,iz;
-
- // copy grabs inner portion of density from 3d brick
- // remap could be done as pre-stage of FFT,
- // but this works optimally on only double values, not complex values
-
- n = 0;
- for (iz = nzlo_in; iz <= nzhi_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_fft[n++] = density_brick[iz][iy][ix];
-
- remap->perform(density_fft,density_fft,work1);
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver
-------------------------------------------------------------------------- */
-
-void PPPM::poisson()
-{
- if (differentiation_flag == 1) poisson_ad();
- else poisson_ik();
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for ik
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ik()
-{
- int i,j,k,n;
- double eng;
-
- // transform charge density (r -> k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] = density_fft[i];
- work1[n++] = ZEROF;
- }
-
- fft1->compute(work1,work1,1);
-
- // global energy and virial contribution
-
- double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
- double s2 = scaleinv*scaleinv;
-
- if (eflag_global || vflag_global) {
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft; i++) {
- eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
- if (eflag_global) energy += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft; i++) {
- energy +=
- s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- n += 2;
- }
- }
- }
-
- // scale by 1/total-grid-pts to get rho(k)
- // multiply by Green's function to get V(k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] *= scaleinv * greensfn[i];
- work1[n++] *= scaleinv * greensfn[i];
- }
-
- // extra FFTs for per-atom energy/virial
-
- if (evflag_atom) poisson_peratom();
-
- // triclinic system
-
- if (triclinic) {
- poisson_ik_triclinic();
- return;
- }
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fkx[i]*work1[n+1];
- work2[n+1] = -fkx[i]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdx_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // y direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fky[j]*work1[n+1];
- work2[n+1] = -fky[j]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdy_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // z direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fkz[k]*work1[n+1];
- work2[n+1] = -fkz[k]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdz_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for ik for a triclinic system
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ik_triclinic()
-{
- int i,j,k,n;
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x direction gradient
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = fkx[i]*work1[n+1];
- work2[n+1] = -fkx[i]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdx_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // y direction gradient
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = fky[i]*work1[n+1];
- work2[n+1] = -fky[i]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdy_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // z direction gradient
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = fkz[i]*work1[n+1];
- work2[n+1] = -fkz[i]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdz_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for ad
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ad()
-{
- int i,j,k,n;
- double eng;
-
- // transform charge density (r -> k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] = density_fft[i];
- work1[n++] = ZEROF;
- }
-
- fft1->compute(work1,work1,1);
-
- // global energy and virial contribution
-
- double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
- double s2 = scaleinv*scaleinv;
-
- if (eflag_global || vflag_global) {
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft; i++) {
- eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
- if (eflag_global) energy += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft; i++) {
- energy +=
- s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- n += 2;
- }
- }
- }
-
- // scale by 1/total-grid-pts to get rho(k)
- // multiply by Green's function to get V(k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] *= scaleinv * greensfn[i];
- work1[n++] *= scaleinv * greensfn[i];
- }
-
- // extra FFTs for per-atom energy/virial
-
- if (vflag_atom) poisson_peratom();
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n];
- work2[n+1] = work1[n+1];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- u_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_peratom()
-{
- int i,j,k,n;
-
- // energy
-
- if (eflag_atom && differentiation_flag != 1) {
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n];
- work2[n+1] = work1[n+1];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- u_brick[k][j][i] = work2[n];
- n += 2;
- }
- }
-
- // 6 components of virial in v0 thru v5
-
- if (!vflag_atom) return;
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][0];
- work2[n+1] = work1[n+1]*vg[i][0];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v0_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][1];
- work2[n+1] = work1[n+1]*vg[i][1];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v1_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][2];
- work2[n+1] = work1[n+1]*vg[i][2];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v2_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][3];
- work2[n+1] = work1[n+1]*vg[i][3];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v3_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][4];
- work2[n+1] = work1[n+1]*vg[i][4];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v4_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][5];
- work2[n+1] = work1[n+1]*vg[i][5];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v5_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce()
-{
- if (differentiation_flag == 1) fieldforce_ad();
- else fieldforce_ik();
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles for ik
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_ik()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx,eky,ekz;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- ekx -= x0*vdx_brick[mz][my][mx];
- eky -= x0*vdy_brick[mz][my][mx];
- ekz -= x0*vdz_brick[mz][my][mx];
- }
- }
- }
-
- // convert E-field to force
-
- const double qfactor = force->qqrd2e * scale * q[i];
- f[i][0] += qfactor*ekx;
- f[i][1] += qfactor*eky;
- if (slabflag != 2) f[i][2] += qfactor*ekz;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles for ad
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_ad()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz;
- FFT_SCALAR ekx,eky,ekz;
- double s1,s2,s3;
- double sf = 0.0;
- double *prd;
-
- prd = domain->prd;
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
-
- double hx_inv = nx_pppm/xprd;
- double hy_inv = ny_pppm/yprd;
- double hz_inv = nz_pppm/zprd;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
- compute_drho1d(dx,dy,dz);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
- eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
- ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
- }
- }
- }
- ekx *= hx_inv;
- eky *= hy_inv;
- ekz *= hz_inv;
-
- // convert E-field to force and substract self forces
-
- const double qfactor = force->qqrd2e * scale;
-
- s1 = x[i][0]*hx_inv;
- s2 = x[i][1]*hy_inv;
- s3 = x[i][2]*hz_inv;
- sf = sf_coeff[0]*sin(2*MY_PI*s1);
- sf += sf_coeff[1]*sin(4*MY_PI*s1);
- sf *= 2*q[i]*q[i];
- f[i][0] += qfactor*(ekx*q[i] - sf);
-
- sf = sf_coeff[2]*sin(2*MY_PI*s2);
- sf += sf_coeff[3]*sin(4*MY_PI*s2);
- sf *= 2*q[i]*q[i];
- f[i][1] += qfactor*(eky*q[i] - sf);
-
-
- sf = sf_coeff[4]*sin(2*MY_PI*s3);
- sf += sf_coeff[5]*sin(4*MY_PI*s3);
- sf *= 2*q[i]*q[i];
- if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_peratom()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
-
- // loop over my charges, interpolate from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- if (eflag_atom) u += x0*u_brick[mz][my][mx];
- if (vflag_atom) {
- v0 += x0*v0_brick[mz][my][mx];
- v1 += x0*v1_brick[mz][my][mx];
- v2 += x0*v2_brick[mz][my][mx];
- v3 += x0*v3_brick[mz][my][mx];
- v4 += x0*v4_brick[mz][my][mx];
- v5 += x0*v5_brick[mz][my][mx];
- }
- }
- }
- }
-
- if (eflag_atom) eatom[i] += q[i]*u;
- if (vflag_atom) {
- vatom[i][0] += q[i]*v0;
- vatom[i][1] += q[i]*v1;
- vatom[i][2] += q[i]*v2;
- vatom[i][3] += q[i]*v3;
- vatom[i][4] += q[i]*v4;
- vatom[i][5] += q[i]*v5;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- pack own values to buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- if (flag == FORWARD_IK) {
- FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = xsrc[list[i]];
- buf[n++] = ysrc[list[i]];
- buf[n++] = zsrc[list[i]];
- }
- } else if (flag == FORWARD_AD) {
- FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
- } else if (flag == FORWARD_IK_PERATOM) {
- FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) buf[n++] = esrc[list[i]];
- if (vflag_atom) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
- } else if (flag == FORWARD_AD_PERATOM) {
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- unpack another proc's own values from buf and set own ghost values
-------------------------------------------------------------------------- */
-
-void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- if (flag == FORWARD_IK) {
- FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- xdest[list[i]] = buf[n++];
- ydest[list[i]] = buf[n++];
- zdest[list[i]] = buf[n++];
- }
- } else if (flag == FORWARD_AD) {
- FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] = buf[i];
- } else if (flag == FORWARD_IK_PERATOM) {
- FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) esrc[list[i]] = buf[n++];
- if (vflag_atom) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
- } else if (flag == FORWARD_AD_PERATOM) {
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- pack ghost values into buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- if (flag == REVERSE_RHO) {
- FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
- }
-}
-
-/* ----------------------------------------------------------------------
- unpack another proc's ghost values from buf and add to own values
-------------------------------------------------------------------------- */
-
-void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- if (flag == REVERSE_RHO) {
- FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] += buf[i];
- }
-}
-
-/* ----------------------------------------------------------------------
- map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
-------------------------------------------------------------------------- */
-
-void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
- // loop thru all possible factorizations of nprocs
- // surf = surface area of largest proc sub-domain
- // innermost if test minimizes surface area and surface/volume ratio
-
- int bestsurf = 2 * (nx + ny);
- int bestboxx = 0;
- int bestboxy = 0;
-
- int boxx,boxy,surf,ipx,ipy;
-
- ipx = 1;
- while (ipx <= nprocs) {
- if (nprocs % ipx == 0) {
- ipy = nprocs/ipx;
- boxx = nx/ipx;
- if (nx % ipx) boxx++;
- boxy = ny/ipy;
- if (ny % ipy) boxy++;
- surf = boxx + boxy;
- if (surf < bestsurf ||
- (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
- bestsurf = surf;
- bestboxx = boxx;
- bestboxy = boxy;
- *px = ipx;
- *py = ipy;
- }
- }
- ipx++;
- }
-}
-
-/* ----------------------------------------------------------------------
- charge assignment into rho1d
- dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
- const FFT_SCALAR &dz)
-{
- int k,l;
- FFT_SCALAR r1,r2,r3;
-
- for (k = (1-order)/2; k <= order/2; k++) {
- r1 = r2 = r3 = ZEROF;
-
- for (l = order-1; l >= 0; l--) {
- r1 = rho_coeff[l][k] + r1*dx;
- r2 = rho_coeff[l][k] + r2*dy;
- r3 = rho_coeff[l][k] + r3*dz;
- }
- rho1d[0][k] = r1;
- rho1d[1][k] = r2;
- rho1d[2][k] = r3;
- }
-}
-
-/* ----------------------------------------------------------------------
- charge assignment into drho1d
- dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
- const FFT_SCALAR &dz)
-{
- int k,l;
- FFT_SCALAR r1,r2,r3;
-
- for (k = (1-order)/2; k <= order/2; k++) {
- r1 = r2 = r3 = ZEROF;
-
- for (l = order-2; l >= 0; l--) {
- r1 = drho_coeff[l][k] + r1*dx;
- r2 = drho_coeff[l][k] + r2*dy;
- r3 = drho_coeff[l][k] + r3*dz;
- }
- drho1d[0][k] = r1;
- drho1d[1][k] = r2;
- drho1d[2][k] = r3;
- }
-}
-
-/* ----------------------------------------------------------------------
- generate coeffients for the weight function of order n
-
- (n-1)
- Wn(x) = Sum wn(k,x) , Sum is over every other integer
- k=-(n-1)
- For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
- k is odd integers if n is even and even integers if n is odd
- ---
- | n-1
- | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
- wn(k,x) = < l=0
- |
- | 0 otherwise
- ---
- a coeffients are packed into the array rho_coeff to eliminate zeros
- rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
-------------------------------------------------------------------------- */
-
-void PPPM::compute_rho_coeff()
-{
- int j,k,l,m;
- FFT_SCALAR s;
-
- FFT_SCALAR **a;
- memory->create2d_offset(a,order,-order,order,"pppm:a");
-
- for (k = -order; k <= order; k++)
- for (l = 0; l < order; l++)
- a[l][k] = 0.0;
-
- a[0][0] = 1.0;
- for (j = 1; j < order; j++) {
- for (k = -j; k <= j; k += 2) {
- s = 0.0;
- for (l = 0; l < j; l++) {
- a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
- s += powf(0.5,(float) l+1) *
- (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
- s += pow(0.5,(double) l+1) *
- (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
- }
- a[0][k] = s;
- }
- }
-
- m = (1-order)/2;
- for (k = -(order-1); k < order; k += 2) {
- for (l = 0; l < order; l++)
- rho_coeff[l][m] = a[l][k];
- for (l = 1; l < order; l++)
- drho_coeff[l-1][m] = l*a[l][k];
- m++;
- }
-
- memory->destroy2d_offset(a,-order);
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPM::slabcorr()
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int nlocal = atom->nlocal;
-
- double dipole = 0.0;
- for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
- // sum local contributions to get global dipole moment
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
- // need to make non-neutral systems and/or
- // per-atom energy translationally invariant
-
- double dipole_r2 = 0.0;
- if (eflag_atom || fabs(qsum) > SMALL) {
- for (int i = 0; i < nlocal; i++)
- dipole_r2 += q[i]*x[i][2]*x[i][2];
-
- // sum local contributions
-
- double tmp;
- MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2 = tmp;
- }
-
- // compute corrections
-
- const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
- qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) energy += qscale * e_slabcorr;
-
- // per-atom energy
-
- if (eflag_atom) {
- double efact = qscale * MY_2PI/volume;
- for (int i = 0; i < nlocal; i++)
- eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
- qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
- }
-
- // add on force corrections
-
- double ffact = qscale * (-4.0*MY_PI/volume);
- double **f = atom->f;
-
- for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
- perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPM::timing_1d(int n, double &time1d)
-{
- double time1,time2;
-
- for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- for (int i = 0; i < n; i++) {
- fft1->timing1d(work1,nfft_both,1);
- fft2->timing1d(work1,nfft_both,-1);
- if (differentiation_flag != 1) {
- fft2->timing1d(work1,nfft_both,-1);
- fft2->timing1d(work1,nfft_both,-1);
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time1d = time2 - time1;
-
- if (differentiation_flag) return 2;
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPM::timing_3d(int n, double &time3d)
-{
- double time1,time2;
-
- for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- for (int i = 0; i < n; i++) {
- fft1->compute(work1,work1,1);
- fft2->compute(work1,work1,-1);
- if (differentiation_flag != 1) {
- fft2->compute(work1,work1,-1);
- fft2->compute(work1,work1,-1);
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time3d = time2 - time1;
-
- if (differentiation_flag) return 2;
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPM::memory_usage()
-{
- double bytes = nmax*3 * sizeof(double);
- int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
- if (differentiation_flag == 1) {
- bytes += 2 * nbrick * sizeof(FFT_SCALAR);
- } else {
- bytes += 4 * nbrick * sizeof(FFT_SCALAR);
- }
- if (triclinic) bytes += 3 * nfft_both * sizeof(double);
- bytes += 6 * nfft_both * sizeof(double);
- bytes += nfft_both * sizeof(double);
- bytes += nfft_both*5 * sizeof(FFT_SCALAR);
-
- if (peratom_allocate_flag)
- bytes += 6 * nbrick * sizeof(FFT_SCALAR);
-
- if (group_allocate_flag) {
- bytes += 2 * nbrick * sizeof(FFT_SCALAR);
- bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;
- }
-
- bytes += cg->memory_usage();
-
- return bytes;
-}
-
-/* ----------------------------------------------------------------------
- group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- compute the PPPM total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
-{
- if (slabflag && triclinic)
- error->all(FLERR,"Cannot (yet) use K-space slab "
- "correction with compute group/group for triclinic systems");
-
- if (differentiation_flag)
- error->all(FLERR,"Cannot (yet) use kspace_modify "
- "diff ad with compute group/group");
-
- if (!group_allocate_flag) allocate_groups();
-
- // convert atoms from box to lamda coords
-
- if (triclinic == 0) boxlo = domain->boxlo;
- else {
- boxlo = domain->boxlo_lamda;
- domain->x2lamda(atom->nlocal);
- }
-
- e2group = 0.0; //energy
- f2group[0] = 0.0; //force in x-direction
- f2group[1] = 0.0; //force in y-direction
- f2group[2] = 0.0; //force in z-direction
-
- // map my particle charge onto my local 3d density grid
-
- make_rho_groups(groupbit_A,groupbit_B,AA_flag);
-
- // all procs communicate density values from their ghost cells
- // to fully sum contribution in their 3d bricks
- // remap from 3d decomposition to FFT decomposition
-
- // temporarily store and switch pointers so we can
- // use brick2fft() for groups A and B (without
- // writing an additional function)
-
- FFT_SCALAR ***density_brick_real = density_brick;
- FFT_SCALAR *density_fft_real = density_fft;
-
- // group A
-
- density_brick = density_A_brick;
- density_fft = density_A_fft;
-
- cg->reverse_comm(this,REVERSE_RHO);
- brick2fft();
-
- // group B
-
- density_brick = density_B_brick;
- density_fft = density_B_fft;
-
- cg->reverse_comm(this,REVERSE_RHO);
- brick2fft();
-
- // switch back pointers
-
- density_brick = density_brick_real;
- density_fft = density_fft_real;
-
- // compute potential gradient on my FFT grid and
- // portion of group-group energy/force on this proc's FFT grid
-
- poisson_groups(AA_flag);
-
- const double qscale = force->qqrd2e * scale;
-
- // total group A <--> group B energy
- // self and boundary correction terms are in compute_group_group.cpp
-
- double e2group_all;
- MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
- e2group = e2group_all;
-
- e2group *= qscale*0.5*volume;
-
- // total group A <--> group B force
-
- double f2group_all[3];
- MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
-
- f2group[0] = qscale*volume*f2group_all[0];
- f2group[1] = qscale*volume*f2group_all[1];
- if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
-
- // convert atoms back from lamda to box coords
-
- if (triclinic) domain->lamda2x(atom->nlocal);
-
- if (slabflag == 1)
- slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
-}
-
-/* ----------------------------------------------------------------------
- allocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPM::allocate_groups()
-{
- group_allocate_flag = 1;
-
- memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_A_brick");
- memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_B_brick");
- memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
- memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
-}
-
-/* ----------------------------------------------------------------------
- deallocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPM::deallocate_groups()
-{
- group_allocate_flag = 0;
-
- memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy(density_A_fft);
- memory->destroy(density_B_fft);
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density arrays
-
- memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- int nlocal = atom->nlocal;
- int *mask = atom->mask;
-
- for (int i = 0; i < nlocal; i++) {
-
- if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
- if (AA_flag) continue;
-
- if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- z0 = delvolinv * q[i];
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- y0 = z0*rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- x0 = y0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
-
- // group A
-
- if (mask[i] & groupbit_A)
- density_A_brick[mz][my][mx] += x0*rho1d[0][l];
-
- // group B
-
- if (mask[i] & groupbit_B)
- density_B_brick[mz][my][mx] += x0*rho1d[0][l];
- }
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPM::poisson_groups(int AA_flag)
-{
- int i,j,k,n;
-
- // reuse memory (already declared)
-
- FFT_SCALAR *work_A = work1;
- FFT_SCALAR *work_B = work2;
-
- // transform charge density (r -> k)
-
- // group A
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_A[n++] = density_A_fft[i];
- work_A[n++] = ZEROF;
- }
-
- fft1->compute(work_A,work_A,1);
-
- // group B
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_B[n++] = density_B_fft[i];
- work_B[n++] = ZEROF;
- }
-
- fft1->compute(work_B,work_B,1);
-
- // group-group energy and force contribution,
- // keep everything in reciprocal space so
- // no inverse FFTs needed
-
- double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
- double s2 = scaleinv*scaleinv;
-
- // energy
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- e2group += s2 * greensfn[i] *
- (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
- n += 2;
- }
-
- if (AA_flag) return;
-
-
- // multiply by Green's function and s2
- // (only for work_A so it is not squared below)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_A[n++] *= s2 * greensfn[i];
- work_A[n++] *= s2 * greensfn[i];
- }
-
- // triclinic system
-
- if (triclinic) {
- poisson_groups_triclinic();
- return;
- }
-
- double partial_group;
-
- // force, x direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[0] += fkx[i] * partial_group;
- n += 2;
- }
-
- // force, y direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[1] += fky[j] * partial_group;
- n += 2;
- }
-
- // force, z direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[2] += fkz[k] * partial_group;
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for group-group interactions
- for a triclinic system
- ------------------------------------------------------------------------- */
-
-void PPPM::poisson_groups_triclinic()
-{
- int i,j,k,n;
-
- // reuse memory (already declared)
-
- FFT_SCALAR *work_A = work1;
- FFT_SCALAR *work_B = work2;
-
- double partial_group;
-
- // force, x direction
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[0] += fkx[i] * partial_group;
- n += 2;
- }
-
- // force, y direction
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[1] += fky[i] * partial_group;
- n += 2;
- }
-
- // force, z direction
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[2] += fkz[i] * partial_group;
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int *mask = atom->mask;
- int nlocal = atom->nlocal;
-
- double qsum_A = 0.0;
- double qsum_B = 0.0;
- double dipole_A = 0.0;
- double dipole_B = 0.0;
- double dipole_r2_A = 0.0;
- double dipole_r2_B = 0.0;
-
- for (int i = 0; i < nlocal; i++) {
- if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
- if (AA_flag) continue;
-
- if (mask[i] & groupbit_A) {
- qsum_A += q[i];
- dipole_A += q[i]*x[i][2];
- dipole_r2_A += q[i]*x[i][2]*x[i][2];
- }
-
- if (mask[i] & groupbit_B) {
- qsum_B += q[i];
- dipole_B += q[i]*x[i][2];
- dipole_r2_B += q[i]*x[i][2]*x[i][2];
- }
- }
-
- // sum local contributions to get total charge and global dipole moment
- // for each group
-
- double tmp;
- MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum_A = tmp;
-
- MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum_B = tmp;
-
- MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_A = tmp;
-
- MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_B = tmp;
-
- MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2_A = tmp;
-
- MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2_B = tmp;
-
- // compute corrections
-
- const double qscale = force->qqrd2e * scale;
- const double efact = qscale * MY_2PI/volume;
-
- e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
- qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
-
- // add on force corrections
-
- const double ffact = qscale * (-4.0*MY_PI/volume);
- f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+ per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
+ analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
+ triclinic added by Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm.h"
+#include "atom.h"
+#include "comm.h"
+#include "commgrid.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+#include "math_const.h"
+#include "math_special.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define MAXORDER 7
+#define OFFSET 16384
+#define SMALL 0.00001
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+
+enum{REVERSE_RHO};
+enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF 1.0f
+#else
+#define ZEROF 0.0
+#define ONEF 1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+ if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
+
+ pppmflag = 1;
+ group_group_enable = 1;
+
+ accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+
+ nfactors = 3;
+ factors = new int[nfactors];
+ factors[0] = 2;
+ factors[1] = 3;
+ factors[2] = 5;
+
+ MPI_Comm_rank(world,&me);
+ MPI_Comm_size(world,&nprocs);
+
+ density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+ density_fft = NULL;
+ u_brick = NULL;
+ v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+ greensfn = NULL;
+ work1 = work2 = NULL;
+ vg = NULL;
+ fkx = fky = fkz = NULL;
+
+ sf_precoeff1 = sf_precoeff2 = sf_precoeff3 =
+ sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
+
+ density_A_brick = density_B_brick = NULL;
+ density_A_fft = density_B_fft = NULL;
+
+ gf_b = NULL;
+ rho1d = rho_coeff = drho1d = drho_coeff = NULL;
+
+ fft1 = fft2 = NULL;
+ remap = NULL;
+ cg = NULL;
+ cg_peratom = NULL;
+
+ nmax = 0;
+ part2grid = NULL;
+
+ peratom_allocate_flag = 0;
+ group_allocate_flag = 0;
+
+ // define acons coefficients for estimation of kspace errors
+ // see JCP 109, pg 7698 for derivation of coefficients
+ // higher order coefficients may be computed if needed
+
+ memory->create(acons,8,7,"pppm:acons");
+ acons[1][0] = 2.0 / 3.0;
+ acons[2][0] = 1.0 / 50.0;
+ acons[2][1] = 5.0 / 294.0;
+ acons[3][0] = 1.0 / 588.0;
+ acons[3][1] = 7.0 / 1440.0;
+ acons[3][2] = 21.0 / 3872.0;
+ acons[4][0] = 1.0 / 4320.0;
+ acons[4][1] = 3.0 / 1936.0;
+ acons[4][2] = 7601.0 / 2271360.0;
+ acons[4][3] = 143.0 / 28800.0;
+ acons[5][0] = 1.0 / 23232.0;
+ acons[5][1] = 7601.0 / 13628160.0;
+ acons[5][2] = 143.0 / 69120.0;
+ acons[5][3] = 517231.0 / 106536960.0;
+ acons[5][4] = 106640677.0 / 11737571328.0;
+ acons[6][0] = 691.0 / 68140800.0;
+ acons[6][1] = 13.0 / 57600.0;
+ acons[6][2] = 47021.0 / 35512320.0;
+ acons[6][3] = 9694607.0 / 2095994880.0;
+ acons[6][4] = 733191589.0 / 59609088000.0;
+ acons[6][5] = 326190917.0 / 11700633600.0;
+ acons[7][0] = 1.0 / 345600.0;
+ acons[7][1] = 3617.0 / 35512320.0;
+ acons[7][2] = 745739.0 / 838397952.0;
+ acons[7][3] = 56399353.0 / 12773376000.0;
+ acons[7][4] = 25091609.0 / 1560084480.0;
+ acons[7][5] = 1755948832039.0 / 36229939200000.0;
+ acons[7][6] = 4887769399.0 / 37838389248.0;
+}
+
+/* ----------------------------------------------------------------------
+ free all memory
+------------------------------------------------------------------------- */
+
+PPPM::~PPPM()
+{
+ delete [] factors;
+ deallocate();
+ if (peratom_allocate_flag) deallocate_peratom();
+ if (group_allocate_flag) deallocate_groups();
+ memory->destroy(part2grid);
+ memory->destroy(acons);
+}
+
+/* ----------------------------------------------------------------------
+ called once before run
+------------------------------------------------------------------------- */
+
+void PPPM::init()
+{
+ if (me == 0) {
+ if (screen) fprintf(screen,"PPPM initialization ...\n");
+ if (logfile) fprintf(logfile,"PPPM initialization ...\n");
+ }
+
+ // error check
+
+ triclinic_check();
+ if (domain->triclinic && differentiation_flag == 1)
+ error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
+ "and kspace_modify diff ad");
+ if (domain->triclinic && slabflag)
+ error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
+ "slab correction");
+ if (domain->dimension == 2) error->all(FLERR,
+ "Cannot use PPPM with 2d simulation");
+
+ if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+ if (slabflag == 0 && domain->nonperiodic > 0)
+ error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
+ if (slabflag) {
+ if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+ domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+ error->all(FLERR,"Incorrect boundaries with slab PPPM");
+ }
+
+ if (order < 2 || order > MAXORDER) {
+ char str[128];
+ sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
+ error->all(FLERR,str);
+ }
+
+ // extract short-range Coulombic cutoff from pair style
+
+ triclinic = domain->triclinic;
+ scale = 1.0;
+
+ pair_check();
+
+ int itmp = 0;
+ double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+ if (p_cutoff == NULL)
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ cutoff = *p_cutoff;
+
+ // if kspace is TIP4P, extract TIP4P params from pair style
+ // bond/angle are not yet init(), so ensure equilibrium request is valid
+
+ qdist = 0.0;
+
+ if (tip4pflag) {
+ double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+ int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+ int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+ int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+ int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+ if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ qdist = *p_qdist;
+ typeO = *p_typeO;
+ typeH = *p_typeH;
+ int typeA = *p_typeA;
+ int typeB = *p_typeB;
+
+ if (force->angle == NULL || force->bond == NULL ||
+ force->angle->setflag == NULL || force->bond->setflag == NULL)
+ error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+ if (typeA < 1 || typeA > atom->nangletypes ||
+ force->angle->setflag[typeA] == 0)
+ error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
+ if (typeB < 1 || typeB > atom->nbondtypes ||
+ force->bond->setflag[typeB] == 0)
+ error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
+ double theta = force->angle->equilibrium_angle(typeA);
+ double blen = force->bond->equilibrium_distance(typeB);
+ alpha = qdist / (cos(0.5*theta) * blen);
+ if (domain->triclinic)
+ error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
+ }
+
+ // compute qsum & qsqsum and warn if not charge-neutral
+
+ qsum = qsqsum = 0.0;
+ for (int i = 0; i < atom->nlocal; i++) {
+ qsum += atom->q[i];
+ qsqsum += atom->q[i]*atom->q[i];
+ }
+
+ double tmp;
+ MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsum = tmp;
+ MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsqsum = tmp;
+ q2 = qsqsum * force->qqrd2e;
+
+ if (qsqsum == 0.0)
+ error->all(FLERR,"Cannot use kspace solver on system with no charge");
+ if (fabs(qsum) > SMALL && me == 0) {
+ char str[128];
+ sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+ error->warning(FLERR,str);
+ }
+
+ // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+ if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+ else accuracy = accuracy_relative * two_charge_force;
+
+ // free all arrays previously allocated
+
+ deallocate();
+ if (peratom_allocate_flag) deallocate_peratom();
+ if (group_allocate_flag) deallocate_groups();
+
+ // setup FFT grid resolution and g_ewald
+ // normally one iteration thru while loop is all that is required
+ // if grid stencil does not extend beyond neighbor proc
+ // or overlap is allowed, then done
+ // else reduce order and try again
+
+ int (*procneigh)[2] = comm->procneigh;
+
+ CommGrid *cgtmp = NULL;
+ int iteration = 0;
+
+ while (order >= minorder) {
+ if (iteration && me == 0)
+ error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
+ "beyond nearest neighbor processor");
+
+ if (stagger_flag && !differentiation_flag) compute_gf_denom();
+ set_grid_global();
+ set_grid_local();
+ if (overlap_allowed) break;
+
+ cgtmp = new CommGrid(lmp,world,1,1,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+ procneigh[0][0],procneigh[0][1],procneigh[1][0],
+ procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+ cgtmp->ghost_notify();
+ if (!cgtmp->ghost_overlap()) break;
+ delete cgtmp;
+
+ order--;
+ iteration++;
+ }
+
+ if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
+ if (!overlap_allowed && cgtmp->ghost_overlap())
+ error->all(FLERR,"PPPM grid stencil extends "
+ "beyond nearest neighbor processor");
+ if (cgtmp) delete cgtmp;
+
+ // adjust g_ewald
+
+ if (!gewaldflag) adjust_gewald();
+
+ // calculate the final accuracy
+
+ double estimated_accuracy = final_accuracy();
+
+ // print stats
+
+ int ngrid_max,nfft_both_max;
+ MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+ MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+ if (me == 0) {
+
+#ifdef FFT_SINGLE
+ const char fft_prec[] = "single";
+#else
+ const char fft_prec[] = "double";
+#endif
+
+ if (screen) {
+ fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
+ fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+ fprintf(screen," stencil order = %d\n",order);
+ fprintf(screen," estimated absolute RMS force accuracy = %g\n",
+ estimated_accuracy);
+ fprintf(screen," estimated relative force accuracy = %g\n",
+ estimated_accuracy/two_charge_force);
+ fprintf(screen," using %s precision FFTs\n",fft_prec);
+ fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
+ ngrid_max,nfft_both_max);
+ }
+ if (logfile) {
+ fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
+ fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+ fprintf(logfile," stencil order = %d\n",order);
+ fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
+ estimated_accuracy);
+ fprintf(logfile," estimated relative force accuracy = %g\n",
+ estimated_accuracy/two_charge_force);
+ fprintf(logfile," using %s precision FFTs\n",fft_prec);
+ fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
+ ngrid_max,nfft_both_max);
+ }
+ }
+
+ // allocate K-space dependent memory
+ // don't invoke allocate peratom() or group(), will be allocated when needed
+
+ allocate();
+ cg->ghost_notify();
+ cg->setup();
+
+ // pre-compute Green's function denominator expansion
+ // pre-compute 1d charge distribution coefficients
+
+ compute_gf_denom();
+ if (differentiation_flag == 1) compute_sf_precoeff();
+ compute_rho_coeff();
+}
+
+/* ----------------------------------------------------------------------
+ adjust PPPM coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void PPPM::setup()
+{
+ if (triclinic) {
+ setup_triclinic();
+ return;
+ }
+
+ int i,j,k,n;
+ double *prd;
+
+ // volume-dependent factors
+ // adjust z dimension for 2d slab PPPM
+ // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+ if (triclinic == 0) prd = domain->prd;
+ else prd = domain->prd_lamda;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+ volume = xprd * yprd * zprd_slab;
+
+ delxinv = nx_pppm/xprd;
+ delyinv = ny_pppm/yprd;
+ delzinv = nz_pppm/zprd_slab;
+
+ delvolinv = delxinv*delyinv*delzinv;
+
+ double unitkx = (MY_2PI/xprd);
+ double unitky = (MY_2PI/yprd);
+ double unitkz = (MY_2PI/zprd_slab);
+
+ // fkx,fky,fkz for my FFT grid pts
+
+ double per;
+
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ per = i - nx_pppm*(2*i/nx_pppm);
+ fkx[i] = unitkx*per;
+ }
+
+ for (i = nylo_fft; i <= nyhi_fft; i++) {
+ per = i - ny_pppm*(2*i/ny_pppm);
+ fky[i] = unitky*per;
+ }
+
+ for (i = nzlo_fft; i <= nzhi_fft; i++) {
+ per = i - nz_pppm*(2*i/nz_pppm);
+ fkz[i] = unitkz*per;
+ }
+
+ // virial coefficients
+
+ double sqk,vterm;
+
+ n = 0;
+ for (k = nzlo_fft; k <= nzhi_fft; k++) {
+ for (j = nylo_fft; j <= nyhi_fft; j++) {
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+ if (sqk == 0.0) {
+ vg[n][0] = 0.0;
+ vg[n][1] = 0.0;
+ vg[n][2] = 0.0;
+ vg[n][3] = 0.0;
+ vg[n][4] = 0.0;
+ vg[n][5] = 0.0;
+ } else {
+ vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+ vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+ vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+ vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+ vg[n][3] = vterm*fkx[i]*fky[j];
+ vg[n][4] = vterm*fkx[i]*fkz[k];
+ vg[n][5] = vterm*fky[j]*fkz[k];
+ }
+ n++;
+ }
+ }
+ }
+
+ if (differentiation_flag == 1) compute_gf_ad();
+ else compute_gf_ik();
+}
+
+/* ----------------------------------------------------------------------
+ adjust PPPM coeffs, called initially and whenever volume has changed
+ for a triclinic system
+------------------------------------------------------------------------- */
+
+void PPPM::setup_triclinic()
+{
+ int i,j,k,n;
+ double *prd;
+
+ // volume-dependent factors
+ // adjust z dimension for 2d slab PPPM
+ // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+ prd = domain->prd;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+ volume = xprd * yprd * zprd_slab;
+
+ // use lamda (0-1) coordinates
+
+ delxinv = nx_pppm;
+ delyinv = ny_pppm;
+ delzinv = nz_pppm;
+ delvolinv = delxinv*delyinv*delzinv/volume;
+
+ // fkx,fky,fkz for my FFT grid pts
+
+ double per_i,per_j,per_k;
+
+ n = 0;
+ for (k = nzlo_fft; k <= nzhi_fft; k++) {
+ per_k = k - nz_pppm*(2*k/nz_pppm);
+ for (j = nylo_fft; j <= nyhi_fft; j++) {
+ per_j = j - ny_pppm*(2*j/ny_pppm);
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ per_i = i - nx_pppm*(2*i/nx_pppm);
+
+ double unitk_lamda[3];
+ unitk_lamda[0] = 2.0*MY_PI*per_i;
+ unitk_lamda[1] = 2.0*MY_PI*per_j;
+ unitk_lamda[2] = 2.0*MY_PI*per_k;
+ x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+ fkx[n] = unitk_lamda[0];
+ fky[n] = unitk_lamda[1];
+ fkz[n] = unitk_lamda[2];
+ n++;
+ }
+ }
+ }
+
+ // virial coefficients
+
+ double sqk,vterm;
+
+ for (n = 0; n < nfft; n++) {
+ sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
+ if (sqk == 0.0) {
+ vg[n][0] = 0.0;
+ vg[n][1] = 0.0;
+ vg[n][2] = 0.0;
+ vg[n][3] = 0.0;
+ vg[n][4] = 0.0;
+ vg[n][5] = 0.0;
+ } else {
+ vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+ vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
+ vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
+ vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
+ vg[n][3] = vterm*fkx[n]*fky[n];
+ vg[n][4] = vterm*fkx[n]*fkz[n];
+ vg[n][5] = vterm*fky[n]*fkz[n];
+ }
+ }
+
+ compute_gf_ik_triclinic();
+}
+
+/* ----------------------------------------------------------------------
+ reset local grid arrays and communication stencils
+ called by fix balance b/c it changed sizes of processor sub-domains
+------------------------------------------------------------------------- */
+
+void PPPM::setup_grid()
+{
+ // free all arrays previously allocated
+
+ deallocate();
+ if (peratom_allocate_flag) deallocate_peratom();
+ if (group_allocate_flag) deallocate_groups();
+
+ // reset portion of global grid that each proc owns
+
+ set_grid_local();
+
+ // reallocate K-space dependent memory
+ // check if grid communication is now overlapping if not allowed
+ // don't invoke allocate peratom() or group(), will be allocated when needed
+
+ allocate();
+
+ cg->ghost_notify();
+ if (overlap_allowed == 0 && cg->ghost_overlap())
+ error->all(FLERR,"PPPM grid stencil extends "
+ "beyond nearest neighbor processor");
+ cg->setup();
+
+ // pre-compute Green's function denominator expansion
+ // pre-compute 1d charge distribution coefficients
+
+ compute_gf_denom();
+ if (differentiation_flag == 1) compute_sf_precoeff();
+ compute_rho_coeff();
+
+ // pre-compute volume-dependent coeffs
+
+ setup();
+}
+
+/* ----------------------------------------------------------------------
+ compute the PPPM long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void PPPM::compute(int eflag, int vflag)
+{
+ int i,j;
+
+ // set energy/virial flags
+ // invoke allocate_peratom() if needed for first time
+
+ if (eflag || vflag) ev_setup(eflag,vflag);
+ else evflag = evflag_atom = eflag_global = vflag_global =
+ eflag_atom = vflag_atom = 0;
+
+ if (evflag_atom && !peratom_allocate_flag) {
+ allocate_peratom();
+ cg_peratom->ghost_notify();
+ cg_peratom->setup();
+ }
+
+ // convert atoms from box to lamda coords
+
+ if (triclinic == 0) boxlo = domain->boxlo;
+ else {
+ boxlo = domain->boxlo_lamda;
+ domain->x2lamda(atom->nlocal);
+ }
+
+ // extend size of per-atom arrays if necessary
+
+ if (atom->nlocal > nmax) {
+ memory->destroy(part2grid);
+ nmax = atom->nmax;
+ memory->create(part2grid,nmax,3,"pppm:part2grid");
+ }
+
+ // find grid points for all my particles
+ // map my particle charge onto my local 3d density grid
+
+ particle_map();
+ make_rho();
+
+ // all procs communicate density values from their ghost cells
+ // to fully sum contribution in their 3d bricks
+ // remap from 3d decomposition to FFT decomposition
+
+ cg->reverse_comm(this,REVERSE_RHO);
+ brick2fft();
+
+ // compute potential gradient on my FFT grid and
+ // portion of e_long on this proc's FFT grid
+ // return gradients (electric fields) in 3d brick decomposition
+ // also performs per-atom calculations via poisson_peratom()
+
+ poisson();
+
+ // all procs communicate E-field values
+ // to fill ghost cells surrounding their 3d bricks
+
+ if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
+ else cg->forward_comm(this,FORWARD_IK);
+
+ // extra per-atom energy/virial communication
+
+ if (evflag_atom) {
+ if (differentiation_flag == 1 && vflag_atom)
+ cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
+ else if (differentiation_flag == 0)
+ cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
+ }
+
+ // calculate the force on my particles
+
+ fieldforce();
+
+ // extra per-atom energy/virial communication
+
+ if (evflag_atom) fieldforce_peratom();
+
+ // sum global energy across procs and add in volume-dependent term
+
+ const double qscale = force->qqrd2e * scale;
+
+ if (eflag_global) {
+ double energy_all;
+ MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+ energy = energy_all;
+
+ energy *= 0.5*volume;
+ energy -= g_ewald*qsqsum/MY_PIS +
+ MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+ energy *= qscale;
+ }
+
+ // sum global virial across procs
+
+ if (vflag_global) {
+ double virial_all[6];
+ MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+ for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+ }
+
+ // per-atom energy/virial
+ // energy includes self-energy correction
+ // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
+
+ if (evflag_atom) {
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+ int ntotal = nlocal;
+ if (tip4pflag) ntotal += atom->nghost;
+
+ if (eflag_atom) {
+ for (i = 0; i < nlocal; i++) {
+ eatom[i] *= 0.5;
+ eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+ (g_ewald*g_ewald*volume);
+ eatom[i] *= qscale;
+ }
+ for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
+ }
+
+ if (vflag_atom) {
+ for (i = 0; i < ntotal; i++)
+ for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
+ }
+ }
+
+ // 2d slab correction
+
+ if (slabflag == 1) slabcorr();
+
+ // convert atoms back from lamda to box coords
+
+ if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+ allocate memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPM::allocate()
+{
+ memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:density_brick");
+
+ memory->create(density_fft,nfft_both,"pppm:density_fft");
+ memory->create(greensfn,nfft_both,"pppm:greensfn");
+ memory->create(work1,2*nfft_both,"pppm:work1");
+ memory->create(work2,2*nfft_both,"pppm:work2");
+ memory->create(vg,nfft_both,6,"pppm:vg");
+
+ if (triclinic == 0) {
+ memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
+ memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
+ memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
+ } else {
+ memory->create(fkx,nfft_both,"pppm:fkx");
+ memory->create(fky,nfft_both,"pppm:fky");
+ memory->create(fkz,nfft_both,"pppm:fkz");
+ }
+
+ if (differentiation_flag == 1) {
+ memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:u_brick");
+
+ memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
+ memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
+ memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
+ memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
+ memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
+ memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
+
+ } else {
+ memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdx_brick");
+ memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdy_brick");
+ memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdz_brick");
+ }
+
+ // summation coeffs
+
+ order_allocated = order;
+ if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
+ memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
+ memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
+ memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
+ memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
+ "pppm:drho_coeff");
+
+ // create 2 FFTs and a Remap
+ // 1st FFT keeps data in FFT decomposition
+ // 2nd FFT returns data in 3d brick decomposition
+ // remap takes data from 3d brick to FFT decomposition
+
+ int tmp;
+
+ fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ 0,0,&tmp);
+
+ fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ 0,0,&tmp);
+
+ remap = new Remap(lmp,world,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ 1,0,0,FFT_PRECISION);
+
+ // create ghost grid object for rho and electric field communication
+
+ int (*procneigh)[2] = comm->procneigh;
+
+ if (differentiation_flag == 1)
+ cg = new CommGrid(lmp,world,1,1,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+ procneigh[0][0],procneigh[0][1],procneigh[1][0],
+ procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+ else
+ cg = new CommGrid(lmp,world,3,1,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+ procneigh[0][0],procneigh[0][1],procneigh[1][0],
+ procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+}
+
+/* ----------------------------------------------------------------------
+   free all memory that depends on # of K-vectors and order,
+   then delete the two FFTs, the Remap and the ghost-grid comm object
+------------------------------------------------------------------------- */
+
+void PPPM::deallocate()
+{
+  // charge density brick
+
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+
+  // arrays that depend on the differentiation scheme
+  // differentiation_flag == 1 : u_brick + the six sf_precoeff arrays
+  // any other value           : the three vd?_brick arrays
+
+  if (differentiation_flag != 1) {
+    memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+  } else {
+    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy(sf_precoeff1);
+    memory->destroy(sf_precoeff2);
+    memory->destroy(sf_precoeff3);
+    memory->destroy(sf_precoeff4);
+    memory->destroy(sf_precoeff5);
+    memory->destroy(sf_precoeff6);
+  }
+
+  // remaining flat arrays
+
+  memory->destroy(density_fft);
+  memory->destroy(greensfn);
+  memory->destroy(work1);
+  memory->destroy(work2);
+  memory->destroy(vg);
+
+  // k-vector arrays: plain arrays for triclinic, offset arrays otherwise
+
+  if (triclinic) {
+    memory->destroy(fkx);
+    memory->destroy(fky);
+    memory->destroy(fkz);
+  } else {
+    memory->destroy1d_offset(fkx,nxlo_fft);
+    memory->destroy1d_offset(fky,nylo_fft);
+    memory->destroy1d_offset(fkz,nzlo_fft);
+  }
+
+  // summation coeffs
+  // gf_b is additionally reset to NULL when stagger_flag is set
+
+  memory->destroy(gf_b);
+  if (stagger_flag) gf_b = NULL;
+
+  // offsets use order_allocated, the order recorded when these arrays
+  //   were created, rather than the current value of order
+
+  memory->destroy2d_offset(rho1d,-order_allocated/2);
+  memory->destroy2d_offset(drho1d,-order_allocated/2);
+  memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
+  memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
+
+  // FFTs, Remap and ghost grid communication
+
+  delete fft1;
+  delete fft2;
+  delete remap;
+  delete cg;
+}
+
+/* ----------------------------------------------------------------------
+   allocate per-atom memory that depends on # of K-vectors and order
+   sets peratom_allocate_flag, creates the v0-v5 bricks (and u_brick
+   unless differentiation_flag == 1) and the cg_peratom comm object
+------------------------------------------------------------------------- */
+
+void PPPM::allocate_peratom()
+{
+  peratom_allocate_flag = 1;
+
+  const bool ad_scheme = (differentiation_flag == 1);
+
+  // u_brick is created here only when differentiation_flag != 1
+
+  if (!ad_scheme)
+    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm:u_brick");
+
+  // six v?_brick arrays, all with the same extent as the ghosted brick
+
+  memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v0_brick");
+  memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v1_brick");
+  memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v2_brick");
+  memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v3_brick");
+  memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v4_brick");
+  memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v5_brick");
+
+  // create ghost grid object for rho and electric field communication
+  // 3rd CommGrid argument is 6 when differentiation_flag == 1, else 7
+  //   (presumably the extra value accounts for u_brick - confirm vs CommGrid)
+
+  int (*neigh)[2] = comm->procneigh;
+  const int nvalues = ad_scheme ? 6 : 7;
+
+  cg_peratom =
+    new CommGrid(lmp,world,nvalues,1,
+                 nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                 nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                 neigh[0][0],neigh[0][1],neigh[1][0],
+                 neigh[1][1],neigh[2][0],neigh[2][1]);
+}
+
+/* ----------------------------------------------------------------------
+   deallocate per-atom memory that depends on # of K-vectors and order
+   clears peratom_allocate_flag and deletes the cg_peratom comm object
+------------------------------------------------------------------------- */
+
+void PPPM::deallocate_peratom()
+{
+  peratom_allocate_flag = 0;
+
+  // u_brick belongs to the per-atom set only when differentiation_flag != 1
+
+  if (differentiation_flag != 1)
+    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+
+  // six v?_brick arrays
+
+  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
+
+  delete cg_peratom;
+}
+
+/* ----------------------------------------------------------------------
+   set global size of PPPM grid = nx,ny,nz_pppm
+   used for charge accumulation, FFTs, and electric field interpolation
+   also sets g_ewald (unless gewaldflag is set) and h_x,h_y,h_z
+   when differentiation_flag == 1 or stagger_flag is set, the search for
+     the grid size also overwrites n{x,y,z}{lo,hi}_fft as a side effect
+   errors out if accuracy <= 0, if no grid is found within 500 tries,
+     or if any grid dimension is >= OFFSET
+------------------------------------------------------------------------- */
+
+void PPPM::set_grid_global()
+{
+  // use xprd,yprd,zprd (even if triclinic, and then scale later)
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  bigint natoms = atom->natoms;
+
+  if (!gewaldflag) {
+    if (accuracy <= 0.0)
+      error->all(FLERR,"KSpace accuracy must be > 0");
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
+    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // reduce it until accuracy target is met
+
+  if (!gridflag) {
+
+    if (differentiation_flag == 1 || stagger_flag) {
+
+      // start from a coarse spacing and shrink it by 5% per iteration
+
+      double h = 4.0/g_ewald;
+      h_x = h_y = h_z = h;
+      int count = 0;
+      while (1) {
+
+        // set grid dimension, at least 2 points per dimension
+
+        nx_pppm = static_cast<int> (xprd/h_x);
+        ny_pppm = static_cast<int> (yprd/h_y);
+        nz_pppm = static_cast<int> (zprd_slab/h_z);
+
+        if (nx_pppm <= 1) nx_pppm = 2;
+        if (ny_pppm <= 1) ny_pppm = 2;
+        if (nz_pppm <= 1) nz_pppm = 2;
+
+        // set local grid dimension
+        // compute_df_kspace() -> compute_qopt() loops over the
+        //   n{x,y,z}{lo,hi}_fft ranges, so they must match the trial grid
+
+        int npey_fft,npez_fft;
+        if (nz_pppm >= nprocs) {
+          npey_fft = 1;
+          npez_fft = nprocs;
+        } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+        int me_y = me % npey_fft;
+        int me_z = me / npey_fft;
+
+        nxlo_fft = 0;
+        nxhi_fft = nx_pppm - 1;
+        nylo_fft = me_y*ny_pppm/npey_fft;
+        nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+        nzlo_fft = me_z*nz_pppm/npez_fft;
+        nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+        double df_kspace = compute_df_kspace();
+
+        count++;
+
+        // break loop if the accuracy has been reached or
+        // too many loops have been performed
+
+        if (df_kspace <= accuracy) break;
+        if (count > 500) error->all(FLERR, "Could not compute grid size");
+        h *= 0.95;
+        h_x = h_y = h_z = h;
+      }
+
+    } else {
+
+      double err;
+      h_x = h_y = h_z = 1.0/g_ewald;
+
+      nx_pppm = static_cast<int> (xprd/h_x) + 1;
+      ny_pppm = static_cast<int> (yprd/h_y) + 1;
+      nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
+
+      // refine each dimension independently until the ik error estimate
+      //   drops to the requested accuracy
+      // NOTE: err is evaluated before the grid is incremented, so the
+      //   loop test lags one step behind the current spacing and the grid
+      //   ends one increment beyond the first spacing that met the target;
+      //   left unchanged on purpose so resulting grid sizes do not change
+
+      err = estimate_ik_error(h_x,xprd,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_x,xprd,natoms);
+        nx_pppm++;
+        h_x = xprd/nx_pppm;
+      }
+
+      err = estimate_ik_error(h_y,yprd,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_y,yprd,natoms);
+        ny_pppm++;
+        h_y = yprd/ny_pppm;
+      }
+
+      err = estimate_ik_error(h_z,zprd_slab,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_z,zprd_slab,natoms);
+        nz_pppm++;
+        h_z = zprd_slab/nz_pppm;
+      }
+    }
+
+    // scale grid for triclinic skew
+
+    if (triclinic) {
+      double tmp[3];
+      tmp[0] = nx_pppm/xprd;
+      tmp[1] = ny_pppm/yprd;
+      tmp[2] = nz_pppm/zprd;
+      lamda2xT(&tmp[0],&tmp[0]);
+      nx_pppm = static_cast<int> (tmp[0]) + 1;
+      ny_pppm = static_cast<int> (tmp[1]) + 1;
+      nz_pppm = static_cast<int> (tmp[2]) + 1;
+    }
+  }
+
+  // boost grid size until it is factorable
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+
+  // final grid spacings for the grid size actually used
+
+  if (triclinic == 0) {
+    h_x = xprd/nx_pppm;
+    h_y = yprd/ny_pppm;
+    h_z = zprd_slab/nz_pppm;
+  } else {
+    double tmp[3];
+    tmp[0] = nx_pppm;
+    tmp[1] = ny_pppm;
+    tmp[2] = nz_pppm;
+    x2lamdaT(&tmp[0],&tmp[0]);
+    h_x = 1.0/tmp[0];
+    h_y = 1.0/tmp[1];
+    h_z = 1.0/tmp[2];
+  }
+
+  if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+    error->all(FLERR,"PPPM grid is too large");
+}
+
+/* ----------------------------------------------------------------------
+   check if n can be written as a product of entries of factors[]
+   return 1 if yes (also for any n <= 1), 0 if no
+------------------------------------------------------------------------- */
+
+int PPPM::factorable(int n)
+{
+  while (n > 1) {
+
+    // divide out the first listed factor that divides n evenly
+
+    bool reduced = false;
+    for (int idx = 0; idx < nfactors && !reduced; idx++) {
+      if (n % factors[idx] == 0) {
+        n /= factors[idx];
+        reduced = true;
+      }
+    }
+
+    // no listed factor divides what is left of n
+
+    if (!reduced) return 0;
+  }
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   compute estimated kspace force error
+   differentiation_flag == 1 or stagger_flag : derived from compute_qopt()
+   otherwise : RMS of the per-dimension estimate_ik_error() values
+------------------------------------------------------------------------- */
+
+double PPPM::compute_df_kspace()
+{
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz_slab = domain->zprd*slab_volfactor;
+  const bigint natoms = atom->natoms;
+
+  if (differentiation_flag == 1 || stagger_flag) {
+    const double qopt = compute_qopt();
+    return sqrt(qopt/natoms)*q2/(lx*ly*lz_slab);
+  }
+
+  // ik scheme: combine the three 1d error estimates
+
+  const double errx = estimate_ik_error(h_x,lx,natoms);
+  const double erry = estimate_ik_error(h_y,ly,natoms);
+  const double errz = estimate_ik_error(h_z,lz_slab,natoms);
+  return sqrt(errx*errx + erry*erry + errz*errz) / sqrt(3.0);
+}
+
+/* ----------------------------------------------------------------------
+ compute qopt
+ loops over the FFT grid points nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft,
+ nzlo_fft..nzhi_fft and skips the point with zero k-vector
+ for every other point, sum1-sum4 are accumulated over the 5x5x5
+ images nx,ny,nz = -2..2 and sum1 - sum2^2/(sum3*sum4) is added to qopt
+ the local qopt is summed over all procs in world and that sum is returned
+ side effect: sets volume = xprd*yprd*zprd_slab
+ called by compute_df_kspace() when differentiation_flag == 1 or
+ stagger_flag is set
+------------------------------------------------------------------------- */
+
+double PPPM::compute_qopt()
+{
+ double qopt = 0.0;
+ double *prd = domain->prd;
+
+ const double xprd = prd[0];
+ const double yprd = prd[1];
+ const double zprd = prd[2];
+ const double zprd_slab = zprd*slab_volfactor;
+ volume = xprd * yprd * zprd_slab; // side effect: updates the volume member
+
+ const double unitkx = (MY_2PI/xprd);
+ const double unitky = (MY_2PI/yprd);
+ const double unitkz = (MY_2PI/zprd_slab);
+
+ double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+ double u1, u2, sqk;
+ double sum1,sum2,sum3,sum4,dot2;
+
+ int k,l,m,nx,ny,nz;
+ const int twoorder = 2*order;
+
+ for (m = nzlo_fft; m <= nzhi_fft; m++) {
+ const int mper = m - nz_pppm*(2*m/nz_pppm); // integer division: subtracts nz_pppm once 2*m >= nz_pppm
+
+ for (l = nylo_fft; l <= nyhi_fft; l++) {
+ const int lper = l - ny_pppm*(2*l/ny_pppm);
+
+ for (k = nxlo_fft; k <= nxhi_fft; k++) {
+ const int kper = k - nx_pppm*(2*k/nx_pppm);
+
+ sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
+
+ if (sqk != 0.0) { // the zero k-vector contributes nothing
+
+ sum1 = 0.0;
+ sum2 = 0.0;
+ sum3 = 0.0;
+ sum4 = 0.0;
+ for (nx = -2; nx <= 2; nx++) {
+ qx = unitkx*(kper+nx_pppm*nx);
+ sx = exp(-0.25*square(qx/g_ewald));
+ argx = 0.5*qx*xprd/nx_pppm;
+ wx = powsinxx(argx,twoorder);
+ qx *= qx; // from here on qx holds its own square
+
+ for (ny = -2; ny <= 2; ny++) {
+ qy = unitky*(lper+ny_pppm*ny);
+ sy = exp(-0.25*square(qy/g_ewald));
+ argy = 0.5*qy*yprd/ny_pppm;
+ wy = powsinxx(argy,twoorder);
+ qy *= qy; // from here on qy holds its own square
+
+ for (nz = -2; nz <= 2; nz++) {
+ qz = unitkz*(mper+nz_pppm*nz);
+ sz = exp(-0.25*square(qz/g_ewald));
+ argz = 0.5*qz*zprd_slab/nz_pppm;
+ wz = powsinxx(argz,twoorder);
+ qz *= qz; // from here on qz holds its own square
+
+ dot2 = qx+qy+qz; // squared length of the shifted k-vector
+ u1 = sx*sy*sz;
+ u2 = wx*wy*wz;
+ sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
+ sum2 += u1 * u2 * MY_4PI;
+ sum3 += u2;
+ sum4 += dot2*u2;
+ }
+ }
+ }
+ sum2 *= sum2;
+ qopt += sum1 - sum2/(sum3*sum4);
+ }
+ }
+ }
+ }
+ double qopt_all;
+ MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); // sum the per-proc partial results
+ return qopt_all;
+}
+
+/* ----------------------------------------------------------------------
+   estimate kspace force error for ik method
+   h = grid spacing, prd = box length in one dimension,
+   natoms = atom count used to normalize the estimate
+   uses the acons[order][] coefficients, g_ewald and q2
+------------------------------------------------------------------------- */
+
+double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
+{
+  const double hg = h*g_ewald;
+
+  // polynomial in (h*g_ewald)^2 with coefficients acons[order][0..order-1]
+
+  double series = 0.0;
+  for (int m = 0; m < order; m++)
+    series += acons[order][m] * pow(hg,2.0*m);
+
+  return q2 * pow(hg,static_cast<double>(order)) *
+    sqrt(g_ewald*prd*sqrt(MY_2PI)*series/natoms) / (prd*prd);
+}
+
+/* ----------------------------------------------------------------------
+   adjust the g_ewald parameter to near its optimal value
+   using a Newton-Raphson solver
+   iterates at most LARGE times, stops once |f(g_ewald)| < SMALL,
+   otherwise errors out
+------------------------------------------------------------------------- */
+
+void PPPM::adjust_gewald()
+{
+  int iter = 0;
+  while (iter < LARGE) {
+
+    // Newton step: g_ewald <- g_ewald - f/f'
+
+    const double step = newton_raphson_f() / derivf();
+    g_ewald -= step;
+    if (fabs(newton_raphson_f()) < SMALL) return;
+    iter++;
+  }
+
+  // no convergence within LARGE iterations
+
+  error->all(FLERR,"Could not compute g_ewald");
+}
+
+/* ----------------------------------------------------------------------
+   f(g_ewald) for the Newton-Raphson solver
+   = real-space error estimate minus kspace error estimate
+   evaluated at the current value of g_ewald
+------------------------------------------------------------------------- */
+
+double PPPM::newton_raphson_f()
+{
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const bigint natoms = atom->natoms;
+
+  // real-space error uses zprd, not the slab-extended z length
+
+  const double rspace_err = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(natoms*cutoff*lx*ly*lz);
+
+  const double kspace_err = compute_df_kspace();
+
+  return rspace_err - kspace_err;
+}
+
+/* ----------------------------------------------------------------------
+   numerical derivative f'(x) of newton_raphson_f() w.r.t. g_ewald
+   forward difference [f(x + h) - f(x)] / h with h = 1.0e-6
+   g_ewald is perturbed temporarily and restored before returning
+------------------------------------------------------------------------- */
+
+double PPPM::derivf()
+{
+  const double step = 0.000001;        // derivative step-size
+  const double g_ewald_saved = g_ewald;
+
+  const double f_here = newton_raphson_f();
+
+  g_ewald += step;
+  const double f_ahead = newton_raphson_f();
+  g_ewald = g_ewald_saved;
+
+  return (f_ahead - f_here)/step;
+}
+
+/* ----------------------------------------------------------------------
+   calculate the final estimate of the accuracy
+   = sqrt of the sum of squares of the kspace, real-space
+     and table error estimates
+------------------------------------------------------------------------- */
+
+double PPPM::final_accuracy()
+{
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const bigint natoms = atom->natoms;
+
+  const double err_k = compute_df_kspace();
+
+  // q2_over_sqrt feeds both the real-space and the table estimate
+
+  const double q2_over_sqrt = q2 / sqrt(natoms*cutoff*lx*ly*lz);
+  const double err_r = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+  const double err_table = estimate_table_accuracy(q2_over_sqrt,err_r);
+
+  return sqrt(err_k*err_k + err_r*err_r + err_table*err_table);
+}
+
+/* ----------------------------------------------------------------------
+   set local subset of PPPM/FFT grid that I own
+   n xyz lo/hi in = 3d brick that I own (inclusive)
+   n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
+   n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
+   also sets nlower,nupper,shift,shiftone,boxlo,ngrid,nfft,nfft_both
+------------------------------------------------------------------------- */
+
+void PPPM::set_grid_local()
+{
+  // global indices of PPPM grid range from 0 to N-1
+  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that I own without ghost cells
+  // for slab PPPM, assign z grid as if it were not extended
+
+  nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
+  nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
+
+  nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
+  nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
+
+  nzlo_in = static_cast<int>
+    (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
+  nzhi_in = static_cast<int>
+    (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
+
+  // nlower,nupper = stencil size for mapping particles to PPPM grid
+
+  nlower = -(order-1)/2;
+  nupper = order/2;
+
+  // shift values for particle <-> grid mapping
+  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+  if (order % 2) shift = OFFSET + 0.5;
+  else shift = OFFSET;
+  if (order % 2) shiftone = 0.0;
+  else shiftone = 0.5;
+
+  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that my particles can contribute charge to
+  // effectively nlo_in,nhi_in + ghost cells
+  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+  //           position a particle in my box can be at
+  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+  //   qdist = offset due to TIP4P fictitious charge
+  //   convert to triclinic if necessary
+  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+  // for slab PPPM, assign z grid as if it were not extended
+
+  double *prd,*sublo,*subhi;
+
+  if (triclinic == 0) {
+    prd = domain->prd;
+    boxlo = domain->boxlo;
+    sublo = domain->sublo;
+    subhi = domain->subhi;
+  } else {
+    prd = domain->prd_lamda;
+    boxlo = domain->boxlo_lamda;
+    sublo = domain->sublo_lamda;
+    subhi = domain->subhi_lamda;
+  }
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double dist[3];
+  double cuthalf = 0.5*neighbor->skin + qdist;
+  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+  else kspacebbox(cuthalf,&dist[0]);
+
+  int nlo,nhi;
+
+  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
+                          nx_pppm/xprd + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
+                          nx_pppm/xprd + shift) - OFFSET;
+  nxlo_out = nlo + nlower;
+  nxhi_out = nhi + nupper;
+
+  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
+                          ny_pppm/yprd + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
+                          ny_pppm/yprd + shift) - OFFSET;
+  nylo_out = nlo + nlower;
+  nyhi_out = nhi + nupper;
+
+  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
+                          nz_pppm/zprd_slab + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
+                          nz_pppm/zprd_slab + shift) - OFFSET;
+  nzlo_out = nlo + nlower;
+  nzhi_out = nhi + nupper;
+
+  // staggered grids need one extra ghost layer on the upper side
+
+  if (stagger_flag) {
+    nxhi_out++;
+    nyhi_out++;
+    nzhi_out++;
+  }
+
+  // for slab PPPM, change the grid boundary for processors at +z end
+  //   to include the empty volume between periodically repeating slabs
+  // for slab PPPM, want charge data communicated from -z proc to +z proc,
+  //   but not vice versa, also want field data communicated from +z proc to
+  //   -z proc, but not vice versa
+  // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
+  // also ensure no other procs use ghost cells beyond +z limit
+
+  if (slabflag == 1) {
+    if (comm->myloc[2] == comm->procgrid[2]-1)
+      nzhi_in = nzhi_out = nz_pppm - 1;
+    nzhi_out = MIN(nzhi_out,nz_pppm-1);
+  }
+
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clumps of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_pppm >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_fft = 0;
+  nxhi_fft = nx_pppm - 1;
+  nylo_fft = me_y*ny_pppm/npey_fft;
+  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+  nzlo_fft = me_z*nz_pppm/npez_fft;
+  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+  // PPPM grid pts owned by this proc, including ghosts
+
+  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);
+
+  // FFT grids owned by this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
+    (nzhi_fft-nzlo_fft+1);
+  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
+    (nzhi_in-nzlo_in+1);
+  nfft_both = MAX(nfft,nfft_brick);
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
+   fills gf_b[0..order-1]
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_denom()
+{
+  // start from the sequence 1,0,0,...
+
+  gf_b[0] = 1.0;
+  for (int i = 1; i < order; i++) gf_b[i] = 0.0;
+
+  // in-place recurrence, updated from the top index down so that
+  //   gf_b[l-1] still holds its value from the previous pass
+
+  for (int m = 1; m < order; m++) {
+    for (int l = m; l > 0; l--)
+      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
+
+    // l = 0 term, which has no lower neighbor
+
+    gf_b[0] = 4.0 * (gf_b[0]*(-m)*(-m-0.5));
+  }
+
+  // divide every coefficient by (2*order-1)!
+
+  bigint factorial = 1;
+  for (int k = 2; k < 2*order; k++) factorial *= k;
+  const double gaminv = 1.0/factorial;
+  for (int i = 0; i < order; i++) gf_b[i] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
+   fills greensfn[] with one value per FFT grid point in the ranges
+     n{x,y,z}{lo,hi}_fft, x index varying fastest
+   the entry for the zero k-vector is set to 0.0
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ik()
+{
+  const double * const prd = domain->prd;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd = prd[2];
+  const double zprd_slab = zprd*slab_volfactor;
+  const double unitkx = (MY_2PI/xprd);
+  const double unitky = (MY_2PI/yprd);
+  const double unitkz = (MY_2PI/zprd_slab);
+
+  double snx,sny,snz;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+  double sqk;
+
+  int k,l,m,n,nx,ny,nz,kper,lper,mper;
+
+  // nbx,nby,nbz = number of alias images summed on each side, per dimension
+  // the EPS_HOC-dependent factor is the same for all three dimensions
+
+  const double hoc_factor = pow(-log(EPS_HOC),0.25);
+  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
+                                    hoc_factor);
+  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
+                                    hoc_factor);
+  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
+                                    hoc_factor);
+  const int twoorder = 2*order;
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+
+    // integer division: subtracts nz_pppm once 2*m >= nz_pppm
+
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
+
+        sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
+
+        if (sqk != 0.0) {
+
+          // 4*pi/k^2, using the same MY_4PI constant as compute_gf_ad()
+          //   instead of a truncated numeric literal
+
+          numerator = MY_4PI/sqk;
+          denominator = gf_denom(snx,sny,snz);
+          sum1 = 0.0;
+
+          // sum over alias images of the k-vector
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*square(qx/g_ewald));
+            argx = 0.5*qx*xprd/nx_pppm;
+            wx = powsinxx(argx,twoorder);
+
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*square(qy/g_ewald));
+              argy = 0.5*qy*yprd/ny_pppm;
+              wy = powsinxx(argy,twoorder);
+
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*square(qz/g_ewald));
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                wz = powsinxx(argz,twoorder);
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
+   for a triclinic system
+   fills greensfn[] with one value per FFT grid point in the ranges
+     n{x,y,z}{lo,hi}_fft, x index varying fastest
+   the entry for the zero k-vector is set to 0.0
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ik_triclinic()
+{
+  double snx,sny,snz;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+  double sqk;
+
+  int k,l,m,n,nx,ny,nz,kper,lper,mper;
+
+  // nbx,nby,nbz = number of alias images summed on each side, per dimension
+  // computed per lamda dimension, then passed through lamda2xT()
+
+  double tmp[3];
+  tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25);
+  tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25);
+  tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25);
+  lamda2xT(&tmp[0],&tmp[0]);
+  const int nbx = static_cast<int> (tmp[0]);
+  const int nby = static_cast<int> (tmp[1]);
+  const int nbz = static_cast<int> (tmp[2]);
+
+  const int twoorder = 2*order;
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+
+    // integer division: subtracts nz_pppm once 2*m >= nz_pppm
+
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = square(sin(MY_PI*mper/nz_pppm));
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = square(sin(MY_PI*lper/ny_pppm));
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = square(sin(MY_PI*kper/nx_pppm));
+
+        // k-vector of this grid point after x2lamdaT()
+
+        double unitk_lamda[3];
+        unitk_lamda[0] = 2.0*MY_PI*kper;
+        unitk_lamda[1] = 2.0*MY_PI*lper;
+        unitk_lamda[2] = 2.0*MY_PI*mper;
+        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+
+        sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
+
+        if (sqk != 0.0) {
+
+          // 4*pi/k^2, using the same MY_4PI constant as compute_gf_ad()
+          //   instead of a truncated numeric literal
+
+          numerator = MY_4PI/sqk;
+          denominator = gf_denom(snx,sny,snz);
+          sum1 = 0.0;
+
+          // sum over alias images of the k-vector
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            argx = MY_PI*kper/nx_pppm + MY_PI*nx;
+            wx = powsinxx(argx,twoorder);
+
+            for (ny = -nby; ny <= nby; ny++) {
+              argy = MY_PI*lper/ny_pppm + MY_PI*ny;
+              wy = powsinxx(argy,twoorder);
+
+              for (nz = -nbz; nz <= nbz; nz++) {
+                argz = MY_PI*mper/nz_pppm + MY_PI*nz;
+                wz = powsinxx(argz,twoorder);
+
+                // shift of this image after x2lamdaT()
+
+                double b[3];
+                b[0] = 2.0*MY_PI*nx_pppm*nx;
+                b[1] = 2.0*MY_PI*ny_pppm*ny;
+                b[2] = 2.0*MY_PI*nz_pppm*nz;
+                x2lamdaT(&b[0],&b[0]);
+
+                qx = unitk_lamda[0]+b[0];
+                sx = exp(-0.25*square(qx/g_ewald));
+
+                qy = unitk_lamda[1]+b[1];
+                sy = exp(-0.25*square(qy/g_ewald));
+
+                qz = unitk_lamda[2]+b[2];
+                sz = exp(-0.25*square(qz/g_ewald));
+
+                dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute optimized Green's function for energy calculation
+   fills greensfn[] (0.0 for the zero k-vector) and accumulates the six
+     self-force coefficients sf_coeff[0..5] from sf_precoeff1..6,
+     which are then scaled and summed over all procs
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ad()
+{
+  const double * const prd = domain->prd;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd_slab = prd[2]*slab_volfactor;
+  const double unitkx = (MY_2PI/xprd);
+  const double unitky = (MY_2PI/yprd);
+  const double unitkz = (MY_2PI/zprd_slab);
+
+  const int twoorder = 2*order;
+
+  for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0;
+
+  int n = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+    const double qz = unitkz*mper;
+    const double argz = 0.5*qz*zprd_slab/nz_pppm;
+    const double snz = square(sin(argz));
+    const double sz = exp(-0.25*square(qz/g_ewald));
+    const double wz = powsinxx(argz,twoorder);
+
+    for (int l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+      const double qy = unitky*lper;
+      const double argy = 0.5*qy*yprd/ny_pppm;
+      const double sny = square(sin(argy));
+      const double sy = exp(-0.25*square(qy/g_ewald));
+      const double wy = powsinxx(argy,twoorder);
+
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+        const double qx = unitkx*kper;
+        const double argx = 0.5*qx*xprd/nx_pppm;
+        const double snx = square(sin(argx));
+        const double sx = exp(-0.25*square(qx/g_ewald));
+        const double wx = powsinxx(argx,twoorder);
+
+        const double sqk = qx*qx + qy*qy + qz*qz;
+
+        // Green's function value, zero for the zero k-vector
+
+        if (sqk != 0.0) {
+          const double numerator = MY_4PI/sqk;
+          const double denominator = gf_denom(snx,sny,snz);
+          greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
+        } else {
+          greensfn[n] = 0.0;
+        }
+
+        // self-force sums are accumulated for every grid point,
+        //   including the zero k-vector where greensfn[n] is 0.0
+
+        sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
+        sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
+        sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
+        sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
+        sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
+        sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
+        n++;
+      }
+    }
+  }
+
+  // compute the coefficients for the self-force correction
+  // odd-indexed coefficients carry an extra factor of 2
+
+  const double prex = (MY_PI/volume) * (nx_pppm/xprd);
+  const double prey = (MY_PI/volume) * (ny_pppm/yprd);
+  const double prez = (MY_PI/volume) * (nz_pppm/zprd_slab);
+  sf_coeff[0] *= prex;
+  sf_coeff[1] *= prex*2;
+  sf_coeff[2] *= prey;
+  sf_coeff[3] *= prey*2;
+  sf_coeff[4] *= prez;
+  sf_coeff[5] *= prez*2;
+
+  // communicate values with other procs
+
+  double reduced[6];
+  MPI_Allreduce(sf_coeff,reduced,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int i = 0; i < 6; i++) sf_coeff[i] = reduced[i];
+}
+
+/* ----------------------------------------------------------------------
+ compute self force coefficients for ad-differentiation scheme
+ fills sf_precoeff1..sf_precoeff6 with one entry per FFT grid point in
+ nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft,
+ the linear index n advancing with the x index fastest
+ per grid point and dimension, three 5-entry tables hold
+ powsinxx(...,order) at image shifts (i-2), (i-1) and i for i = 0..4
+ each stored value is a sum over the 5x5x5 table combinations of u0
+ times the same product with one dimension's table replaced:
+ sums 1,3,5 use the (i-1) table for x,y,z, sums 2,4,6 use the i table
+ the results are read by compute_gf_ad()
+------------------------------------------------------------------------- */
+
+void PPPM::compute_sf_precoeff()
+{
+ int i,k,l,m,n;
+ int nx,ny,nz,kper,lper,mper;
+ double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
+ double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
+ double u0,u1,u2,u3,u4,u5,u6;
+ double sum1,sum2,sum3,sum4,sum5,sum6;
+
+ n = 0;
+ for (m = nzlo_fft; m <= nzhi_fft; m++) {
+ mper = m - nz_pppm*(2*m/nz_pppm); // integer division: subtracts nz_pppm once 2*m >= nz_pppm
+
+ for (l = nylo_fft; l <= nyhi_fft; l++) {
+ lper = l - ny_pppm*(2*l/ny_pppm);
+
+ for (k = nxlo_fft; k <= nxhi_fft; k++) {
+ kper = k - nx_pppm*(2*k/nx_pppm);
+
+ sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
+ for (i = 0; i < 5; i++) { // build the per-dimension tables for this grid point
+
+ qx0 = MY_2PI*(kper+nx_pppm*(i-2));
+ qx1 = MY_2PI*(kper+nx_pppm*(i-1));
+ qx2 = MY_2PI*(kper+nx_pppm*(i ));
+ wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
+ wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
+ wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
+
+ qy0 = MY_2PI*(lper+ny_pppm*(i-2));
+ qy1 = MY_2PI*(lper+ny_pppm*(i-1));
+ qy2 = MY_2PI*(lper+ny_pppm*(i ));
+ wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
+ wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
+ wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
+
+ qz0 = MY_2PI*(mper+nz_pppm*(i-2));
+ qz1 = MY_2PI*(mper+nz_pppm*(i-1));
+ qz2 = MY_2PI*(mper+nz_pppm*(i ));
+
+ wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
+ wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
+ wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
+ }
+
+ for (nx = 0; nx < 5; nx++) {
+ for (ny = 0; ny < 5; ny++) {
+ for (nz = 0; nz < 5; nz++) {
+ u0 = wx0[nx]*wy0[ny]*wz0[nz]; // reference product, all three "0" tables
+ u1 = wx1[nx]*wy0[ny]*wz0[nz]; // x table replaced
+ u2 = wx2[nx]*wy0[ny]*wz0[nz];
+ u3 = wx0[nx]*wy1[ny]*wz0[nz]; // y table replaced
+ u4 = wx0[nx]*wy2[ny]*wz0[nz];
+ u5 = wx0[nx]*wy0[ny]*wz1[nz]; // z table replaced
+ u6 = wx0[nx]*wy0[ny]*wz2[nz];
+
+ sum1 += u0*u1;
+ sum2 += u0*u2;
+ sum3 += u0*u3;
+ sum4 += u0*u4;
+ sum5 += u0*u5;
+ sum6 += u0*u6;
+ }
+ }
+ }
+
+ // store values
+
+ sf_precoeff1[n] = sum1;
+ sf_precoeff2[n] = sum2;
+ sf_precoeff3[n] = sum3;
+ sf_precoeff4[n] = sum4;
+ sf_precoeff5[n] = sum5;
+ sf_precoeff6[n++] = sum6; // advance to the next grid point
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array
+   errors out (on this proc) if any stencil leaves the ghosted brick
+------------------------------------------------------------------------- */
+
+void PPPM::particle_map()
+{
+  int nx,ny,nz;
+
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  int flag = 0;
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
+    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
+    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[i][0] = nx;
+    part2grid[i][1] = ny;
+    part2grid[i][2] = nz;
+
+    // check that entire stencil around nx,ny,nz will fit in my 3d brick
+    // the error is raised only after all particles have been mapped
+
+    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
+        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
+        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
+      flag = 1;
+  }
+
+  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid; reads part2grid, which particle_map() fills
+------------------------------------------------------------------------- */
+
+void PPPM::make_rho()
+{
+ int l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+
+ // clear 3d density array: zero ngrid values starting at the lowest corner
+
+ memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+ ngrid*sizeof(FFT_SCALAR));
+
+ // loop over my charges, add their contribution to nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+ // (dx,dy,dz) = distance to "lower left" grid pt
+ // (mx,my,mz) = global coords of moving stencil pt
+
+ double *q = atom->q;
+ double **x = atom->x;
+ int nlocal = atom->nlocal;
+
+ for (int i = 0; i < nlocal; i++) {
+
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz); // fills rho1d[0..2][] with per-dimension weights
+
+ z0 = delvolinv * q[i]; // charge times delvolinv (presumably 1/cell volume)
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ y0 = z0*rho1d[2][n]; // fold in z weight
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ x0 = y0*rho1d[1][m]; // fold in y weight
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ density_brick[mz][my][mx] += x0*rho1d[0][l]; // x weight, accumulate
+ }
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ remap density from 3d brick decomposition to FFT decomposition
+------------------------------------------------------------------------- */
+
+void PPPM::brick2fft()
+{
+ int n,ix,iy,iz;
+
+ // copy grabs inner portion of density from 3d brick
+ // remap could be done as pre-stage of FFT,
+ // but this works optimally on only double values, not complex values
+
+ n = 0; // running index into the flat density_fft array
+ for (iz = nzlo_in; iz <= nzhi_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_fft[n++] = density_brick[iz][iy][ix]; // x varies fastest
+
+ remap->perform(density_fft,density_fft,work1); // in place; work1 presumably scratch
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver: ad variant if differentiation_flag is 1, else ik
+------------------------------------------------------------------------- */
+
+void PPPM::poisson()
+{
+  if (differentiation_flag != 1) poisson_ik();
+  else poisson_ad();
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for ik: density_fft -> vdx,vdy,vdz bricks
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ik()
+{
+ int i,j,k,n;
+ double eng;
+
+ // transform charge density (r -> k)
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work1[n++] = density_fft[i]; // work1 holds interleaved pairs: first value
+ work1[n++] = ZEROF; // second value of the pair starts at ZEROF
+ }
+
+ fft1->compute(work1,work1,1); // in-place transform
+
+ // global energy and virial contribution
+
+ double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+ double s2 = scaleinv*scaleinv;
+
+ if (eflag_global || vflag_global) {
+ if (vflag_global) {
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+ for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+ if (eflag_global) energy += eng;
+ n += 2;
+ }
+ } else {
+ n = 0; // energy only: skip the virial accumulation
+ for (i = 0; i < nfft; i++) {
+ energy +=
+ s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+ n += 2;
+ }
+ }
+ }
+
+ // scale by 1/total-grid-pts to get rho(k)
+ // multiply by Green's function to get V(k)
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work1[n++] *= scaleinv * greensfn[i];
+ work1[n++] *= scaleinv * greensfn[i];
+ }
+
+ // extra FFTs for per-atom energy/virial
+
+ if (evflag_atom) poisson_peratom();
+
+ // triclinic system: gradients are computed by the triclinic variant instead
+
+ if (triclinic) {
+ poisson_ik_triclinic();
+ return;
+ }
+
+ // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+ // FFT leaves data in 3d brick decomposition
+ // copy it into inner portion of vdx,vdy,vdz arrays
+
+ // x direction gradient
+
+ n = 0;
+ for (k = nzlo_fft; k <= nzhi_fft; k++)
+ for (j = nylo_fft; j <= nyhi_fft; j++)
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ work2[n] = fkx[i]*work1[n+1]; // (a,b)*(-i*fkx) -> first part fkx*b
+ work2[n+1] = -fkx[i]*work1[n]; // second part -fkx*a
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1); // in-place transform, opposite direction flag
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdx_brick[k][j][i] = work2[n]; // keep first value of each pair only
+ n += 2;
+ }
+
+ // y direction gradient
+
+ n = 0;
+ for (k = nzlo_fft; k <= nzhi_fft; k++)
+ for (j = nylo_fft; j <= nyhi_fft; j++)
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ work2[n] = fky[j]*work1[n+1];
+ work2[n+1] = -fky[j]*work1[n];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdy_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ // z direction gradient
+
+ n = 0;
+ for (k = nzlo_fft; k <= nzhi_fft; k++)
+ for (j = nylo_fft; j <= nyhi_fft; j++)
+ for (i = nxlo_fft; i <= nxhi_fft; i++) {
+ work2[n] = fkz[k]*work1[n+1];
+ work2[n+1] = -fkz[k]*work1[n];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdz_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for ik for a triclinic system; reads V(k) in work1
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ik_triclinic()
+{
+ int i,j,k,n;
+
+ // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+ // FFT leaves data in 3d brick decomposition
+ // copy it into inner portion of vdx,vdy,vdz arrays
+
+ // x direction gradient
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // fkx is indexed per flat FFT grid point here
+ work2[n] = fkx[i]*work1[n+1];
+ work2[n+1] = -fkx[i]*work1[n];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1); // in-place transform
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdx_brick[k][j][i] = work2[n]; // keep first value of each pair only
+ n += 2;
+ }
+
+ // y direction gradient
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work2[n] = fky[i]*work1[n+1];
+ work2[n+1] = -fky[i]*work1[n];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdy_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ // z direction gradient
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work2[n] = fkz[i]*work1[n+1];
+ work2[n+1] = -fkz[i]*work1[n];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ vdz_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for ad: density_fft -> potential in u_brick
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ad()
+{
+ int i,j,k,n;
+ double eng;
+
+ // transform charge density (r -> k)
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work1[n++] = density_fft[i]; // work1 holds interleaved pairs: first value
+ work1[n++] = ZEROF; // second value of the pair starts at ZEROF
+ }
+
+ fft1->compute(work1,work1,1); // in-place transform
+
+ // global energy and virial contribution
+
+ double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+ double s2 = scaleinv*scaleinv;
+
+ if (eflag_global || vflag_global) {
+ if (vflag_global) {
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+ for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+ if (eflag_global) energy += eng;
+ n += 2;
+ }
+ } else {
+ n = 0; // energy only: skip the virial accumulation
+ for (i = 0; i < nfft; i++) {
+ energy +=
+ s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+ n += 2;
+ }
+ }
+ }
+
+ // scale by 1/total-grid-pts to get rho(k)
+ // multiply by Green's function to get V(k)
+
+ n = 0;
+ for (i = 0; i < nfft; i++) {
+ work1[n++] *= scaleinv * greensfn[i];
+ work1[n++] *= scaleinv * greensfn[i];
+ }
+
+ // extra FFTs for per-atom virial (u_brick itself is always filled below)
+
+ if (vflag_atom) poisson_peratom();
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // copy V(k) so work1 is left untouched
+ work2[n] = work1[n];
+ work2[n+1] = work1[n+1];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1); // in-place transform, opposite direction flag
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ u_brick[k][j][i] = work2[n]; // keep first value of each pair only
+ n += 2;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for per-atom energy/virial; reads V(k) in work1
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_peratom()
+{
+ int i,j,k,n;
+
+ // energy (skipped when differentiation_flag is 1)
+
+ if (eflag_atom && differentiation_flag != 1) {
+ n = 0;
+ for (i = 0; i < nfft; i++) { // plain copy of work1 into work2
+ work2[n] = work1[n];
+ work2[n+1] = work1[n+1];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1); // in-place transform
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ u_brick[k][j][i] = work2[n]; // keep first value of each pair only
+ n += 2;
+ }
+ }
+
+ // 6 components of virial in v0 thru v5
+
+ if (!vflag_atom) return;
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 0: scale both parts by vg[i][0]
+ work2[n] = work1[n]*vg[i][0];
+ work2[n+1] = work1[n+1]*vg[i][0];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v0_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 1
+ work2[n] = work1[n]*vg[i][1];
+ work2[n+1] = work1[n+1]*vg[i][1];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v1_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 2
+ work2[n] = work1[n]*vg[i][2];
+ work2[n+1] = work1[n+1]*vg[i][2];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v2_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 3
+ work2[n] = work1[n]*vg[i][3];
+ work2[n+1] = work1[n+1]*vg[i][3];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v3_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 4
+ work2[n] = work1[n]*vg[i][4];
+ work2[n+1] = work1[n+1]*vg[i][4];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v4_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+
+ n = 0;
+ for (i = 0; i < nfft; i++) { // component 5
+ work2[n] = work1[n]*vg[i][5];
+ work2[n+1] = work1[n+1]*vg[i][5];
+ n += 2;
+ }
+
+ fft2->compute(work2,work2,-1);
+
+ n = 0;
+ for (k = nzlo_in; k <= nzhi_in; k++)
+ for (j = nylo_in; j <= nyhi_in; j++)
+ for (i = nxlo_in; i <= nxhi_in; i++) {
+ v5_brick[k][j][i] = work2[n];
+ n += 2;
+ }
+}
+
+/* ----------------------------------------------------------------------
+   grid -> particle force interpolation: ad variant if flag is 1, else ik
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce()
+{
+  if (differentiation_flag != 1) fieldforce_ik();
+  else fieldforce_ad();
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get electric field & force on my particles for ik
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_ik()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR ekx,eky,ekz;
+
+ // loop over my charges, interpolate electric field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+ // (dx,dy,dz) = distance to "lower left" grid pt
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek = 3 components of E-field on particle
+
+ double *q = atom->q;
+ double **x = atom->x;
+ double **f = atom->f;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz); // fills rho1d[0..2][] with per-dimension weights
+
+ ekx = eky = ekz = ZEROF;
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ z0 = rho1d[2][n];
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ y0 = z0*rho1d[1][m];
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d[0][l]; // combined 3d weight of this stencil point
+ ekx -= x0*vdx_brick[mz][my][mx]; // field = minus weighted gradient
+ eky -= x0*vdy_brick[mz][my][mx];
+ ekz -= x0*vdz_brick[mz][my][mx];
+ }
+ }
+ }
+
+ // convert E-field to force
+
+ const double qfactor = force->qqrd2e * scale * q[i];
+ f[i][0] += qfactor*ekx;
+ f[i][1] += qfactor*eky;
+ if (slabflag != 2) f[i][2] += qfactor*ekz; // no z force when slabflag is 2
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get electric field & force on my particles for ad
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_ad()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz;
+ FFT_SCALAR ekx,eky,ekz;
+ double s1,s2,s3;
+ double sf = 0.0;
+ double *prd;
+
+ prd = domain->prd;
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+
+ double hx_inv = nx_pppm/xprd; // grid points per unit length in x
+ double hy_inv = ny_pppm/yprd;
+ double hz_inv = nz_pppm/zprd;
+
+ // loop over my charges, interpolate electric field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+ // (dx,dy,dz) = distance to "lower left" grid pt
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek = 3 components of E-field on particle
+
+ double *q = atom->q;
+ double **x = atom->x;
+ double **f = atom->f;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz); // weights into rho1d
+ compute_drho1d(dx,dy,dz); // weights from drho_coeff into drho1d
+
+ ekx = eky = ekz = ZEROF;
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+ eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+ ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+ }
+ }
+ }
+ ekx *= hx_inv; // scale each component by grid points per unit length
+ eky *= hy_inv;
+ ekz *= hz_inv;
+
+ // convert E-field to force and subtract self forces
+
+ const double qfactor = force->qqrd2e * scale;
+
+ s1 = x[i][0]*hx_inv; // position in units of grid spacing
+ s2 = x[i][1]*hy_inv;
+ s3 = x[i][2]*hz_inv;
+ sf = sf_coeff[0]*sin(2*MY_PI*s1); // self force in x: two sine terms
+ sf += sf_coeff[1]*sin(4*MY_PI*s1);
+ sf *= 2*q[i]*q[i];
+ f[i][0] += qfactor*(ekx*q[i] - sf);
+
+ sf = sf_coeff[2]*sin(2*MY_PI*s2); // self force in y
+ sf += sf_coeff[3]*sin(4*MY_PI*s2);
+ sf *= 2*q[i]*q[i];
+ f[i][1] += qfactor*(eky*q[i] - sf);
+
+
+ sf = sf_coeff[4]*sin(2*MY_PI*s3); // self force in z
+ sf += sf_coeff[5]*sin(4*MY_PI*s3);
+ sf *= 2*q[i]*q[i];
+ if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); // no z force when slabflag is 2
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get per-atom energy/virial (adds to eatom,vatom)
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_peratom()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
+
+ // loop over my charges, interpolate from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+ // (dx,dy,dz) = distance to "lower left" grid pt
+ // (mx,my,mz) = global coords of moving stencil pt
+
+ double *q = atom->q;
+ double **x = atom->x;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz); // fills rho1d[0..2][] with per-dimension weights
+
+ u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ z0 = rho1d[2][n];
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ y0 = z0*rho1d[1][m];
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d[0][l]; // combined 3d weight of this stencil point
+ if (eflag_atom) u += x0*u_brick[mz][my][mx];
+ if (vflag_atom) {
+ v0 += x0*v0_brick[mz][my][mx];
+ v1 += x0*v1_brick[mz][my][mx];
+ v2 += x0*v2_brick[mz][my][mx];
+ v3 += x0*v3_brick[mz][my][mx];
+ v4 += x0*v4_brick[mz][my][mx];
+ v5 += x0*v5_brick[mz][my][mx];
+ }
+ }
+ }
+ }
+
+ if (eflag_atom) eatom[i] += q[i]*u; // interpolated value times charge
+ if (vflag_atom) {
+ vatom[i][0] += q[i]*v0;
+ vatom[i][1] += q[i]*v1;
+ vatom[i][2] += q[i]*v2;
+ vatom[i][3] += q[i]*v3;
+ vatom[i][4] += q[i]*v4;
+ vatom[i][5] += q[i]*v5;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ pack own values to buf to send to another proc; layout depends on flag
+------------------------------------------------------------------------- */
+
+void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0; // next free slot in buf
+
+ if (flag == FORWARD_IK) { // 3 values per list entry: vdx, vdy, vdz
+ FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = xsrc[list[i]]; // list[i] = offset from the brick's lowest corner
+ buf[n++] = ysrc[list[i]];
+ buf[n++] = zsrc[list[i]];
+ }
+ } else if (flag == FORWARD_AD) { // 1 value per list entry: u
+ FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++)
+ buf[i] = src[list[i]];
+ } else if (flag == FORWARD_IK_PERATOM) { // u and/or v0..v5, per the flags
+ FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) buf[n++] = esrc[list[i]];
+ if (vflag_atom) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+ } else if (flag == FORWARD_AD_PERATOM) { // 6 values per list entry: v0..v5
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ unpack another proc's own values from buf and set own ghost values
+------------------------------------------------------------------------- */
+
+void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0; // next slot to read from buf; order mirrors pack_forward()
+
+ if (flag == FORWARD_IK) { // 3 values per list entry: vdx, vdy, vdz
+ FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ xdest[list[i]] = buf[n++]; // overwrite, not accumulate
+ ydest[list[i]] = buf[n++];
+ zdest[list[i]] = buf[n++];
+ }
+ } else if (flag == FORWARD_AD) { // 1 value per list entry: u
+ FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] = buf[i];
+ } else if (flag == FORWARD_IK_PERATOM) { // u and/or v0..v5, per the flags
+ FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; // "src" names, but written to
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) esrc[list[i]] = buf[n++];
+ if (vflag_atom) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+ } else if (flag == FORWARD_AD_PERATOM) { // 6 values per list entry: v0..v5
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; // written to here
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+   copy listed density_brick ghost values into buf for another proc
+------------------------------------------------------------------------- */
+
+void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  if (flag != REVERSE_RHO) return;
+
+  FFT_SCALAR *rho = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (int k = 0; k < nlist; k++)
+    buf[k] = rho[list[k]];   // list[k] = offset from brick's lowest corner
+}
+
+/* ----------------------------------------------------------------------
+   add another proc's ghost values in buf onto my own density_brick values
+------------------------------------------------------------------------- */
+
+void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  if (flag != REVERSE_RHO) return;
+
+  FFT_SCALAR *rho = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (int k = 0; k < nlist; k++)
+    rho[list[k]] += buf[k];   // accumulate, do not overwrite
+}
+
+/* ----------------------------------------------------------------------
+   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
+------------------------------------------------------------------------- */
+
+void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+  // try every divisor ipx of nprocs, paired with ipy = nprocs/ipx
+  // surf = boxx+boxy for the largest proc sub-domain of that layout
+  // keep the smallest surf; on a tie keep the larger boxx*boxy
+
+  int minsurf = 2 * (nx + ny);
+  int keepx = 0;
+  int keepy = 0;
+
+  for (int ipx = 1; ipx <= nprocs; ipx++) {
+    if (nprocs % ipx != 0) continue;
+
+    // sub-domain extents, rounded up when the split is uneven
+    const int ipy = nprocs/ipx;
+    const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
+    const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
+    const int surf = boxx + boxy;
+
+    // skip layouts that are neither strictly better
+    // nor an equal-surf layout with a larger box
+
+    const bool smaller = surf < minsurf;
+    const bool tie = surf == minsurf;
+    if (!smaller && !(tie && boxx*boxy > keepx*keepy)) continue;
+
+    minsurf = surf;
+    keepx = boxx;
+    keepy = boxy;
+    *px = ipx;
+    *py = ipy;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ charge assignment into rho1d
+ dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+ const FFT_SCALAR &dz)
+{
+ int k,l;
+ FFT_SCALAR r1,r2,r3;
+
+ for (k = (1-order)/2; k <= order/2; k++) { // one weight per stencil offset k
+ r1 = r2 = r3 = ZEROF;
+
+ for (l = order-1; l >= 0; l--) { // Horner evaluation, highest power first
+ r1 = rho_coeff[l][k] + r1*dx;
+ r2 = rho_coeff[l][k] + r2*dy;
+ r3 = rho_coeff[l][k] + r3*dz;
+ }
+ rho1d[0][k] = r1; // x weight
+ rho1d[1][k] = r2; // y weight
+ rho1d[2][k] = r3; // z weight
+ }
+}
+
+/* ----------------------------------------------------------------------
+ charge assignment into drho1d, using drho_coeff
+ dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+ const FFT_SCALAR &dz)
+{
+ int k,l;
+ FFT_SCALAR r1,r2,r3;
+
+ for (k = (1-order)/2; k <= order/2; k++) { // one value per stencil offset k
+ r1 = r2 = r3 = ZEROF;
+
+ for (l = order-2; l >= 0; l--) { // one degree lower than compute_rho1d()
+ r1 = drho_coeff[l][k] + r1*dx;
+ r2 = drho_coeff[l][k] + r2*dy;
+ r3 = drho_coeff[l][k] + r3*dz;
+ }
+ drho1d[0][k] = r1; // x
+ drho1d[1][k] = r2; // y
+ drho1d[2][k] = r3; // z
+ }
+}
+
+/* ----------------------------------------------------------------------
+ generate coefficients for the weight function of order n
+
+ (n-1)
+ Wn(x) = Sum wn(k,x) , Sum is over every other integer
+ k=-(n-1)
+ For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+ k is odd integers if n is even and even integers if n is odd
+ ---
+ | n-1
+ | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
+ wn(k,x) = < l=0
+ |
+ | 0 otherwise
+ ---
+ a coefficients are packed into the array rho_coeff to eliminate zeros
+ rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPM::compute_rho_coeff()
+{
+ int j,k,l,m;
+ FFT_SCALAR s;
+
+ FFT_SCALAR **a; // temporary table a[l][k], l in [0,order), k in [-order,order]
+ memory->create2d_offset(a,order,-order,order,"pppm:a");
+
+ for (k = -order; k <= order; k++)
+ for (l = 0; l < order; l++)
+ a[l][k] = 0.0;
+
+ a[0][0] = 1.0; // seed of the recursion
+ for (j = 1; j < order; j++) { // build up one order at a time
+ for (k = -j; k <= j; k += 2) {
+ s = 0.0;
+ for (l = 0; l < j; l++) {
+ a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
+#ifdef FFT_SINGLE
+ s += powf(0.5,(float) l+1) *
+ (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+ s += pow(0.5,(double) l+1) *
+ (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+ }
+ a[0][k] = s; // constant term is set after the higher powers
+ }
+ }
+
+ m = (1-order)/2; // first packed column index
+ for (k = -(order-1); k < order; k += 2) { // every other k holds the nonzeros
+ for (l = 0; l < order; l++)
+ rho_coeff[l][m] = a[l][k];
+ for (l = 1; l < order; l++)
+ drho_coeff[l-1][m] = l*a[l][k]; // term-by-term derivative: l*a(l,k)
+ m++;
+ }
+
+ memory->destroy2d_offset(a,-order);
+}
+
+/* ----------------------------------------------------------------------
+ Slab-geometry correction term to dampen inter-slab interactions between
+ periodically repeating slabs. Yields good approximation to 2D Ewald if
+ adequate empty space is left between repeating slabs (J. Chem. Phys.
+ 111, 3155). Slabs defined here to be parallel to the xy plane. Also
+ extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPM::slabcorr()
+{
+ // compute local contribution to global dipole moment
+
+ double *q = atom->q;
+ double **x = atom->x;
+ double zprd = domain->zprd;
+ int nlocal = atom->nlocal;
+
+ double dipole = 0.0;
+ for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; // sum of q*z
+
+ // sum local contributions to get global dipole moment
+
+ double dipole_all;
+ MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+ // need to make non-neutral systems and/or
+ // per-atom energy translationally invariant
+
+ double dipole_r2 = 0.0; // stays 0 unless the branch below runs
+ if (eflag_atom || fabs(qsum) > SMALL) {
+ for (int i = 0; i < nlocal; i++)
+ dipole_r2 += q[i]*x[i][2]*x[i][2]; // sum of q*z^2
+
+ // sum local contributions
+
+ double tmp;
+ MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ dipole_r2 = tmp;
+ }
+
+ // compute corrections
+
+ const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+ qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+ const double qscale = force->qqrd2e * scale;
+
+ if (eflag_global) energy += qscale * e_slabcorr;
+
+ // per-atom energy
+
+ if (eflag_atom) {
+ double efact = qscale * MY_2PI/volume;
+ for (int i = 0; i < nlocal; i++)
+ eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+ qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+ }
+
+ // add on force corrections (z component only)
+
+ double ffact = qscale * (-4.0*MY_PI/volume);
+ double **f = atom->f;
+
+ for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
+}
+
+/* ----------------------------------------------------------------------
+ perform and time the 1d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPM::timing_1d(int n, double &time1d)
+{
+ double time1,time2;
+
+ for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; // defined input data
+
+ MPI_Barrier(world); // all procs start the clock together
+ time1 = MPI_Wtime();
+
+ for (int i = 0; i < n; i++) {
+ fft1->timing1d(work1,nfft_both,1);
+ fft2->timing1d(work1,nfft_both,-1);
+ if (differentiation_flag != 1) { // two more transforms when not ad
+ fft2->timing1d(work1,nfft_both,-1);
+ fft2->timing1d(work1,nfft_both,-1);
+ }
+ }
+
+ MPI_Barrier(world); // wait for the slowest proc before stopping the clock
+ time2 = MPI_Wtime();
+ time1d = time2 - time1; // elapsed wall time for all n iterations
+
+ if (differentiation_flag) return 2; // transforms timed per iteration
+ return 4;
+}
+
+/* ----------------------------------------------------------------------
+ perform and time the 3d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPM::timing_3d(int n, double &time3d)
+{
+ double time1,time2;
+
+ for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; // defined input data
+
+ MPI_Barrier(world); // all procs start the clock together
+ time1 = MPI_Wtime();
+
+ for (int i = 0; i < n; i++) {
+ fft1->compute(work1,work1,1);
+ fft2->compute(work1,work1,-1);
+ if (differentiation_flag != 1) { // two more transforms when not ad
+ fft2->compute(work1,work1,-1);
+ fft2->compute(work1,work1,-1);
+ }
+ }
+
+ MPI_Barrier(world); // wait for the slowest proc before stopping the clock
+ time2 = MPI_Wtime();
+ time3d = time2 - time1; // elapsed wall time for all n iterations
+
+ if (differentiation_flag) return 2; // transforms timed per iteration
+ return 4;
+}
+
+/* ----------------------------------------------------------------------
+ memory usage of local arrays, returned in bytes
+------------------------------------------------------------------------- */
+
+double PPPM::memory_usage()
+{
+ double bytes = nmax*3 * sizeof(double); // presumably part2grid - TODO confirm
+ int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+ (nzhi_out-nzlo_out+1); // grid points in my brick, ghosts included
+ if (differentiation_flag == 1) {
+ bytes += 2 * nbrick * sizeof(FFT_SCALAR); // 2 bricks counted for ad
+ } else {
+ bytes += 4 * nbrick * sizeof(FFT_SCALAR); // 4 bricks counted otherwise
+ }
+ if (triclinic) bytes += 3 * nfft_both * sizeof(double);
+ bytes += 6 * nfft_both * sizeof(double);
+ bytes += nfft_both * sizeof(double);
+ bytes += nfft_both*5 * sizeof(FFT_SCALAR);
+
+ if (peratom_allocate_flag)
+ bytes += 6 * nbrick * sizeof(FFT_SCALAR); // 6 more bricks for per-atom data
+
+ if (group_allocate_flag) {
+ bytes += 2 * nbrick * sizeof(FFT_SCALAR); // 2 bricks for group data
+ bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;
+ }
+
+ bytes += cg->memory_usage(); // add what the cg object reports
+
+ return bytes;
+}
+
+/* ----------------------------------------------------------------------
+ group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ compute the PPPM total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
+{
+ if (slabflag && triclinic)
+ error->all(FLERR,"Cannot (yet) use K-space slab "
+ "correction with compute group/group for triclinic systems");
+
+ if (differentiation_flag)
+ error->all(FLERR,"Cannot (yet) use kspace_modify "
+ "diff ad with compute group/group");
+
+ if (!group_allocate_flag) allocate_groups(); // allocate on first use
+
+ // convert atoms from box to lamda coords
+
+ if (triclinic == 0) boxlo = domain->boxlo;
+ else {
+ boxlo = domain->boxlo_lamda;
+ domain->x2lamda(atom->nlocal);
+ }
+
+ e2group = 0.0; //energy
+ f2group[0] = 0.0; //force in x-direction
+ f2group[1] = 0.0; //force in y-direction
+ f2group[2] = 0.0; //force in z-direction
+
+ // map my particle charge onto my local 3d density grid
+
+ make_rho_groups(groupbit_A,groupbit_B,AA_flag);
+
+ // all procs communicate density values from their ghost cells
+ // to fully sum contribution in their 3d bricks
+ // remap from 3d decomposition to FFT decomposition
+
+ // temporarily store and switch pointers so we can
+ // use brick2fft() for groups A and B (without
+ // writing an additional function)
+
+ FFT_SCALAR ***density_brick_real = density_brick;
+ FFT_SCALAR *density_fft_real = density_fft;
+
+ // group A
+
+ density_brick = density_A_brick;
+ density_fft = density_A_fft;
+
+ cg->reverse_comm(this,REVERSE_RHO); // REVERSE_RHO pack/unpack use density_brick
+ brick2fft();
+
+ // group B
+
+ density_brick = density_B_brick;
+ density_fft = density_B_fft;
+
+ cg->reverse_comm(this,REVERSE_RHO);
+ brick2fft();
+
+ // switch back pointers
+
+ density_brick = density_brick_real;
+ density_fft = density_fft_real;
+
+ // compute potential gradient on my FFT grid and
+ // portion of group-group energy/force on this proc's FFT grid
+
+ poisson_groups(AA_flag);
+
+ const double qscale = force->qqrd2e * scale;
+
+ // total group A <--> group B energy
+ // self and boundary correction terms are in compute_group_group.cpp
+
+ double e2group_all;
+ MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
+ e2group = e2group_all;
+
+ e2group *= qscale*0.5*volume;
+
+ // total group A <--> group B force
+
+ double f2group_all[3];
+ MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
+
+ f2group[0] = qscale*volume*f2group_all[0];
+ f2group[1] = qscale*volume*f2group_all[1];
+ if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; // z left at 0.0 when slabflag is 2
+
+ // convert atoms back from lamda to box coords
+
+ if (triclinic) domain->lamda2x(atom->nlocal);
+
+ if (slabflag == 1)
+ slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
+}
+
+/* ----------------------------------------------------------------------
+   allocate the brick and FFT density grids of groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPM::allocate_groups()
+{
+  // per group: 3d brick indexed over the *_out bounds, then its FFT array
+  memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_A_brick");
+  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
+  memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_B_brick");
+  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
+  group_allocate_flag = 1;
+}
+
+/* ----------------------------------------------------------------------
+   free the brick and FFT density grids of groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPM::deallocate_groups()
+{
+  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_A_fft);
+  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_B_fft);
+
+  group_allocate_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   for group-group interactions
+   charges of group A atoms go to density_A_brick, group B to density_B_brick
+   if AA_flag is set, only atoms that are in both groups are used
+ ------------------------------------------------------------------------- */
+
+void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  // clear 3d density arrays
+
+  FFT_SCALAR *brick_A = &(density_A_brick[nzlo_out][nylo_out][nxlo_out]);
+  FFT_SCALAR *brick_B = &(density_B_brick[nzlo_out][nylo_out][nxlo_out]);
+  memset(brick_A,0,ngrid*sizeof(FFT_SCALAR));
+  memset(brick_B,0,ngrid*sizeof(FFT_SCALAR));
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (ix,iy,iz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (gx,gy,gz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    const bool in_A = (mask[i] & groupbit_A) != 0;
+    const bool in_B = (mask[i] & groupbit_B) != 0;
+
+    // AA_flag restricts the sum to atoms that belong to both groups
+
+    if (AA_flag && !(in_A && in_B)) continue;
+
+    // atoms in neither group contribute nothing
+
+    if (!in_A && !in_B) continue;
+
+    const int ix = part2grid[i][0];
+    const int iy = part2grid[i][1];
+    const int iz = part2grid[i][2];
+    const FFT_SCALAR dx = ix+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR dy = iy+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR dz = iz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    // the 3d weight is built up one dimension at a time: z, then y, then x
+    const FFT_SCALAR wz0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const int gz = n+iz;
+      const FFT_SCALAR wz = wz0*rho1d[2][n];
+      for (int m = nlower; m <= nupper; m++) {
+        const int gy = m+iy;
+        const FFT_SCALAR wzy = wz*rho1d[1][m];
+        for (int l = nlower; l <= nupper; l++) {
+          const int gx = l+ix;
+          const FFT_SCALAR wzyx = wzy*rho1d[0][l];
+
+          // group A
+
+          if (in_A) density_A_brick[gz][gy][gx] += wzyx;
+
+          // group B
+
+          if (in_B) density_B_brick[gz][gy][gx] += wzyx;
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for group-group interactions
+   transforms the FFT-decomposed densities of groups A and B to k-space
+   and adds this proc's share of the group-group energy to e2group and,
+   unless AA_flag is set, of the group-group force to f2group[0..2]
+   work1 and work2 are overwritten
+ ------------------------------------------------------------------------- */
+
+void PPPM::poisson_groups(int AA_flag)
+{
+  int i,j,k,n;
+
+  // reuse memory (already declared)
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  // transform charge density (r -> k)
+  // each density value is packed as a (re,im) pair with zero imaginary part
+
+  // group A
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] = density_A_fft[i];
+    work_A[n++] = ZEROF;
+  }
+
+  fft1->compute(work_A,work_A,1);
+
+  // group B
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_B[n++] = density_B_fft[i];
+    work_B[n++] = ZEROF;
+  }
+
+  fft1->compute(work_B,work_B,1);
+
+  // group-group energy and force contribution,
+  // keep everything in reciprocal space so
+  // no inverse FFTs needed
+
+  // the grid point count is formed in double precision, since the int
+  // product nx_pppm*ny_pppm*nz_pppm overflows once the global grid has
+  // more points than an int can hold
+
+  const double ngrid_total = static_cast<double>(nx_pppm)*ny_pppm*nz_pppm;
+  const double scaleinv = 1.0/ngrid_total;
+  const double s2 = scaleinv*scaleinv;
+
+  // energy
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    e2group += s2 * greensfn[i] *
+      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
+    n += 2;
+  }
+
+  if (AA_flag) return;
+
+  // multiply by Green's function and s2
+  // (only for work_A so it is not squared below)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] *= s2 * greensfn[i];
+    work_A[n++] *= s2 * greensfn[i];
+  }
+
+  // triclinic system
+
+  if (triclinic) {
+    poisson_groups_triclinic();
+    return;
+  }
+
+  // force in x, y and z from a single sweep over my FFT grid
+  // partial_group does not depend on the direction, so it is
+  // computed once per grid point and shared by the 3 components
+
+  double partial_group;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+
+        // force, x direction
+
+        f2group[0] += fkx[i] * partial_group;
+
+        // force, y direction
+
+        f2group[1] += fky[j] * partial_group;
+
+        // force, z direction
+
+        f2group[2] += fkz[k] * partial_group;
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for group-group interactions
+   for a triclinic system
+   adds this proc's share of the group-group force to f2group[0..2],
+   reading work1 and work2 as left by poisson_groups(),
+   i.e. work1 already carries the Green's function and s2 factors
+ ------------------------------------------------------------------------- */
+
+void PPPM::poisson_groups_triclinic()
+{
+  int i,n;
+
+  // reuse memory (already declared)
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  double partial_group;
+
+  // force in x, y and z from a single sweep over my FFT grid points
+  // partial_group does not depend on the direction, so it is
+  // computed once per grid point and shared by the 3 components
+  // fkx, fky and fkz are all indexed by the FFT grid point here
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+
+    // force, x direction
+
+    f2group[0] += fkx[i] * partial_group;
+
+    // force, y direction
+
+    f2group[1] += fky[i] * partial_group;
+
+    // force, z direction
+
+    f2group[2] += fkz[i] * partial_group;
+
+    n += 2;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane.  Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+   This group-group variant adds its terms to e2group and f2group[2].
+------------------------------------------------------------------------- */
+
+void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  double *q = atom->q;
+  double **x = atom->x;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+  const double zprd = domain->zprd;
+
+  // local sums per group: charge, charge*z and charge*z^2
+  double qsum_A = 0.0, dipole_A = 0.0, dipole_r2_A = 0.0;
+  double qsum_B = 0.0, dipole_B = 0.0, dipole_r2_B = 0.0;
+
+  for (int i = 0; i < nlocal; i++) {
+    const bool in_A = (mask[i] & groupbit_A) != 0;
+    const bool in_B = (mask[i] & groupbit_B) != 0;
+
+    // with AA_flag set, only atoms in both groups are tallied
+
+    if (AA_flag && !(in_A && in_B)) continue;
+
+    const double qz = q[i]*x[i][2];
+    const double qzz = qz*x[i][2];
+
+    if (in_A) {
+      qsum_A += q[i];
+      dipole_A += qz;
+      dipole_r2_A += qzz;
+    }
+
+    if (in_B) {
+      qsum_B += q[i];
+      dipole_B += qz;
+      dipole_r2_B += qzz;
+    }
+  }
+
+  // sum local contributions to get total charge and global dipole moment
+  // for each group
+
+  double sum_all;
+  MPI_Allreduce(&qsum_A,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_A = sum_all;
+  MPI_Allreduce(&qsum_B,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_B = sum_all;
+  MPI_Allreduce(&dipole_A,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_A = sum_all;
+  MPI_Allreduce(&dipole_B,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_B = sum_all;
+  MPI_Allreduce(&dipole_r2_A,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_A = sum_all;
+  MPI_Allreduce(&dipole_r2_B,&sum_all,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_B = sum_all;
+
+  // compute corrections
+
+  const double qscale = force->qqrd2e * scale;
+  const double efact = qscale * MY_2PI/volume;
+
+  const double dipole_term = dipole_A*dipole_B;
+  const double r2_term = 0.5*(qsum_A*dipole_r2_B + qsum_B*dipole_r2_A);
+  const double charge_term = qsum_A*qsum_B*zprd*zprd/12.0;
+  e2group += efact * (dipole_term - r2_term - charge_term);
+
+  // add on force corrections
+
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
+}
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 6d8d604f5c..426dbf0e7b 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -1,8209 +1,8209 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Rolf Isele-Holder (Aachen University)
- Paul Crozier (SNL)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm_disp.h"
-#include "math_const.h"
-#include "atom.h"
-#include "comm.h"
-#include "commgrid.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
-enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
-enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
- FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
- FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
- FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};
-
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF 1.0f
-#else
-#define ZEROF 0.0
-#define ONEF 1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
- if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
-
- triclinic_support = 0;
- pppmflag = dispersionflag = 1;
- accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
- nfactors = 3;
- factors = new int[nfactors];
- factors[0] = 2;
- factors[1] = 3;
- factors[2] = 5;
-
- MPI_Comm_rank(world,&me);
- MPI_Comm_size(world,&nprocs);
-
- csumflag = 0;
- B = NULL;
- cii = NULL;
- csumi = NULL;
- peratom_allocate_flag = 0;
-
- density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
- density_fft = NULL;
- u_brick = v0_brick = v1_brick = v2_brick = v3_brick =
- v4_brick = v5_brick = NULL;
-
- density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
- density_fft_g = NULL;
- u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g =
- v4_brick_g = v5_brick_g = NULL;
-
- density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
- density_fft_a0 = NULL;
- u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 =
- v4_brick_a0 = v5_brick_a0 = NULL;
-
- density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
- density_fft_a1 = NULL;
- u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 =
- v4_brick_a1 = v5_brick_a1 = NULL;
-
- density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
- density_fft_a2 = NULL;
- u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 =
- v4_brick_a2 = v5_brick_a2 = NULL;
-
- density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
- density_fft_a3 = NULL;
- u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 =
- v4_brick_a3 = v5_brick_a3 = NULL;
-
- density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
- density_fft_a4 = NULL;
- u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 =
- v4_brick_a4 = v5_brick_a4 = NULL;
-
- density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
- density_fft_a5 = NULL;
- u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 =
- v4_brick_a5 = v5_brick_a5 = NULL;
-
- density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
- density_fft_a6 = NULL;
- u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 =
- v4_brick_a6 = v5_brick_a6 = NULL;
-
- density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
- density_fft_none = NULL;
- u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none =
- v4_brick_none = v5_brick_none = NULL;
-
- greensfn = NULL;
- greensfn_6 = NULL;
- work1 = work2 = NULL;
- work1_6 = work2_6 = NULL;
- vg = NULL;
- vg2 = NULL;
- vg_6 = NULL;
- vg2_6 = NULL;
- fkx = fky = fkz = NULL;
- fkx2 = fky2 = fkz2 = NULL;
- fkx_6 = fky_6 = fkz_6 = NULL;
- fkx2_6 = fky2_6 = fkz2_6 = NULL;
-
- sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 =
- sf_precoeff5 = sf_precoeff6 = NULL;
- sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 =
- sf_precoeff5_6 = sf_precoeff6_6 = NULL;
-
- gf_b = NULL;
- gf_b_6 = NULL;
- rho1d = rho_coeff = NULL;
- drho1d = drho_coeff = NULL;
- rho1d_6 = rho_coeff_6 = NULL;
- drho1d_6 = drho_coeff_6 = NULL;
- fft1 = fft2 = NULL;
- fft1_6 = fft2_6 = NULL;
- remap = NULL;
- remap_6 = NULL;
-
- nmax = 0;
- part2grid = NULL;
- part2grid_6 = NULL;
-
- cg = NULL;
- cg_peratom = NULL;
- cg_6 = NULL;
- cg_peratom_6 = NULL;
-
- memset(function, 0, EWALD_FUNCS*sizeof(int));
-}
-
-/* ----------------------------------------------------------------------
- free all memory
-------------------------------------------------------------------------- */
-
-PPPMDisp::~PPPMDisp()
-{
- delete [] factors;
- delete [] B;
- B = NULL;
- delete [] cii;
- cii = NULL;
- delete [] csumi;
- csumi = NULL;
- deallocate();
- deallocate_peratom();
- memory->destroy(part2grid);
- memory->destroy(part2grid_6);
- part2grid = part2grid_6 = NULL;
-}
-
-/* ----------------------------------------------------------------------
- called once before run
-------------------------------------------------------------------------- */
-
-void PPPMDisp::init()
-{
- if (me == 0) {
- if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
- if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
- }
-
- triclinic_check();
- if (domain->dimension == 2)
- error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
-
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
- if (slabflag == 1) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
- }
-
- if (order > MAXORDER || order_6 > MAXORDER) {
- char str[128];
- sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
- error->all(FLERR,str);
- }
-
- // free all arrays previously allocated
-
- deallocate();
- deallocate_peratom();
-
- // set scale
-
- scale = 1.0;
-
- triclinic = domain->triclinic;
-
- // check whether cutoff and pair style are set
-
- pair_check();
-
- int tmp;
- Pair *pair = force->pair;
- int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
- double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
- double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
- if (!(ptr||*p_cutoff||*p_cutoff_lj))
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- cutoff = *p_cutoff;
- cutoff_lj = *p_cutoff_lj;
-
- double tmp2;
- MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world);
-
- // check out which types of potentials will have to be calculated
-
- int ewald_order = ptr ? *((int *) ptr) : 1<<1;
- int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
- memset(function, 0, EWALD_FUNCS*sizeof(int));
- for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order
- if (ewald_order&(1<pair_style);
- error->all(FLERR,str);
- }
- function[k] = 1;
- }
-
-
- // warn, if function[0] is not set but charge attribute is set!
- if (!function[0] && atom->q_flag && me == 0) {
- char str[128];
- sprintf(str, "Charges are set, but coulombic solver is not used");
- error->warning(FLERR, str);
- }
-
- // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral
-
- if (function[0]) {
- if (!atom->q_flag)
- error->all(FLERR,"Kspace style with selected options "
- "requires atom attribute q");
-
- qsum = qsqsum = 0.0;
- for (int i = 0; i < atom->nlocal; i++) {
- qsum += atom->q[i];
- qsqsum += atom->q[i]*atom->q[i];
-
- }
-
- double tmp;
- MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum = tmp;
- MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsqsum = tmp;
-
- if (qsqsum == 0.0)
- error->all(FLERR,"Cannot use kspace solver with selected options "
- "on system with no charge");
- if (fabs(qsum) > SMALL && me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
- }
-
- // if kspace is TIP4P, extract TIP4P params from pair style
- // bond/angle are not yet init(), so insure equilibrium request is valid
-
- qdist = 0.0;
-
- if (tip4pflag) {
- int itmp;
- double *p_qdist = (double *) force->pair->extract("qdist",itmp);
- int *p_typeO = (int *) force->pair->extract("typeO",itmp);
- int *p_typeH = (int *) force->pair->extract("typeH",itmp);
- int *p_typeA = (int *) force->pair->extract("typeA",itmp);
- int *p_typeB = (int *) force->pair->extract("typeB",itmp);
- if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- qdist = *p_qdist;
- typeO = *p_typeO;
- typeH = *p_typeH;
- int typeA = *p_typeA;
- int typeB = *p_typeB;
-
- if (force->angle == NULL || force->bond == NULL)
- error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
- if (typeA < 1 || typeA > atom->nangletypes ||
- force->angle->setflag[typeA] == 0)
- error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
- if (typeB < 1 || typeB > atom->nbondtypes ||
- force->bond->setflag[typeB] == 0)
- error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
- double theta = force->angle->equilibrium_angle(typeA);
- double blen = force->bond->equilibrium_distance(typeB);
- alpha = qdist / (cos(0.5*theta) * blen);
- }
-
-
- // initialize the pair style to get the coefficients
- neighrequest_flag = 0;
- pair->init();
- neighrequest_flag = 1;
- init_coeffs();
-
- //if g_ewald and g_ewald_6 have not been specified, set some initial value
- // to avoid problems when calculating the energies!
-
- if (!gewaldflag) g_ewald = 1;
- if (!gewaldflag_6) g_ewald_6 = 1;
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- int (*procneigh)[2] = comm->procneigh;
-
- int iteration = 0;
- if (function[0]) {
- CommGrid *cgtmp = NULL;
- while (order >= minorder) {
-
- if (iteration && me == 0)
- error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
- "b/c stencil extends beyond neighbor processor");
- iteration++;
-
- // set grid for dispersion interaction and coulomb interactions
-
- set_grid();
-
- if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
- error->all(FLERR,"PPPMDisp Coulomb grid is too large");
-
- set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
- nxlo_fft, nylo_fft, nzlo_fft,
- nxhi_fft, nyhi_fft, nzhi_fft,
- nxlo_in, nylo_in, nzlo_in,
- nxhi_in, nyhi_in, nzhi_in,
- nxlo_out, nylo_out, nzlo_out,
- nxhi_out, nyhi_out, nzhi_out,
- nlower, nupper,
- ngrid, nfft, nfft_both,
- shift, shiftone, order);
-
- if (overlap_allowed) break;
-
- cgtmp = new CommGrid(lmp, world,1,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,
- nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- cgtmp->ghost_notify();
- if (!cgtmp->ghost_overlap()) break;
- delete cgtmp;
-
- order--;
- }
-
- if (order < minorder)
- error->all(FLERR,
- "Coulomb PPPMDisp order has been reduced below minorder");
- if (cgtmp) delete cgtmp;
-
- // adjust g_ewald
-
- if (!gewaldflag) adjust_gewald();
-
- // calculate the final accuracy
-
- double acc = final_accuracy();
-
- // print stats
-
- int ngrid_max,nfft_both_max,nbuf_max;
- MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
- if (me == 0) {
- #ifdef FFT_SINGLE
- const char fft_prec[] = "single";
- #else
- const char fft_prec[] = "double";
- #endif
-
- if (screen) {
- fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald);
- fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
- fprintf(screen," Coulomb stencil order = %d\n",order);
- fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n",
- acc);
- fprintf(screen," Coulomb estimated relative force accuracy = %g\n",
- acc/two_charge_force);
- fprintf(screen," using %s precision FFTs\n",fft_prec);
- fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
- ngrid_max, nfft_both_max);
- }
- if (logfile) {
- fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald);
- fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
- fprintf(logfile," Coulomb stencil order = %d\n",order);
- fprintf(logfile,
- " Coulomb estimated absolute RMS force accuracy = %g\n",
- acc);
- fprintf(logfile," Coulomb estimated relative force accuracy = %g\n",
- acc/two_charge_force);
- fprintf(logfile," using %s precision FFTs\n",fft_prec);
- fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
- ngrid_max, nfft_both_max);
- }
- }
- }
-
- iteration = 0;
- if (function[1] + function[2] + function[3]) {
- CommGrid *cgtmp = NULL;
- while (order_6 >= minorder) {
-
- if (iteration && me == 0)
- error->warning(FLERR,"Reducing PPPMDisp dispersion order "
- "b/c stencil extends beyond neighbor processor");
- iteration++;
-
- set_grid_6();
-
- if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
- error->all(FLERR,"PPPMDisp Dispersion grid is too large");
-
- set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
- nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- nxlo_in_6, nylo_in_6, nzlo_in_6,
- nxhi_in_6, nyhi_in_6, nzhi_in_6,
- nxlo_out_6, nylo_out_6, nzlo_out_6,
- nxhi_out_6, nyhi_out_6, nzhi_out_6,
- nlower_6, nupper_6,
- ngrid_6, nfft_6, nfft_both_6,
- shift_6, shiftone_6, order_6);
-
- if (overlap_allowed) break;
-
- cgtmp = new CommGrid(lmp,world,1,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
- nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
- nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- cgtmp->ghost_notify();
- if (!cgtmp->ghost_overlap()) break;
- delete cgtmp;
- order_6--;
- }
-
- if (order_6 < minorder)
- error->all(FLERR,"Dispersion PPPMDisp order has been "
- "reduced below minorder");
- if (cgtmp) delete cgtmp;
-
- // adjust g_ewald_6
-
- if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6)
- adjust_gewald_6();
-
- // calculate the final accuracy
-
- double acc, acc_real, acc_kspace;
- final_accuracy_6(acc, acc_real, acc_kspace);
-
-
- // print stats
-
- int ngrid_max,nfft_both_max,nbuf_max;
- MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
- if (me == 0) {
- #ifdef FFT_SINGLE
- const char fft_prec[] = "single";
- #else
- const char fft_prec[] = "double";
- #endif
-
- if (screen) {
- fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6);
- fprintf(screen," Dispersion grid = %d %d %d\n",
- nx_pppm_6,ny_pppm_6,nz_pppm_6);
- fprintf(screen," Dispersion stencil order = %d\n",order_6);
- fprintf(screen," Dispersion estimated absolute "
- "RMS force accuracy = %g\n",acc);
- fprintf(screen," Dispersion estimated absolute "
- "real space RMS force accuracy = %g\n",acc_real);
- fprintf(screen," Dispersion estimated absolute "
- "kspace RMS force accuracy = %g\n",acc_kspace);
- fprintf(screen," Dispersion estimated relative force accuracy = %g\n",
- acc/two_charge_force);
- fprintf(screen," using %s precision FFTs\n",fft_prec);
- fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n",
- ngrid_max,nfft_both_max);
- }
- if (logfile) {
- fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6);
- fprintf(logfile," Dispersion grid = %d %d %d\n",
- nx_pppm_6,ny_pppm_6,nz_pppm_6);
- fprintf(logfile," Dispersion stencil order = %d\n",order_6);
- fprintf(logfile," Dispersion estimated absolute "
- "RMS force accuracy = %g\n",acc);
- fprintf(logfile," Dispersion estimated absolute "
- "real space RMS force accuracy = %g\n",acc_real);
- fprintf(logfile," Dispersion estimated absolute "
- "kspace RMS force accuracy = %g\n",acc_kspace);
- fprintf(logfile," Disperion estimated relative force accuracy = %g\n",
- acc/two_charge_force);
- fprintf(logfile," using %s precision FFTs\n",fft_prec);
- fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n",
- ngrid_max,nfft_both_max);
- }
- }
- }
-
- // allocate K-space dependent memory
-
- allocate();
-
- // pre-compute Green's function denomiator expansion
- // pre-compute 1d charge distribution coefficients
-
- if (function[0]) {
- compute_gf_denom(gf_b, order);
- compute_rho_coeff(rho_coeff, drho_coeff, order);
- cg->ghost_notify();
- cg->setup();
- if (differentiation_flag == 1)
- compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
- nxlo_fft, nylo_fft, nzlo_fft,
- nxhi_fft, nyhi_fft, nzhi_fft,
- sf_precoeff1, sf_precoeff2, sf_precoeff3,
- sf_precoeff4, sf_precoeff5, sf_precoeff6);
- }
- if (function[1] + function[2] + function[3]) {
- compute_gf_denom(gf_b_6, order_6);
- compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
- cg_6->ghost_notify();
- cg_6->setup();
- if (differentiation_flag == 1)
- compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
- nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
- sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
- }
-
-}
-
-/* ----------------------------------------------------------------------
- adjust PPPM coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void PPPMDisp::setup()
-{
- double *prd;
-
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPM
- // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- // compute fkx,fky,fkz for my FFT grid pts
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- //compute the virial coefficients and green functions
- if (function[0]){
-
- delxinv = nx_pppm/xprd;
- delyinv = ny_pppm/yprd;
- delzinv = nz_pppm/zprd_slab;
-
- delvolinv = delxinv*delyinv*delzinv;
-
- double per;
- int i, j, k, n;
-
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- per = i - nx_pppm*(2*i/nx_pppm);
- fkx[i] = unitkx*per;
- j = (nx_pppm - i) % nx_pppm;
- per = j - nx_pppm*(2*j/nx_pppm);
- fkx2[i] = unitkx*per;
- }
-
- for (i = nylo_fft; i <= nyhi_fft; i++) {
- per = i - ny_pppm*(2*i/ny_pppm);
- fky[i] = unitky*per;
- j = (ny_pppm - i) % ny_pppm;
- per = j - ny_pppm*(2*j/ny_pppm);
- fky2[i] = unitky*per;
- }
-
- for (i = nzlo_fft; i <= nzhi_fft; i++) {
- per = i - nz_pppm*(2*i/nz_pppm);
- fkz[i] = unitkz*per;
- j = (nz_pppm - i) % nz_pppm;
- per = j - nz_pppm*(2*j/nz_pppm);
- fkz2[i] = unitkz*per;
- }
-
- double sqk,vterm;
- double gew2inv = 1/(g_ewald*g_ewald);
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++) {
- for (j = nylo_fft; j <= nyhi_fft; j++) {
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
- if (sqk == 0.0) {
- vg[n][0] = 0.0;
- vg[n][1] = 0.0;
- vg[n][2] = 0.0;
- vg[n][3] = 0.0;
- vg[n][4] = 0.0;
- vg[n][5] = 0.0;
- } else {
- vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
- vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
- vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
- vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
- vg[n][3] = vterm*fkx[i]*fky[j];
- vg[n][4] = vterm*fkx[i]*fkz[k];
- vg[n][5] = vterm*fky[j]*fkz[k];
- vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
- vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
- vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
- }
- n++;
- }
- }
- }
- compute_gf();
- if (differentiation_flag == 1) compute_sf_coeff();
- }
-
- if (function[1] + function[2] + function[3]) {
- delxinv_6 = nx_pppm_6/xprd;
- delyinv_6 = ny_pppm_6/yprd;
- delzinv_6 = nz_pppm_6/zprd_slab;
- delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
-
- double per;
- int i, j, k, n;
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- per = i - nx_pppm_6*(2*i/nx_pppm_6);
- fkx_6[i] = unitkx*per;
- j = (nx_pppm_6 - i) % nx_pppm_6;
- per = j - nx_pppm_6*(2*j/nx_pppm_6);
- fkx2_6[i] = unitkx*per;
- }
- for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
- per = i - ny_pppm_6*(2*i/ny_pppm_6);
- fky_6[i] = unitky*per;
- j = (ny_pppm_6 - i) % ny_pppm_6;
- per = j - ny_pppm_6*(2*j/ny_pppm_6);
- fky2_6[i] = unitky*per;
- }
- for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
- per = i - nz_pppm_6*(2*i/nz_pppm_6);
- fkz_6[i] = unitkz*per;
- j = (nz_pppm_6 - i) % nz_pppm_6;
- per = j - nz_pppm_6*(2*j/nz_pppm_6);
- fkz2_6[i] = unitkz*per;
- }
- double sqk,vterm;
- long double erft, expt,nom, denom;
- long double b, bs, bt;
- double rtpi = sqrt(MY_PI);
- double gewinv = 1/g_ewald_6;
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
- if (sqk == 0.0) {
- vg_6[n][0] = 0.0;
- vg_6[n][1] = 0.0;
- vg_6[n][2] = 0.0;
- vg_6[n][3] = 0.0;
- vg_6[n][4] = 0.0;
- vg_6[n][5] = 0.0;
- } else {
- b = 0.5*sqrt(sqk)*gewinv;
- bs = b*b;
- bt = bs*b;
- erft = 2*bt*rtpi*erfc(b);
- expt = exp(-bs);
- nom = erft - 2*bs*expt;
- denom = nom + expt;
- if (denom == 0) vterm = 3.0/sqk;
- else vterm = 3.0*nom/(sqk*denom);
- vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
- vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
- vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
- vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
- vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
- vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
- vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
- vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
- vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
- }
- n++;
- }
- }
- }
- compute_gf_6();
- if (differentiation_flag == 1) compute_sf_coeff_6();
- }
-}
-
-/* ----------------------------------------------------------------------
-   rebuild the local grid arrays and the grid communication stencils
-   invoked by fix balance after it changed the processor sub-domain sizes
-------------------------------------------------------------------------- */
-
-void PPPMDisp::setup_grid()
-{
-  // which solvers are active:
-  // function[0] selects the Coulomb grid, function[1..3] the dispersion grid
-
-  const int coulomb_on = function[0];
-  const int dispersion_on = function[1] + function[2] + function[3];
-
-  // release every array that was allocated for the old decomposition
-
-  deallocate();
-  deallocate_peratom();
-
-  // recompute the part of each global grid that this proc owns
-
-  if (coulomb_on) {
-    set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
-                       nxlo_fft, nylo_fft, nzlo_fft,
-                       nxhi_fft, nyhi_fft, nzhi_fft,
-                       nxlo_in, nylo_in, nzlo_in,
-                       nxhi_in, nyhi_in, nzhi_in,
-                       nxlo_out, nylo_out, nzlo_out,
-                       nxhi_out, nyhi_out, nzhi_out,
-                       nlower, nupper,
-                       ngrid, nfft, nfft_both,
-                       shift, shiftone, order);
-  }
-
-  if (dispersion_on) {
-    set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
-                       nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
-                       nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                       nxlo_in_6, nylo_in_6, nzlo_in_6,
-                       nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                       nxlo_out_6, nylo_out_6, nzlo_out_6,
-                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
-                       nlower_6, nupper_6,
-                       ngrid_6, nfft_6, nfft_both_6,
-                       shift_6, shiftone_6, order_6);
-  }
-
-  // reallocate the K-space dependent memory
-  // allocate_peratom() is deliberately not called here,
-  // compute() allocates the per-atom grids when they are first needed
-
-  allocate();
-
-  // set up ghost-grid communication for each active grid
-  // stop with an error if the stencil overlaps beyond the nearest
-  // neighbor procs although overlap is not allowed
-
-  if (coulomb_on) {
-    cg->ghost_notify();
-    if (!overlap_allowed && cg->ghost_overlap()) {
-      error->all(FLERR,"PPPM grid stencil extends "
-                 "beyond nearest neighbor processor");
-    }
-    cg->setup();
-  }
-
-  if (dispersion_on) {
-    cg_6->ghost_notify();
-    if (!overlap_allowed && cg_6->ghost_overlap()) {
-      error->all(FLERR,"PPPM grid stencil extends "
-                 "beyond nearest neighbor processor");
-    }
-    cg_6->setup();
-  }
-
-  // pre-compute the Green's function denominator expansion
-  // and the 1d charge distribution coefficients
-  // (plus the self-force pre-coefficients when differentiation_flag == 1)
-
-  if (coulomb_on) {
-    compute_gf_denom(gf_b, order);
-    compute_rho_coeff(rho_coeff, drho_coeff, order);
-    if (differentiation_flag == 1) {
-      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
-                          nxlo_fft, nylo_fft, nzlo_fft,
-                          nxhi_fft, nyhi_fft, nzhi_fft,
-                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
-                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
-    }
-  }
-
-  if (dispersion_on) {
-    compute_gf_denom(gf_b_6, order_6);
-    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
-    if (differentiation_flag == 1) {
-      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
-                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
-                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
-                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
-    }
-  }
-
-  // finally pre-compute the volume-dependent coefficients
-
-  setup();
-}
-
-/* ----------------------------------------------------------------------
- compute the PPPM long-range force, energy, virial
-
- eflag and vflag are handed to ev_setup() when either is non-zero,
- otherwise all energy/virial flags are cleared
- for each active solver (function[0] = coulomb, function[1] = geometric
- mixing, function[2] = arithmetic mixing, function[3] = no mixing rule)
- the sequence is: map particles to the grid, build the density,
- reverse-communicate the ghost cells, remap brick to fft layout, solve
- in k-space (poisson_ad if differentiation_flag == 1, else poisson_ik),
- forward-communicate the result and call the matching fieldforce routine
- afterwards energy and virial are summed over all procs with
- MPI_Allreduce and the correction terms are applied
- NOTE(review): in the function[3] branch the two "for (int k = 0; k..."
- lines are truncated in this copy of the source (the loop text is
- missing), so that part of the routine cannot be reviewed from here
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute(int eflag, int vflag)
-{
-
- int i;
- // set the energy/virial flags via ev_setup(), or clear all of them if nothing is requested
-
- if (eflag || vflag) ev_setup(eflag,vflag);
- else evflag = evflag_atom = eflag_global = vflag_global =
- eflag_atom = vflag_atom = 0;
-
- if (evflag_atom && !peratom_allocate_flag) { // per-atom grids are allocated on first use only
- allocate_peratom();
- if (function[0]) {
- cg_peratom->ghost_notify();
- cg_peratom->setup();
- }
- if (function[1] + function[2] + function[3]) {
- cg_peratom_6->ghost_notify();
- cg_peratom_6->setup();
- }
- peratom_allocate_flag = 1;
- }
-
- if (triclinic == 0) boxlo = domain->boxlo;
- else {
- boxlo = domain->boxlo_lamda;
- domain->x2lamda(atom->nlocal); // convert atoms from box to lamda coords (undone at the end of this routine)
- }
- // extend size of per-atom arrays if necessary
-
- if (atom->nlocal > nmax) {
-
- if (function[0]) memory->destroy(part2grid);
- if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
- nmax = atom->nmax;
- if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
- if (function[1] + function[2] + function[3])
- memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
- }
-
-
- energy = 0.0;
- energy_1 = 0.0;
- energy_6 = 0.0;
- if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
-
- // find grid points for all my particles
- // distribute particles' charges/dispersion coefficients on the grid
- // communication between processors and remapping to fft
- // solution of Poisson's equation in k-space and back-transformation
- // communication between processors
- // calculation of forces
-
- if (function[0]) {
-
- // perform calculations for coulomb interactions only
-
- particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
- nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
-
- make_rho_c();
-
- cg->reverse_comm(this,REVERSE_RHO);
-
- brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
- density_brick, density_fft, work1,remap);
-
- if (differentiation_flag == 1) {
-
- poisson_ad(work1, work2, density_fft, fft1, fft2,
- nx_pppm, ny_pppm, nz_pppm, nfft,
- nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
- nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
- energy_1, greensfn,
- virial_1, vg,vg2,
- u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
-
- cg->forward_comm(this,FORWARD_AD);
-
- fieldforce_c_ad();
-
- if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
-
- } else {
- poisson_ik(work1, work2, density_fft, fft1, fft2,
- nx_pppm, ny_pppm, nz_pppm, nfft,
- nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
- nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
- energy_1, greensfn,
- fkx, fky, fkz,fkx2, fky2, fkz2,
- vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
- u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
-
- cg->forward_comm(this, FORWARD_IK);
-
- fieldforce_c_ik();
-
- if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
- }
- if (evflag_atom) fieldforce_c_peratom();
- }
-
- if (function[1]) {
- // perform calculations for geometric mixing
- particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
- nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
- make_rho_g();
-
-
- cg_6->reverse_comm(this, REVERSE_RHO_G);
-
- brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
- density_brick_g, density_fft_g, work1_6,remap_6);
-
- if (differentiation_flag == 1) {
-
- poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
- nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
- energy_6, greensfn_6,
- virial_6, vg_6, vg2_6,
- u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
-
- cg_6->forward_comm(this,FORWARD_AD_G);
-
- fieldforce_g_ad();
-
- if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
-
- } else {
- poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
- nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
- energy_6, greensfn_6,
- fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
- vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
- u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
-
- cg_6->forward_comm(this,FORWARD_IK_G);
-
- fieldforce_g_ik();
-
-
- if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
- }
- if (evflag_atom) fieldforce_g_peratom();
- }
-
- if (function[2]) {
- // perform calculations for arithmetic mixing (seven density grids a0..a6)
- particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
- nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
- make_rho_a();
-
- cg_6->reverse_comm(this, REVERSE_RHO_A);
-
- brick2fft_a();
-
- if ( differentiation_flag == 1) {
-
- poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
- nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
- energy_6, greensfn_6,
- virial_6, vg_6, vg2_6,
- u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
- poisson_2s_ad(density_fft_a0, density_fft_a6,
- u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
- u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
- poisson_2s_ad(density_fft_a1, density_fft_a5,
- u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
- u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
- poisson_2s_ad(density_fft_a2, density_fft_a4,
- u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
- u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
-
- cg_6->forward_comm(this, FORWARD_AD_A);
-
- fieldforce_a_ad();
-
- if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
-
- } else {
-
- poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
- nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
- nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
- nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
- energy_6, greensfn_6,
- fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
- vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
- u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
- poisson_2s_ik(density_fft_a0, density_fft_a6,
- vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
- vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
- u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
- u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
- poisson_2s_ik(density_fft_a1, density_fft_a5,
- vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
- vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
- u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
- u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
- poisson_2s_ik(density_fft_a2, density_fft_a4,
- vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
- vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
- u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
- u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
-
- cg_6->forward_comm(this, FORWARD_IK_A);
-
- fieldforce_a_ik();
-
- if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
- }
- if (evflag_atom) fieldforce_a_peratom();
- }
-
- if (function[3]) {
- // perform calculations if no mixing rule applies
- particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
- nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
-
- make_rho_none();
-
- cg_6->reverse_comm(this, REVERSE_RHO_NONE);
-
- brick2fft_none();
-
- if (differentiation_flag == 1) {
-
- int n = 0;
- for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); // NOTE(review): line is truncated in this copy, the loop condition and body are missing
-
- fieldforce_none_ad();
-
- if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
-
- } else {
- int n = 0;
- for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); // NOTE(review): line is truncated in this copy, the loop condition and body are missing
-
- fieldforce_none_ik();
-
-
- if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
- }
- if (evflag_atom) fieldforce_none_peratom();
- }
-
- // sum energy across procs and add in volume-dependent term
-
- const double qscale = force->qqrd2e * scale;
- if (eflag_global) {
- double energy_all;
- MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
- energy_1 = energy_all;
- MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
- energy_6 = energy_all;
-
- energy_1 *= 0.5*volume;
- energy_6 *= 0.5*volume;
-
- energy_1 -= g_ewald*qsqsum/MY_PIS +
- MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
- energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
- 1.0/12.0*pow(g_ewald_6,6)*csum;
- energy_1 *= qscale; // only the coulomb energy is scaled by qscale
- }
-
- // sum virial across procs
-
- if (vflag_global) {
- double virial_all[6];
- MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
- for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
- MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
- for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
- if (function[1]+function[2]+function[3]){ // dispersion term subtracted from the three diagonal components
- double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
- virial[0] -= a;
- virial[1] -= a;
- virial[2] -= a;
- }
- }
-
- if (eflag_atom) {
- if (function[0]) {
- double *q = atom->q;
- for (i = 0; i < atom->nlocal; i++) {
- eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
- }
- }
- if (function[1] + function[2] + function[3]) {
- int tmp;
- for (i = 0; i < atom->nlocal; i++) {
- tmp = atom->type[i]; // atom type, used as index into csumi and cii
- eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
- 1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
- }
- }
- }
-
- if (vflag_atom) {
- if (function[1] + function[2] + function[3]) {
- int tmp;
- for (i = 0; i < atom->nlocal; i++) {
- tmp = atom->type[i];
- for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
- }
- }
- }
-
-
- // 2d slab correction
-
- if (slabflag) slabcorr(eflag);
- if (function[0]) energy += energy_1;
- if (function[1] + function[2] + function[3]) energy += energy_6;
-
- // convert atoms back from lamda to box coords
-
- if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
-   initialize coefficients needed for the dispersion density on the grids
-
-   reads the pair style's "B" matrix (plus "epsilon" and "sigma" for
-   arithmetic mixing) through force->pair->extract() and fills the
-   member array B:
-     function[1] (geometric):  n+1 entries, sqrt(|b[i][i]|)
-     function[2] (arithmetic): 7 entries for each type index 0..n
-     function[3] (no mixing):  nsplit eigenvalues, followed by the
-                               nsplit eigenvector components of each type
-   may change function[1..3]: one significant eigenvalue switches to
-   geometric mixing; arithmetic mixing with <= 6 significant eigenvalues
-   switches to the eigenvalue-based splitting (function[3])
-   B is reset to NULL every time it is freed, so it never dangles
-------------------------------------------------------------------------- */
-
-void PPPMDisp::init_coeffs()  // local pair coeffs
-{
-  int tmp;
-  int n = atom->ntypes;
-  int converged;
-
-  // release the coefficients of a previous call and clear the pointer:
-  // if nothing is allocated below, B must not keep pointing at freed
-  // memory, or the delete [] of the next call would free it twice
-
-  delete [] B;
-  B = NULL;
-
-  if (function[3] + function[2]) {  // no mixing rule or arithmetic
-    if (function[2] && me == 0) {
-      if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n");
-      if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n");
-    }
-
-    // get dispersion coefficients
-    double **b = (double **) force->pair->extract("B",tmp);
-
-    // allocate data for eigenvalue decomposition
-    double **A;
-    double **Q;
-    memory->create(A,n,n,"pppm/disp:A");
-    memory->create(Q,n,n,"pppm/disp:Q");
-
-    // copy the coefficients (type indices 1..n) into the 0-based matrix A
-    for (int i = 1; i <= n; i++)
-      for (int j = 1; j <= n; j++)
-        A[i-1][j-1] = b[i][j];
-
-    // initialize Q as the identity matrix
-    for (int i = 0; i < n; i++)
-      for (int j = 0; j < n; j++)
-        Q[i][j] = 0.0;
-    for (int i = 0; i < n; i++)
-      Q[i][i] = 1.0;
-
-    // perform eigenvalue decomposition with QR algorithm
-    // a failed factorization is only fatal when no mixing rule applies
-    converged = qr_alg(A,Q,n);
-    if (function[3] && !converged) {
-      error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
-    }
-
-    // determine number of used eigenvalues
-    // based on maximum allowed number or cutoff criterion
-    // sort eigenvalues by decreasing magnitude with bubble sort,
-    // swapping the matching columns of Q along with them
-    double t;
-    for (int i = 0; i < n; i++) {
-      for (int j = 0; j < n-1-i; j++) {
-        if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
-          t = A[j][j];
-          A[j][j] = A[j+1][j+1];
-          A[j+1][j+1] = t;
-          for (int k = 0; k < n; k++) {
-            t = Q[k][j];
-            Q[k][j] = Q[k][j+1];
-            Q[k][j+1] = t;
-          }
-        }
-      }
-    }
-
-    // count the leading eigenvalues whose magnitude exceeds
-    // splittol times the largest one; bmax is the first one dropped
-    double amax = fabs(A[0][0]);
-    double acrit = amax*splittol;
-    double bmax = 0;
-    double err = 0;
-    nsplit = 0;
-    for (int i = 0; i < n; i++) {
-      if (fabs(A[i][i]) > acrit) nsplit++;
-      else {
-        bmax = fabs(A[i][i]);
-        break;
-      }
-    }
-
-    // relative size of the first neglected eigenvalue
-    err = bmax/amax;
-    if (err > 1.0e-4) {
-      char str[128];
-      sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err);
-      error->warning(FLERR, str);
-    }
-
-    // set B: eigenvalues first, then nsplit eigenvector entries per type
-    B = new double[nsplit*n+nsplit];
-    for (int i = 0; i< nsplit; i++) {
-      B[i] = A[i][i];
-      for (int j = 0; j < n; j++) {
-        B[nsplit*(j+1) + i] = Q[j][i];
-      }
-    }
-
-    // nsplit_alloc is nsplit rounded up to an even number
-    nsplit_alloc = nsplit;
-    if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
-
-    // check if the function should preferably be [1] or [2] or [3]
-    if (nsplit == 1) {
-      // B is rebuilt by the function[1] branch below
-      delete [] B;
-      B = NULL;
-      function[3] = 0;
-      function[2] = 0;
-      function[1] = 1;
-      if (me == 0) {
-        if (screen) fprintf(screen," Using geometric mixing for reciprocal space\n");
-        if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n");
-      }
-    }
-    if (function[2] && nsplit <= 6) {
-      if (me == 0) {
-        if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit);
-        if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit);
-      }
-      function[3] = 1;
-      function[2] = 0;
-    }
-    if (function[2] && (nsplit > 6)) {
-      if (me == 0) {
-        if (screen) fprintf(screen," Using 7 structure factors\n");
-        if (logfile) fprintf(logfile," Using 7 structure factors\n");
-      }
-      // B is rebuilt by the function[2] branch below
-      delete [] B;
-      B = NULL;
-    }
-    if (function[3]) {
-      if (me == 0) {
-        if (screen) fprintf(screen," Using %d structure factors\n",nsplit);
-        if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit);
-      }
-      if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
-    }
-
-    memory->destroy(A);
-    memory->destroy(Q);
-  }
-  if (function[1]) {  // geometric 1/r^6
-    double **b = (double **) force->pair->extract("B",tmp);
-    B = new double[n+1];
-    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
-  }
-  if (function[2]) {  // arithmetic 1/r^6
-    //cannot use epsilon, because this has not been set yet
-    double **epsilon = (double **) force->pair->extract("epsilon",tmp);
-    //cannot use sigma, because this has not been set yet
-    double **sigma = (double **) force->pair->extract("sigma",tmp);
-    if (!(epsilon&&sigma))
-      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
-    // bi walks through B while the 7 terms of each type are written
-    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
-    double c[7] = {
-      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
-    for (int i=0; i<=n; ++i) {
-      eps_i = sqrt(epsilon[i][i]);
-      sigma_i = sigma[i][i];
-      sigma_n = 1.0;
-      // term j is 0.25*c[j]*sqrt(epsilon_ii)*sigma_ii^j
-      for (int j=0; j<7; ++j) {
-        *(bi++) = sigma_n*eps_i*c[j]*0.25;
-        sigma_n *= sigma_i;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Eigenvalue decomposition of a real, symmetric matrix with the QR
-   method (includes transformation to Tridiagonal Matrix + Wilkinson
-   shift)
-
-   A (n x n) is overwritten during the iteration; the caller reads the
-   eigenvalues from its diagonal and the eigenvectors from the columns
-   of Q, which must be passed in as the identity matrix
-   returns 1 if check_convergence() accepted the result, 0 if the
-   iteration limit was reached first
-   a matrix with n < 2 is already diagonal and is returned unchanged
-------------------------------------------------------------------------- */
-
-int PPPMDisp::qr_alg(double **A, double **Q, int n)
-{
-  // nothing to factorize for n < 2; returning here also keeps the
-  // Wilkinson shift below from reading A[n-2][n-2] out of bounds
-  if (n < 2) return 1;
-
-  int converged = 0;
-  double an1, an, bn1, d, mue;
-  // allocate some memory for the required operations
-  double **A0,**Qi,**C,**D,**E;
-  // make a copy of A for convergence check
-  memory->create(A0,n,n,"pppm/disp:A0");
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      A0[i][j] = A[i][j];
-
-  // allocate an auxiliary matrix Qi
-  memory->create(Qi,n,n,"pppm/disp:Qi");
-
-  // allocate auxiliary matrices for the matrix multiplication
-  memory->create(C,n,n,"pppm/disp:C");
-  memory->create(D,n,n,"pppm/disp:D");
-  memory->create(E,n,n,"pppm/disp:E");
-
-  // transform Matrix A to Tridiagonal form
-  hessenberg(A,Q,n);
-
-  // start loop for the matrix factorization
-  int count = 0;
-  int countmax = 100000;
-  while (1) {
-    // make a Wilkinson shift, computed from the trailing 2x2 block of A
-    an1 = A[n-2][n-2];
-    an = A[n-1][n-1];
-    bn1 = A[n-2][n-1];
-    d = (an1-an)/2;
-    mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
-    for (int i = 0; i < n; i++)
-      A[i][i] -= mue;
-
-    // perform a QR factorization for a tridiagonal matrix A
-    qr_tri(Qi,A,n);
-
-    // update the matrices (C is passed as scratch space)
-    mmult(A,Qi,C,n);
-    mmult(Q,Qi,C,n);
-
-    // backward Wilkinson shift
-    for (int i = 0; i < n; i++)
-      A[i][i] += mue;
-
-    // check the convergence, give up after countmax iterations
-    converged = check_convergence(A,Q,A0,C,D,E,n);
-    if (converged) break;
-    count = count + 1;
-    if (count == countmax) break;
-  }
-
-  // free allocated memory
-  memory->destroy(Qi);
-  memory->destroy(A0);
-  memory->destroy(C);
-  memory->destroy(D);
-  memory->destroy(E);
-
-  return converged;
-}
-
-/* ----------------------------------------------------------------------
-   Transform a Matrix to Hessenberg form (for symmetric Matrices, the
-   result will be a tridiagonal matrix)
-
-   for every column i the entries A[j][i] with j >= i+2 are eliminated
-   by a plane rotation of rows/columns i+1 and j; the same rotation is
-   applied from the right to Q
-   A and Q (both n x n) are modified in place
-   a rotation whose two defining entries are both zero is skipped
-------------------------------------------------------------------------- */
-
-void PPPMDisp::hessenberg(double **A, double **Q, int n)
-{
-  double r,a,b,c,s,x1,x2;
-  for (int i = 0; i < n-1; i++) {
-    for (int j = i+2; j < n; j++) {
-      // compute coeffs for the rotation matrix
-      a = A[i+1][i];
-      b = A[j][i];
-      r = sqrt(a*a + b*b);
-      // a == b == 0: A[j][i] is already eliminated, and a/r, b/r would
-      // evaluate to 0/0 = NaN and spread through A and Q
-      if (r == 0.0) continue;
-      c = a/r;
-      s = b/r;
-      // update the entries of A with multiplication from the left
-      for (int k = 0; k < n; k++) {
-        x1 = A[i+1][k];
-        x2 = A[j][k];
-        A[i+1][k] = c*x1 + s*x2;
-        A[j][k] = -s*x1 + c*x2;
-      }
-      // update the entries of A and Q with a multiplication from the right
-      for (int k = 0; k < n; k++) {
-        x1 = A[k][i+1];
-        x2 = A[k][j];
-        A[k][i+1] = c*x1 + s*x2;
-        A[k][j] = -s*x1 + c*x2;
-        x1 = Q[k][i+1];
-        x2 = Q[k][j];
-        Q[k][i+1] = c*x1 + s*x2;
-        Q[k][j] = -s*x1 + c*x2;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
- QR factorization for a tridiagonal matrix; Result of the factorization
- is stored in A and Qi
- NOTE(review): this copy of the source is damaged from the "kmax = ..."
- line onwards: the rest of qr_tri() and the beginning of the code that
- follows are missing, and the text resumes inside a convergence check
- that, per its comments, rebuilds Q*A*Q.t from the diagonal of A and
- compares it element by element against A0
-------------------------------------------------------------------------- */
-
-void PPPMDisp::qr_tri(double** Qi,double** A,int n)
-{
- double r,a,b,c,s,x1,x2;
- int j,k,k0,kmax;
- // make Qi a unity matrix
- for (int i = 0; i < n; i++)
- for (int j = 0; j < n; j++)
- Qi[i][j] = 0.0;
- for (int i = 0; i < n; i++)
- Qi[i][i] = 1.0;
- // loop over main diagonal and first off-diagonal of A
- for (int i = 0; i < n-1; i++) {
- j = i+1;
- // coefficients of the rotation matrix
- a = A[i][i];
- b = A[j][i];
- r = sqrt(a*a + b*b); // NOTE(review): r is 0 when a and b are both 0, the divisions below are not guarded
- c = a/r;
- s = b/r;
- // update the entries of A and Q
- k0 = (i-1>0)?i-1:0; //max(i-1,0);
- kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); NOTE(review): line is damaged, text between "i+3" and "A0[i][j]" is missing
- double epsabs = eps*Bmax; // absolute tolerance: eps scaled by Bmax
-
- // reconstruct the original matrix
- // store the diagonal elements in D
- for (int i = 0; i < n; i++)
- for (int j = 0; j < n; j++)
- D[i][j] = 0.0;
- for (int i = 0; i < n; i++)
- D[i][i] = A[i][i];
- // store matrix Q in E
- for (int i = 0; i < n; i++)
- for (int j = 0; j < n; j++)
- E[i][j] = Q[i][j];
- // E = Q*A
- mmult(E,D,C,n);
- // store transpose of Q in D
- for (int i = 0; i < n; i++)
- for (int j = 0; j < n; j++)
- D[i][j] = Q[j][i];
- // E = Q*A*Q.t
- mmult(E,D,C,n);
-
- //compare the original matrix and the final matrix
- for (int i = 0; i < n; i++) {
- for (int j = 0; j < n; j++) {
- diff = A0[i][j] - E[i][j];
- epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff));
- }
- }
- if (epsmax > epsabs) converged = 0; // largest deviation exceeds the tolerance
- return converged;
-}
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMDisp::allocate()
-{
-
- int (*procneigh)[2] = comm->procneigh;
-
- if (function[0]) {
- memory->create(work1,2*nfft_both,"pppm/disp:work1");
- memory->create(work2,2*nfft_both,"pppm/disp:work2");
-
- memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
- memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
- memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
-
- memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
- memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
- memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
-
-
- memory->create(gf_b,order,"pppm/disp:gf_b");
- memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
- memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
- memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d");
- memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
-
- memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
- memory->create(vg,nfft_both,6,"pppm/disp:vg");
- memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
-
- memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:density_brick");
- if ( differentiation_flag == 1) {
- memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:u_brick");
- memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
- memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
- memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
- memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
- memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
- memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
-
- } else {
- memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
- memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
- memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
- }
- memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
-
- int tmp;
-
- fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 0,0,&tmp);
-
- fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- 0,0,&tmp);
-
- remap = new Remap(lmp,world,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 1,0,0,FFT_PRECISION);
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg = new CommGrid(lmp,world,1,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg = new CommGrid(lmp,world,3,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- }
-
- if (function[1]) {
- memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
- memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
- memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
- memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
- memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
- memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
- memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
- memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
- memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
- memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
- memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
- memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
- memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
- memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
- memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
- memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
- memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
- if ( differentiation_flag == 1) {
- memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
-
- memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
- memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
- memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
- memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
- memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
- memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
- } else {
- memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
- memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
- memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
- }
- memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
-
-
- int tmp;
-
- fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 0,0,&tmp);
-
- fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- 0,0,&tmp);
-
- remap_6 = new Remap(lmp,world,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 1,0,0,FFT_PRECISION);
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_6 = new CommGrid(lmp,world,1,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_6 = new CommGrid(lmp,world,3,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- }
-
- if (function[2]) {
- memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
- memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
- memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
- memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
- memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
- memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
- memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
- memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
- memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
- memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
- memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
- memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
- memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
- memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
- memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
- memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
- memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
- memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
- memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
- memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
- memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
- memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
- memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
-
- memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
- memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
- memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
- memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
- memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
- memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
- memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
-
-
- if ( differentiation_flag == 1 ) {
- memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
- memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
- memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
- memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
- memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
- memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
- memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
-
- memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
- memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
- memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
- memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
- memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
- memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
- } else {
-
- memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
- memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
- memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
-
- memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
- memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
- memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
-
- memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
- memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
- memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
-
- memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
- memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
- memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
-
- memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
- memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
- memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
-
- memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
- memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
- memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
-
- memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
- memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
- memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
- }
-
-
-
- int tmp;
-
- fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 0,0,&tmp);
-
- fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- 0,0,&tmp);
-
- remap_6 = new Remap(lmp,world,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 1,0,0,FFT_PRECISION);
-
- // create ghost grid object for rho and electric field communication
-
-
- if (differentiation_flag == 1)
- cg_6 = new CommGrid(lmp,world,7,7,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_6 = new CommGrid(lmp,world,21,7,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- }
-
- if (function[3]) {
- memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
- memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
- memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
- memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
- memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
- memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
- memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
- memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
- memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
- memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
- memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
- memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
- memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
- memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
- memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
- memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
- memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
- if ( differentiation_flag == 1) {
- memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
-
- memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
- memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
- memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
- memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
- memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
- memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
- } else {
- memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
- memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
- memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
- }
- memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
-
-
- int tmp;
-
- fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 0,0,&tmp);
-
- fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- 0,0,&tmp);
-
- remap_6 = new Remap(lmp,world,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
- 1,0,0,FFT_PRECISION);
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_6 = new CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- }
-
-}
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors and order
- for per atom calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::allocate_peratom()
-{
-
- int (*procneigh)[2] = comm->procneigh;
-
- if (function[0]) {
-
- if (differentiation_flag != 1)
- memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:u_brick");
-
- memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v0_brick");
- memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v1_brick");
- memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v2_brick");
- memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v3_brick");
- memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v4_brick");
- memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm/disp:v5_brick");
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_peratom =
- new CommGrid(lmp,world,6,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_peratom =
- new CommGrid(lmp,world,7,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
- }
-
-
- if (function[1]) {
-
- if ( differentiation_flag != 1 )
- memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
-
- memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
- memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
- memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
- memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
- memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
- memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_peratom_6 =
- new CommGrid(lmp,world,6,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_peratom_6 =
- new CommGrid(lmp,world,7,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
- }
-
- if (function[2]) {
-
- if ( differentiation_flag != 1 ) {
- memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
- memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
- memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
- memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
- memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
- memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
- memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
- }
-
- memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
- memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
- memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
- memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
- memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
- memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
-
- memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
- memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
- memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
- memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
- memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
- memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
-
- memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
- memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
- memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
- memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
- memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
- memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
-
- memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
- memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
- memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
- memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
- memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
- memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
-
- memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
- memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
- memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
- memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
- memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
- memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
-
- memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
- memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
- memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
- memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
- memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
- memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
-
- memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
- memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
- memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
- memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
- memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
- memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_peratom_6 =
- new CommGrid(lmp,world,42,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_peratom_6 =
- new CommGrid(lmp,world,49,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
- }
-
- if (function[3]) {
-
- if ( differentiation_flag != 1 )
- memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
-
- memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
- memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
- memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
- memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
- memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
- memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
- nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
-
- // create ghost grid object for rho and electric field communication
-
- if (differentiation_flag == 1)
- cg_peratom_6 =
- new CommGrid(lmp,world,6*nsplit_alloc,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- else
- cg_peratom_6 =
- new CommGrid(lmp,world,7*nsplit_alloc,1,
- nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
- nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
- }
-}
-
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMDisp::deallocate()
-{
- memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy(density_fft);
- density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
- density_fft = NULL;
-
- memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_g);
- density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
- density_fft_g = NULL;
-
- memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a0);
- density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
- density_fft_a0 = NULL;
-
- memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a1);
- density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
- density_fft_a1 = NULL;
-
- memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a2);
- density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
- density_fft_a2 = NULL;
-
- memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a3);
- density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
- density_fft_a3 = NULL;
-
- memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a4);
- density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
- density_fft_a4 = NULL;
-
- memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a5);
- density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
- density_fft_a5 = NULL;
-
- memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_a6);
- density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
- density_fft_a6 = NULL;
-
- memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
- memory->destroy(density_fft_none);
- density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
- density_fft_none = NULL;
-
- memory->destroy(sf_precoeff1);
- memory->destroy(sf_precoeff2);
- memory->destroy(sf_precoeff3);
- memory->destroy(sf_precoeff4);
- memory->destroy(sf_precoeff5);
- memory->destroy(sf_precoeff6);
- sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
-
- memory->destroy(sf_precoeff1_6);
- memory->destroy(sf_precoeff2_6);
- memory->destroy(sf_precoeff3_6);
- memory->destroy(sf_precoeff4_6);
- memory->destroy(sf_precoeff5_6);
- memory->destroy(sf_precoeff6_6);
- sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
-
- memory->destroy(greensfn);
- memory->destroy(greensfn_6);
- memory->destroy(work1);
- memory->destroy(work2);
- memory->destroy(work1_6);
- memory->destroy(work2_6);
- memory->destroy(vg);
- memory->destroy(vg2);
- memory->destroy(vg_6);
- memory->destroy(vg2_6);
- greensfn = greensfn_6 = NULL;
- work1 = work2 = work1_6 = work2_6 = NULL;
- vg = vg2 = vg_6 = vg2_6 = NULL;
-
- memory->destroy1d_offset(fkx,nxlo_fft);
- memory->destroy1d_offset(fky,nylo_fft);
- memory->destroy1d_offset(fkz,nzlo_fft);
- fkx = fky = fkz = NULL;
-
- memory->destroy1d_offset(fkx2,nxlo_fft);
- memory->destroy1d_offset(fky2,nylo_fft);
- memory->destroy1d_offset(fkz2,nzlo_fft);
- fkx2 = fky2 = fkz2 = NULL;
-
- memory->destroy1d_offset(fkx_6,nxlo_fft_6);
- memory->destroy1d_offset(fky_6,nylo_fft_6);
- memory->destroy1d_offset(fkz_6,nzlo_fft_6);
- fkx_6 = fky_6 = fkz_6 = NULL;
-
- memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
- memory->destroy1d_offset(fky2_6,nylo_fft_6);
- memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
- fkx2_6 = fky2_6 = fkz2_6 = NULL;
-
-
- memory->destroy(gf_b);
- memory->destroy2d_offset(rho1d,-order/2);
- memory->destroy2d_offset(rho_coeff,(1-order)/2);
- memory->destroy2d_offset(drho1d,-order/2);
- memory->destroy2d_offset(drho_coeff, (1-order)/2);
- gf_b = NULL;
- rho1d = rho_coeff = drho1d = drho_coeff = NULL;
-
- memory->destroy(gf_b_6);
- memory->destroy2d_offset(rho1d_6,-order_6/2);
- memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
- memory->destroy2d_offset(drho1d_6,-order_6/2);
- memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
- gf_b_6 = NULL;
- rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
-
- delete fft1;
- delete fft2;
- delete remap;
- delete cg;
- fft1 = fft2 = NULL;
- remap = NULL;
- cg = NULL;
-
- delete fft1_6;
- delete fft2_6;
- delete remap_6;
- delete cg_6;
- fft1_6 = fft2_6 = NULL;
- remap_6 = NULL;
- cg_6 = NULL;
-}
-
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors and order
- for per atom calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::deallocate_peratom()
-{
- peratom_allocate_flag = 0;
-
- memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
- memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
- u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
-
- memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
-
- memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
-
- memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
-
- memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
-
- memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
-
- memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
-
- memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
-
- memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
-
- memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
- u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
-
- delete cg_peratom;
- delete cg_peratom_6;
- cg_peratom = cg_peratom_6 = NULL;
-}
-
-/* ----------------------------------------------------------------------
- set size of FFT grid (nx,ny,nz_pppm) and g_ewald
- for Coulomb interactions
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_grid()
-{
- double q2 = qsqsum * force->qqrd2e / force->dielectric;
-
- // use xprd,yprd,zprd even if triclinic so grid size is the same
- // adjust z dimension for 2d slab PPPM
- // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
-
- // make initial g_ewald estimate
- // based on desired accuracy and real space cutoff
- // fluid-occupied volume used to estimate real-space error
- // zprd used rather than zprd_slab
-
- double h, h_x,h_y,h_z;
- bigint natoms = atom->natoms;
-
- if (!gewaldflag) {
- g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
- if (g_ewald >= 1.0)
- error->all(FLERR,"KSpace accuracy too large to estimate G vector");
- g_ewald = sqrt(-log(g_ewald)) / cutoff;
- }
-
- // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
- // nz_pppm uses extended zprd_slab instead of zprd
- // reduce it until accuracy target is met
-
- if (!gridflag) {
- h = h_x = h_y = h_z = 4.0/g_ewald;
- int count = 0;
- while (1) {
-
- // set grid dimension
- nx_pppm = static_cast (xprd/h_x);
- ny_pppm = static_cast (yprd/h_y);
- nz_pppm = static_cast (zprd_slab/h_z);
-
- if (nx_pppm <= 1) nx_pppm = 2;
- if (ny_pppm <= 1) ny_pppm = 2;
- if (nz_pppm <= 1) nz_pppm = 2;
-
- //set local grid dimension
- int npey_fft,npez_fft;
- if (nz_pppm >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft = 0;
- nxhi_fft = nx_pppm - 1;
- nylo_fft = me_y*ny_pppm/npey_fft;
- nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
- nzlo_fft = me_z*nz_pppm/npez_fft;
- nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
- double qopt = compute_qopt();
-
- double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-
- count++;
-
- // break loop if the accuracy has been reached or too many loops have been performed
- if (dfkspace <= accuracy) break;
- if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
- h *= 0.95;
- h_x = h_y = h_z = h;
- }
- }
-
- // boost grid size until it is factorable
-
- while (!factorable(nx_pppm)) nx_pppm++;
- while (!factorable(ny_pppm)) ny_pppm++;
- while (!factorable(nz_pppm)) nz_pppm++;
-}
-
-/* ----------------------------------------------------------------------
- set the FFT parameters
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
- int& nxlo_f,int& nylo_f,int& nzlo_f,
- int& nxhi_f,int& nyhi_f,int& nzhi_f,
- int& nxlo_i,int& nylo_i,int& nzlo_i,
- int& nxhi_i,int& nyhi_i,int& nzhi_i,
- int& nxlo_o,int& nylo_o,int& nzlo_o,
- int& nxhi_o,int& nyhi_o,int& nzhi_o,
- int& nlow, int& nupp,
- int& ng, int& nf, int& nfb,
- double& sft,double& sftone, int& ord)
-{
- // global indices of PPPM grid range from 0 to N-1
- // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that I own without ghost cells
- // for slab PPPM, assign z grid as if it were not extended
-
- nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p);
- nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
-
- nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p);
- nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
-
- nzlo_i = static_cast
- (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
- nzhi_i = static_cast
- (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
-
-
- // nlow,nupp = stencil size for mapping particles to PPPM grid
-
- nlow = -(ord-1)/2;
- nupp = ord/2;
-
- // sft values for particle <-> grid mapping
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- if (ord % 2) sft = OFFSET + 0.5;
- else sft = OFFSET;
- if (ord % 2) sftone = 0.0;
- else sftone = 0.5;
-
- // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that my particles can contribute charge to
- // effectively nlo_in,nhi_in + ghost cells
- // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
- // position a particle in my box can be at
- // dist[3] = particle position bound = subbox + skin/2.0 + qdist
- // qdist = offset due to TIP4P fictitious charge
- // convert to triclinic if necessary
- // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
- // for slab PPPM, assign z grid as if it were not extended
-
- double *prd,*sublo,*subhi;
-
- if (triclinic == 0) {
- prd = domain->prd;
- boxlo = domain->boxlo;
- sublo = domain->sublo;
- subhi = domain->subhi;
- } else {
- prd = domain->prd_lamda;
- boxlo = domain->boxlo_lamda;
- sublo = domain->sublo_lamda;
- subhi = domain->subhi_lamda;
- }
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double dist[3];
- double cuthalf = 0.5*neighbor->skin + qdist;
- if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
- else {
- dist[0] = cuthalf/domain->prd[0];
- dist[1] = cuthalf/domain->prd[1];
- dist[2] = cuthalf/domain->prd[2];
- }
-
- int nlo,nhi;
-
- nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) *
- nx_p/xprd + sft) - OFFSET;
- nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) *
- nx_p/xprd + sft) - OFFSET;
- nxlo_o = nlo + nlow;
- nxhi_o = nhi + nupp;
-
- nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) *
- ny_p/yprd + sft) - OFFSET;
- nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) *
- ny_p/yprd + sft) - OFFSET;
- nylo_o = nlo + nlow;
- nyhi_o = nhi + nupp;
-
- nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) *
- nz_p/zprd_slab + sft) - OFFSET;
- nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) *
- nz_p/zprd_slab + sft) - OFFSET;
- nzlo_o = nlo + nlow;
- nzhi_o = nhi + nupp;
-
- // for slab PPPM, change the grid boundary for processors at +z end
- // to include the empty volume between periodically repeating slabs
- // for slab PPPM, want charge data communicated from -z proc to +z proc,
- // but not vice versa, also want field data communicated from +z proc to
- // -z proc, but not vice versa
- // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
-
- if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
- nzhi_i = nz_p - 1;
- nzhi_o = nz_p - 1;
- }
-
- // decomposition of FFT mesh
- // global indices range from 0 to N-1
- // proc owns entire x-dimension, clump of columns in y,z dimensions
- // npey_fft,npez_fft = # of procs in y,z dims
- // if nprocs is small enough, proc can own 1 or more entire xy planes,
- // else proc owns 2d sub-blocks of yz plane
- // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
- // nlo_fft,nhi_fft = lower/upper limit of the section
- // of the global FFT mesh that I own
-
- int npey_fft,npez_fft;
- if (nz_p >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_f = 0;
- nxhi_f = nx_p - 1;
- nylo_f = me_y*ny_p/npey_fft;
- nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
- nzlo_f = me_z*nz_p/npez_fft;
- nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
-
- // PPPM grid for this proc, including ghosts
-
- ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
- (nzhi_o-nzlo_o+1);
-
- // FFT arrays on this proc, without ghosts
- // nfft = FFT points in FFT decomposition on this proc
- // nfft_brick = FFT points in 3d brick-decomposition on this proc
- // nfft_both = greater of 2 values
-
- nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
- (nzhi_f-nzlo_f+1);
- int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
- (nzhi_i-nzlo_i+1);
- nfb = MAX(nf,nfft_brick);
-
-}
-
-/* ----------------------------------------------------------------------
- check if all factors of n are in list of factors
- return 1 if yes, 0 if no
-------------------------------------------------------------------------- */
-
-int PPPMDisp::factorable(int n)
-{
- int i;
-
- while (n > 1) {
- for (i = 0; i < nfactors; i++) {
- if (n % factors[i] == 0) {
- n /= factors[i];
- break;
- }
- }
- if (i == nfactors) return 0;
- }
-
- return 1;
-}
-
-/* ----------------------------------------------------------------------
- pre-compute Green's function denominator expansion coeffs, Gamma(2n)
-------------------------------------------------------------------------- */
-void PPPMDisp::adjust_gewald()
-{
-
- // Use Newton solver to find g_ewald
-
- double dx;
-
- // Begin algorithm
-
- for (int i = 0; i < LARGE; i++) {
- dx = f() / derivf();
- g_ewald -= dx; //Update g_ewald
- if (fabs(f()) < SMALL) return;
- }
-
- // Failed to converge
-
- char str[128];
- sprintf(str, "Could not compute g_ewald");
- error->all(FLERR, str);
-
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x)
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::f()
-{
- double df_rspace, df_kspace;
- double q2 = qsqsum * force->qqrd2e / force->dielectric;
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
-
- df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
- sqrt(natoms*cutoff*xprd*yprd*zprd);
-
- double qopt = compute_qopt();
- df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-
- return df_rspace - df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::derivf()
-{
- double h = 0.000001; //Derivative step-size
- double df,f1,f2,g_ewald_old;
-
- f1 = f();
- g_ewald_old = g_ewald;
- g_ewald += h;
- f2 = f();
- g_ewald = g_ewald_old;
- df = (f2 - f1)/h;
-
- return df;
-}
-
-/* ----------------------------------------------------------------------
- Calculate the final estimator for the accuracy
-------------------------------------------------------------------------- */
-
-double PPPMDisp::final_accuracy()
-{
- double df_rspace, df_kspace;
- double q2 = qsqsum * force->qqrd2e / force->dielectric;
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
- df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
- sqrt(natoms*cutoff*xprd*yprd*zprd);
-
- double qopt = compute_qopt();
-
- df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-
- double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
- return acc;
-}
-
-/* ----------------------------------------------------------------------
- Calculate the final estimator for the Dispersion accuracy
-------------------------------------------------------------------------- */
-
-void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
-{
- double df_rspace, df_kspace;
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
- acc_real = lj_rspace_error();
-
- double qopt = compute_qopt_6();
-
- acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-
- acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
- return;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for Coulomb interactions
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt()
-{
- double qopt;
- if (differentiation_flag == 1) {
- qopt = compute_qopt_ad();
- } else {
- qopt = compute_qopt_ik();
- }
- double qopt_all;
- MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
- return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for Dispersion interactions
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6()
-{
- double qopt;
- if (differentiation_flag == 1) {
- qopt = compute_qopt_6_ad();
- } else {
- qopt = compute_qopt_6_ik();
- }
- double qopt_all;
- MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
- return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for the ik differentiation scheme and Coulomb interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_ik()
-{
- double qopt = 0.0;
- int k,l,m;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int nx,ny,nz,kper,lper,mper;
- double sqk, u2;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double sum1,sum2, sum3,dot1,dot2;
-
- int nbx = 2;
- int nby = 2;
- int nbz = 2;
-
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
-
- sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
- pow(unitkz*mper,2.0);
-
- if (sqk != 0.0) {
- sum1 = 0.0;
- sum2 = 0.0;
- sum3 = 0.0;
- for (nx = -nbx; nx <= nbx; nx++) {
- qx = unitkx*(kper+nx_pppm*nx);
- sx = exp(-0.25*pow(qx/g_ewald,2.0));
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order);
- for (ny = -nby; ny <= nby; ny++) {
- qy = unitky*(lper+ny_pppm*ny);
- sy = exp(-0.25*pow(qy/g_ewald,2.0));
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order);
- for (nz = -nbz; nz <= nbz; nz++) {
- qz = unitkz*(mper+nz_pppm*nz);
- sz = exp(-0.25*pow(qz/g_ewald,2.0));
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order);
-
- dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
- dot2 = qx*qx+qy*qy+qz*qz;
- u2 = pow(wx*wy*wz,2.0);
- sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
- sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
- sum3 += u2;
- }
- }
- }
- sum2 *= sum2;
- sum3 *= sum3*sqk;
- qopt += sum1 -sum2/sum3;
- }
- }
- }
- }
- return qopt;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for the ad differentiation scheme and Coulomb interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_ad()
-{
- double qopt = 0.0;
- int k,l,m;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int nx,ny,nz,kper,lper,mper;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double u2, sqk;
- double sum1,sum2,sum3,sum4,dot2;
- double numerator;
-
- int nbx = 2;
- int nby = 2;
- int nbz = 2;
- double form = 1.0;
-
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
-
- sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
- pow(unitkz*mper,2.0);
-
- if (sqk != 0.0) {
- numerator = form*12.5663706;
-
- sum1 = 0.0;
- sum2 = 0.0;
- sum3 = 0.0;
- sum4 = 0.0;
- for (nx = -nbx; nx <= nbx; nx++) {
- qx = unitkx*(kper+nx_pppm*nx);
- sx = exp(-0.25*pow(qx/g_ewald,2.0));
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order);
- for (ny = -nby; ny <= nby; ny++) {
- qy = unitky*(lper+ny_pppm*ny);
- sy = exp(-0.25*pow(qy/g_ewald,2.0));
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order);
- for (nz = -nbz; nz <= nbz; nz++) {
- qz = unitkz*(mper+nz_pppm*nz);
- sz = exp(-0.25*pow(qz/g_ewald,2.0));
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order);
-
- dot2 = qx*qx+qy*qy+qz*qz;
- u2 = pow(wx*wy*wz,2.0);
- sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
- sum2 += sx*sy*sz * u2*4.0*MY_PI;
- sum3 += u2;
- sum4 += dot2*u2;
- }
- }
- }
- sum2 *= sum2;
- qopt += sum1 - sum2/(sum3*sum4);
- }
- }
- }
- }
- return qopt;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for the ik differentiation scheme and Dispersion interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6_ik()
-{
- double qopt = 0.0;
- int k,l,m,n;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int nx,ny,nz,kper,lper,mper;
- double sqk, u2;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double sum1,sum2, sum3;
- double dot1,dot2, rtdot2, term;
- double inv2ew = 2*g_ewald_6;
- inv2ew = 1.0/inv2ew;
- double rtpi = sqrt(MY_PI);
-
- int nbx = 2;
- int nby = 2;
- int nbz = 2;
-
- n = 0;
- for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
- mper = m - nz_pppm_6*(2*m/nz_pppm_6);
-
- for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
- lper = l - ny_pppm_6*(2*l/ny_pppm_6);
-
- for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
- kper = k - nx_pppm_6*(2*k/nx_pppm_6);
-
- sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
- pow(unitkz*mper,2.0);
-
- if (sqk != 0.0) {
- sum1 = 0.0;
- sum2 = 0.0;
- sum3 = 0.0;
- for (nx = -nbx; nx <= nbx; nx++) {
- qx = unitkx*(kper+nx_pppm_6*nx);
- sx = exp(-qx*qx*inv2ew*inv2ew);
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm_6;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
- for (ny = -nby; ny <= nby; ny++) {
- qy = unitky*(lper+ny_pppm_6*ny);
- sy = exp(-qy*qy*inv2ew*inv2ew);
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm_6;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
- for (nz = -nbz; nz <= nbz; nz++) {
- qz = unitkz*(mper+nz_pppm_6*nz);
- sz = exp(-qz*qz*inv2ew*inv2ew);
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm_6;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
-
- dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
- dot2 = qx*qx+qy*qy+qz*qz;
- rtdot2 = sqrt(dot2);
- term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
- 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
- term *= g_ewald_6*g_ewald_6*g_ewald_6;
- u2 = pow(wx*wy*wz,2.0);
- sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
- sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
- sum3 += u2;
- }
- }
- }
- sum2 *= sum2;
- sum3 *= sum3*sqk;
- qopt += sum1 -sum2/sum3;
- }
- }
- }
- }
- return qopt;
-}
-
-/* ----------------------------------------------------------------------
- Compute qopt for the ad differentiation scheme and Dispersion interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6_ad()
-{
- double qopt = 0.0;
- int k,l,m;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int nx,ny,nz,kper,lper,mper;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double u2, sqk;
- double sum1,sum2,sum3,sum4;
- double dot2, rtdot2, term;
- double inv2ew = 2*g_ewald_6;
- inv2ew = 1/inv2ew;
- double rtpi = sqrt(MY_PI);
-
- int nbx = 2;
- int nby = 2;
- int nbz = 2;
-
- for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
- mper = m - nz_pppm_6*(2*m/nz_pppm_6);
-
- for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
- lper = l - ny_pppm_6*(2*l/ny_pppm_6);
-
- for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
- kper = k - nx_pppm_6*(2*k/nx_pppm_6);
-
- sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
- pow(unitkz*mper,2.0);
-
- if (sqk != 0.0) {
-
- sum1 = 0.0;
- sum2 = 0.0;
- sum3 = 0.0;
- sum4 = 0.0;
- for (nx = -nbx; nx <= nbx; nx++) {
- qx = unitkx*(kper+nx_pppm_6*nx);
- sx = exp(-qx*qx*inv2ew*inv2ew);
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm_6;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
- for (ny = -nby; ny <= nby; ny++) {
- qy = unitky*(lper+ny_pppm_6*ny);
- sy = exp(-qy*qy*inv2ew*inv2ew);
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm_6;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
- for (nz = -nbz; nz <= nbz; nz++) {
- qz = unitkz*(mper+nz_pppm_6*nz);
- sz = exp(-qz*qz*inv2ew*inv2ew);
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm_6;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
-
- dot2 = qx*qx+qy*qy+qz*qz;
- rtdot2 = sqrt(dot2);
- term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
- 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
- term *= g_ewald_6*g_ewald_6*g_ewald_6;
- u2 = pow(wx*wy*wz,2.0);
- sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
- sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
- sum3 += u2;
- sum4 += dot2*u2;
- }
- }
- }
- sum2 *= sum2;
- qopt += sum1 - sum2/(sum3*sum4);
- }
- }
- }
- }
- return qopt;
-}
-
-/* ----------------------------------------------------------------------
- set size of FFT grid and g_ewald_6
- for Dispersion interactions
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_grid_6()
-{
- // Calculate csum
- if (!csumflag) calc_csum();
- if (!gewaldflag_6) set_init_g6();
- if (!gridflag_6) set_n_pppm_6();
- while (!factorable(nx_pppm_6)) nx_pppm_6++;
- while (!factorable(ny_pppm_6)) ny_pppm_6++;
- while (!factorable(nz_pppm_6)) nz_pppm_6++;
-
-}
-
-/* ----------------------------------------------------------------------
- Calculate the sum of the squared dispersion coefficients and other
- related quantities required for the calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::calc_csum()
-{
- csumij = 0.0;
- csum = 0.0;
-
- int ntypes = atom->ntypes;
- int i,j,k;
-
- delete [] cii;
- cii = new double[ntypes +1];
- for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
- delete [] csumi;
- csumi = new double[ntypes +1];
- for (i = 0; i<=ntypes; i++) csumi[i] = 0.0;
- int *neach = new int[ntypes+1];
- for (i = 0; i<=ntypes; i++) neach[i] = 0;
-
- //the following variables are needed to distinguish between arithmetic
- // and geometric mixing
-
- double mix1; // scales 20/16 to 4
- int mix2; // shifts the value to the sigma^3 value
- int mix3; // shifts the value to the right atom type
- if (function[1]) {
- for (i = 1; i <= ntypes; i++)
- cii[i] = B[i]*B[i];
- int tmp;
- for (i = 0; i < atom->nlocal; i++) {
- tmp = atom->type[i];
- neach[tmp]++;
- csum += B[tmp]*B[tmp];
- }
- }
- if (function[2]) {
- for (i = 1; i <= ntypes; i++)
- cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3];
- int tmp;
- for (i = 0; i < atom->nlocal; i++) {
- tmp = atom->type[i];
- neach[tmp]++;
- csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3];
- }
- }
- if (function[3]) {
- for (i = 1; i <= ntypes; i++)
- for (j = 0; j < nsplit; j++)
- cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j];
- int tmp;
- for (i = 0; i < atom->nlocal; i++) {
- tmp = atom->type[i];
- neach[tmp]++;
- for (j = 0; j < nsplit; j++)
- csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j];
- }
- }
-
-
- double tmp2;
- MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
- csum = tmp2;
- csumflag = 1;
-
- int *neach_all = new int[ntypes+1];
- MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
-
- // copmute csumij and csumi
- double d1, d2;
- if (function[1]){
- for (i=1; i<=ntypes; i++) {
- for (j=1; j<=ntypes; j++) {
- csumi[i] += neach_all[j]*B[i]*B[j];
- d1 = neach_all[i]*B[i];
- d2 = neach_all[j]*B[j];
- csumij += d1*d2;
- //csumij += neach_all[i]*neach_all[j]*B[i]*B[j];
- }
- }
- }
- if (function[2]) {
- for (i=1; i<=ntypes; i++) {
- for (j=1; j<=ntypes; j++) {
- for (k=0; k<=6; k++) {
- csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
- d1 = neach_all[i]*B[7*i + k];
- d2 = neach_all[j]*B[7*(j+1)-k-1];
- csumij += d1*d2;
- //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
- }
- }
- }
- }
- if (function[3]) {
- for (i=1; i<=ntypes; i++) {
- for (j=1; j<=ntypes; j++) {
- for (k=0; kall(FLERR, str);
-
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x) for Dispersion interaction
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::f_6()
-{
- double df_rspace, df_kspace;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- bigint natoms = atom->natoms;
-
- df_rspace = lj_rspace_error();
-
- double qopt = compute_qopt_6();
- df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-
- return df_rspace - df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::derivf_6()
-{
- double h = 0.000001; //Derivative step-size
- double df,f1,f2,g_ewald_old;
-
- f1 = f_6();
- g_ewald_old = g_ewald_6;
- g_ewald_6 += h;
- f2 = f_6();
- g_ewald_6 = g_ewald_old;
- df = (f2 - f1)/h;
-
- return df;
-}
-
-
-/* ----------------------------------------------------------------------
- calculate an initial value for g_ewald_6
- ---------------------------------------------------------------------- */
-
-void PPPMDisp::set_init_g6()
-{
- // use xprd,yprd,zprd even if triclinic so grid size is the same
- // adjust z dimension for 2d slab PPPM
- // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
- // make initial g_ewald estimate
- // based on desired error and real space cutoff
-
- // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj
- // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0
- // else, repeat multiply g_ewald_6 by 2 until df_real > 0
- // perform bisection for the last two values of
- double df_real;
- double g_ewald_old;
- double gmin, gmax;
-
- // check if there is a user defined accuracy
- double acc_rspace = accuracy;
- if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
-
- g_ewald_6 = 1.0/cutoff_lj;
- df_real = lj_rspace_error() - acc_rspace;
- int counter = 0;
- if (df_real > 0) {
- while (df_real > 0 && counter < LARGE) {
- counter++;
- g_ewald_old = g_ewald_6;
- g_ewald_6 *= 2;
- df_real = lj_rspace_error() - acc_rspace;
- }
- }
-
- if (df_real < 0) {
- while (df_real < 0 && counter < LARGE) {
- counter++;
- g_ewald_old = g_ewald_6;
- g_ewald_6 *= 0.5;
- df_real = lj_rspace_error() - acc_rspace;
- }
- }
-
- if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
-
- gmin = MIN(g_ewald_6, g_ewald_old);
- gmax = MAX(g_ewald_6, g_ewald_old);
- g_ewald_6 = gmin + 0.5*(gmax-gmin);
- counter = 0;
- while (gmax-gmin > SMALL && counter < LARGE) {
- counter++;
- df_real = lj_rspace_error() -acc_rspace;
- if (df_real < 0) gmax = g_ewald_6;
- else gmin = g_ewald_6;
- g_ewald_6 = gmin + 0.5*(gmax-gmin);
- }
- if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
-
-}
-
-/* ----------------------------------------------------------------------
- calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
- ---------------------------------------------------------------------- */
-
-void PPPMDisp::set_n_pppm_6()
-{
- bigint natoms = atom->natoms;
-
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- double h, h_x,h_y,h_z;
-
- double acc_kspace = accuracy;
- if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;
-
- // initial value for the grid spacing
- h = h_x = h_y = h_z = 4.0/g_ewald_6;
- // decrease grid spacing untill required precision is obtained
- int count = 0;
- while(1) {
-
- // set grid dimension
- nx_pppm_6 = static_cast (xprd/h_x);
- ny_pppm_6 = static_cast (yprd/h_y);
- nz_pppm_6 = static_cast (zprd_slab/h_z);
-
- if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
- if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
- if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
-
- //set local grid dimension
- int npey_fft,npez_fft;
- if (nz_pppm_6 >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft_6 = 0;
- nxhi_fft_6 = nx_pppm_6 - 1;
- nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
- nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
- nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
- nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
-
- double qopt = compute_qopt_6();
-
- double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-
- count++;
-
- // break loop if the accuracy has been reached or too many loops have been performed
- if (df_kspace <= acc_kspace) break;
- if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
- h *= 0.95;
- h_x = h_y = h_z = h;
- }
-}
-
-/* ----------------------------------------------------------------------
- calculate the real space error for dispersion interactions
- ---------------------------------------------------------------------- */
-
-double PPPMDisp::lj_rspace_error()
-{
- bigint natoms = atom->natoms;
- double xprd = domain->xprd;
- double yprd = domain->yprd;
- double zprd = domain->zprd;
- double zprd_slab = zprd*slab_volfactor;
-
- double deltaf;
- double rgs = (cutoff_lj*g_ewald_6);
- rgs *= rgs;
- double rgs_inv = 1.0/rgs;
- deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
- exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
- return deltaf;
-}
-
-
-/* ----------------------------------------------------------------------
- Compyute the modified (hockney-eastwood) coulomb green function
- ---------------------------------------------------------------------- */
-
-void PPPMDisp::compute_gf()
-{
- int k,l,m,n;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int kper,lper,mper;
- double snx,sny,snz,snx2,sny2,snz2;
- double sqk;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
- double numerator,denominator;
-
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- mper = m - nz_pppm*(2*m/nz_pppm);
- qz = unitkz*mper;
- snz = sin(0.5*qz*zprd_slab/nz_pppm);
- snz2 = snz*snz;
- sz = exp(-0.25*pow(qz/g_ewald,2.0));
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order);
- wz *= wz;
-
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- lper = l - ny_pppm*(2*l/ny_pppm);
- qy = unitky*lper;
- sny = sin(0.5*qy*yprd/ny_pppm);
- sny2 = sny*sny;
- sy = exp(-0.25*pow(qy/g_ewald,2.0));
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order);
- wy *= wy;
-
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- kper = k - nx_pppm*(2*k/nx_pppm);
- qx = unitkx*kper;
- snx = sin(0.5*qx*xprd/nx_pppm);
- snx2 = snx*snx;
- sx = exp(-0.25*pow(qx/g_ewald,2.0));
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order);
- wx *= wx;
-
- sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
-
- if (sqk != 0.0) {
- numerator = 4.0*MY_PI/sqk;
- denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
- greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
- } else greensfn[n++] = 0.0;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- compute self force coefficients for ad-differentiation scheme
- and Coulomb interaction
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
- int nxlo_ft, int nylo_ft, int nzlo_ft,
- int nxhi_ft, int nyhi_ft, int nzhi_ft,
- double *sf_pre1, double *sf_pre2, double *sf_pre3,
- double *sf_pre4, double *sf_pre5, double *sf_pre6)
-{
-
- int i,k,l,m,n;
- double *prd;
-
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPM
- // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int nx,ny,nz,kper,lper,mper;
- double argx,argy,argz;
- double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
- double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
- double u0,u1,u2,u3,u4,u5,u6;
- double sum1,sum2,sum3,sum4,sum5,sum6;
-
- int nb = 2;
-
- n = 0;
- for (m = nzlo_ft; m <= nzhi_ft; m++) {
- mper = m - nzp*(2*m/nzp);
-
- for (l = nylo_ft; l <= nyhi_ft; l++) {
- lper = l - nyp*(2*l/nyp);
-
- for (k = nxlo_ft; k <= nxhi_ft; k++) {
- kper = k - nxp*(2*k/nxp);
-
- sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
- for (i = -nb; i <= nb; i++) {
-
- qx0 = unitkx*(kper+nxp*i);
- qx1 = unitkx*(kper+nxp*(i+1));
- qx2 = unitkx*(kper+nxp*(i+2));
- wx0[i+2] = 1.0;
- wx1[i+2] = 1.0;
- wx2[i+2] = 1.0;
- argx = 0.5*qx0*xprd/nxp;
- if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord);
- argx = 0.5*qx1*xprd/nxp;
- if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord);
- argx = 0.5*qx2*xprd/nxp;
- if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord);
-
- qy0 = unitky*(lper+nyp*i);
- qy1 = unitky*(lper+nyp*(i+1));
- qy2 = unitky*(lper+nyp*(i+2));
- wy0[i+2] = 1.0;
- wy1[i+2] = 1.0;
- wy2[i+2] = 1.0;
- argy = 0.5*qy0*yprd/nyp;
- if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord);
- argy = 0.5*qy1*yprd/nyp;
- if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord);
- argy = 0.5*qy2*yprd/nyp;
- if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord);
-
- qz0 = unitkz*(mper+nzp*i);
- qz1 = unitkz*(mper+nzp*(i+1));
- qz2 = unitkz*(mper+nzp*(i+2));
- wz0[i+2] = 1.0;
- wz1[i+2] = 1.0;
- wz2[i+2] = 1.0;
- argz = 0.5*qz0*zprd_slab/nzp;
- if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord);
- argz = 0.5*qz1*zprd_slab/nzp;
- if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord);
- argz = 0.5*qz2*zprd_slab/nzp;
- if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord);
- }
-
- for (nx = 0; nx <= 4; nx++) {
- for (ny = 0; ny <= 4; ny++) {
- for (nz = 0; nz <= 4; nz++) {
- u0 = wx0[nx]*wy0[ny]*wz0[nz];
- u1 = wx1[nx]*wy0[ny]*wz0[nz];
- u2 = wx2[nx]*wy0[ny]*wz0[nz];
- u3 = wx0[nx]*wy1[ny]*wz0[nz];
- u4 = wx0[nx]*wy2[ny]*wz0[nz];
- u5 = wx0[nx]*wy0[ny]*wz1[nz];
- u6 = wx0[nx]*wy0[ny]*wz2[nz];
-
- sum1 += u0*u1;
- sum2 += u0*u2;
- sum3 += u0*u3;
- sum4 += u0*u4;
- sum5 += u0*u5;
- sum6 += u0*u6;
- }
- }
- }
-
- // store values
-
- sf_pre1[n] = sum1;
- sf_pre2[n] = sum2;
- sf_pre3[n] = sum3;
- sf_pre4[n] = sum4;
- sf_pre5[n] = sum5;
- sf_pre6[n++] = sum6;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- Compute the modified (hockney-eastwood) dispersion green function
- ---------------------------------------------------------------------- */
-
-void PPPMDisp::compute_gf_6()
-{
- double *prd;
- int k,l,m,n;
-
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPM
- // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- int kper,lper,mper;
- double sqk;
- double snx,sny,snz,snx2,sny2,snz2;
- double argx,argy,argz,wx,wy,wz,sx,sy,sz;
- double qx,qy,qz;
- double rtsqk, term;
- double numerator,denominator;
- double inv2ew = 2*g_ewald_6;
- inv2ew = 1/inv2ew;
- double rtpi = sqrt(MY_PI);
-
- numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
-
- n = 0;
- for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
- mper = m - nz_pppm_6*(2*m/nz_pppm_6);
- qz = unitkz*mper;
- snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
- snz2 = snz*snz;
- sz = exp(-qz*qz*inv2ew*inv2ew);
- wz = 1.0;
- argz = 0.5*qz*zprd_slab/nz_pppm_6;
- if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
- wz *= wz;
-
- for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
- lper = l - ny_pppm_6*(2*l/ny_pppm_6);
- qy = unitky*lper;
- sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
- sny2 = sny*sny;
- sy = exp(-qy*qy*inv2ew*inv2ew);
- wy = 1.0;
- argy = 0.5*qy*yprd/ny_pppm_6;
- if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
- wy *= wy;
-
- for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
- kper = k - nx_pppm_6*(2*k/nx_pppm_6);
- qx = unitkx*kper;
- snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
- snx2 = snx*snx;
- sx = exp(-qx*qx*inv2ew*inv2ew);
- wx = 1.0;
- argx = 0.5*qx*xprd/nx_pppm_6;
- if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
- wx *= wx;
-
- sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
-
- if (sqk != 0.0) {
- denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
- rtsqk = sqrt(sqk);
- term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
- 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
- greensfn_6[n++] = numerator*term*wx*wy*wz/denominator;
- } else greensfn_6[n++] = 0.0;
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- compute self force coefficients for ad-differentiation scheme
- and Coulomb interaction
-------------------------------------------------------------------------- */
-void PPPMDisp::compute_sf_coeff()
-{
- int i,k,l,m,n;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0;
-
- n = 0;
- for (m = nzlo_fft; m <= nzhi_fft; m++) {
- for (l = nylo_fft; l <= nyhi_fft; l++) {
- for (k = nxlo_fft; k <= nxhi_fft; k++) {
- sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
- sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
- sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
- sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
- sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
- sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
- ++n;
- }
- }
- }
-
- // Compute the coefficients for the self-force correction
-
- double prex, prey, prez;
- prex = prey = prez = MY_PI/volume;
- prex *= nx_pppm/xprd;
- prey *= ny_pppm/yprd;
- prez *= nz_pppm/zprd_slab;
- sf_coeff[0] *= prex;
- sf_coeff[1] *= prex*2;
- sf_coeff[2] *= prey;
- sf_coeff[3] *= prey*2;
- sf_coeff[4] *= prez;
- sf_coeff[5] *= prez*2;
-
- // communicate values with other procs
-
- double tmp[6];
- MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
- for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
-}
-
-/* ----------------------------------------------------------------------
- compute self force coefficients for ad-differentiation scheme
- and Dispersion interaction
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_sf_coeff_6()
-{
- int i,k,l,m,n;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0;
-
- n = 0;
- for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
- for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
- for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
- sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n];
- sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n];
- sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n];
- sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n];
- sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n];
- sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n];
- ++n;
- }
- }
- }
-
-
- // perform multiplication with prefactors
-
- double prex, prey, prez;
- prex = prey = prez = MY_PI/volume;
- prex *= nx_pppm_6/xprd;
- prey *= ny_pppm_6/yprd;
- prez *= nz_pppm_6/zprd_slab;
- sf_coeff_6[0] *= prex;
- sf_coeff_6[1] *= prex*2;
- sf_coeff_6[2] *= prey;
- sf_coeff_6[3] *= prey*2;
- sf_coeff_6[4] *= prez;
- sf_coeff_6[5] *= prez*2;
-
- // communicate values with other procs
-
- double tmp[6];
- MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world);
- for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n];
-
-}
-
-/* ----------------------------------------------------------------------
- denominator for Hockney-Eastwood Green's function
- of x,y,z = sin(kx*deltax/2), etc
-
- inf n-1
- S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l
- j=-inf l=0
-
- = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x)
- gf_b = denominator expansion coeffs
-------------------------------------------------------------------------- */
-
-double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
-{
- double sx,sy,sz;
- sz = sy = sx = 0.0;
- for (int l = ord-1; l >= 0; l--) {
- sx = g_b[l] + sx*x;
- sy = g_b[l] + sy*y;
- sz = g_b[l] + sz*z;
- }
- double s = sx*sy*sz;
- return s*s;
-}
-
-/* ----------------------------------------------------------------------
- pre-compute Green's function denominator expansion coeffs, Gamma(2n)
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_gf_denom(double* gf, int ord)
-{
- int k,l,m;
-
- for (l = 1; l < ord; l++) gf[l] = 0.0;
- gf[0] = 1.0;
-
- for (m = 1; m < ord; m++) {
- for (l = m; l > 0; l--)
- gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
- gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
- }
-
- bigint ifact = 1;
- for (k = 1; k < 2*ord; k++) ifact *= k;
- double gaminv = 1.0/ifact;
- for (l = 0; l < ord; l++) gf[l] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
- ghost-swap to accumulate full density in brick decomposition
- remap density from 3d brick decomposition to FFTdecomposition
- for coulomb interaction or dispersion interaction with geometric
- mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
- int nxhi_i, int nyhi_i, int nzhi_i,
- FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
- LAMMPS_NS::Remap* rmp)
-{
- int n,ix,iy,iz;
-
- // copy grabs inner portion of density from 3d brick
- // remap could be done as pre-stage of FFT,
- // but this works optimally on only double values, not complex values
-
- n = 0;
- for (iz = nzlo_i; iz <= nzhi_i; iz++)
- for (iy = nylo_i; iy <= nyhi_i; iy++)
- for (ix = nxlo_i; ix <= nxhi_i; ix++)
- dfft[n++] = dbrick[iz][iy][ix];
-
- rmp->perform(dfft,dfft,work);
-}
-
-
-/* ----------------------------------------------------------------------
- ghost-swap to accumulate full density in brick decomposition
- remap density from 3d brick decomposition to FFTdecomposition
- for dispersion with arithmetic mixing rule
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft_a()
-{
- int n,ix,iy,iz;
-
- // copy grabs inner portion of density from 3d brick
- // remap could be done as pre-stage of FFT,
- // but this works optimally on only double values, not complex values
-
- n = 0;
- for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
- for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
- for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
- density_fft_a0[n] = density_brick_a0[iz][iy][ix];
- density_fft_a1[n] = density_brick_a1[iz][iy][ix];
- density_fft_a2[n] = density_brick_a2[iz][iy][ix];
- density_fft_a3[n] = density_brick_a3[iz][iy][ix];
- density_fft_a4[n] = density_brick_a4[iz][iy][ix];
- density_fft_a5[n] = density_brick_a5[iz][iy][ix];
- density_fft_a6[n++] = density_brick_a6[iz][iy][ix];
- }
-
- remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
- remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
- remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
- remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
- remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
- remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
- remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
-
-}
-
-/* ----------------------------------------------------------------------
- ghost-swap to accumulate full density in brick decomposition
- remap density from 3d brick decomposition to FFTdecomposition
- for dispersion with special case
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft_none()
-{
- int k,n,ix,iy,iz;
-
- // copy grabs inner portion of density from 3d brick
- // remap could be done as pre-stage of FFT,
- // but this works optimally on only double values, not complex values
-
- for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6);
-}
-
-/* ----------------------------------------------------------------------
- find center grid pt for each of my particles
- check that full stencil for the particle will fit in my 3d brick
- store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPMDisp::particle_map(double delx, double dely, double delz,
- double sft, int** p2g, int nup, int nlow,
- int nxlo, int nylo, int nzlo,
- int nxhi, int nyhi, int nzhi)
-{
- int nx,ny,nz;
-
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- int flag = 0;
- for (int i = 0; i < nlocal; i++) {
-
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // current particle coord can be outside global and local box
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
- ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
- nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
-
- p2g[i][0] = nx;
- p2g[i][1] = ny;
- p2g[i][2] = nz;
-
- // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
- if (nx+nlow < nxlo || nx+nup > nxhi ||
- ny+nlow < nylo || ny+nup > nyhi ||
- nz+nlow < nzlo || nz+nup > nzhi)
- flag = 1;
- }
-
- if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
-}
-
-
-void PPPMDisp::particle_map_c(double delx, double dely, double delz,
- double sft, int** p2g, int nup, int nlow,
- int nxlo, int nylo, int nzlo,
- int nxhi, int nyhi, int nzhi)
-{
- particle_map(delx, dely, delz, sft, p2g, nup, nlow,
- nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_c()
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density array
-
- memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
- z0 = delvolinv * q[i];
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- y0 = z0*rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- x0 = y0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- density_brick[mz][my][mx] += x0*rho1d[0][l];
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = dispersion "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid --- geometric mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_g()
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density array
-
- memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- int type;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- type = atom->type[i];
- z0 = delvolinv_6 * B[type];
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- y0 = z0*rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- x0 = y0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
- }
- }
- }
- }
-}
-
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = dispersion "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid --- arithmetic mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_a()
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
-
- // clear 3d density array
-
- memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
- memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
-
- // loop over my particles, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- int type;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- //do the following for all 4 grids
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- type = atom->type[i];
- z0 = delvolinv_6;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- y0 = z0*rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- x0 = y0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- w = x0*rho1d_6[0][l];
- density_brick_a0[mz][my][mx] += w*B[7*type];
- density_brick_a1[mz][my][mx] += w*B[7*type+1];
- density_brick_a2[mz][my][mx] += w*B[7*type+2];
- density_brick_a3[mz][my][mx] += w*B[7*type+3];
- density_brick_a4[mz][my][mx] += w*B[7*type+4];
- density_brick_a5[mz][my][mx] += w*B[7*type+5];
- density_brick_a6[mz][my][mx] += w*B[7*type+6];
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = dispersion "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid --- case when mixing rules don't apply
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_none()
-{
- int k,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
-
- // clear 3d density array
- for (k = 0; k < nsplit_alloc; k++)
- memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
- ngrid_6*sizeof(FFT_SCALAR));
-
-
- // loop over my particles, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- int type;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- //do the following for all 4 grids
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- type = atom->type[i];
- z0 = delvolinv_6;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- y0 = z0*rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- x0 = y0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- w = x0*rho1d_6[0][l];
- for (k = 0; k < nsplit; k++)
- density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k];
- }
- }
- }
- }
-}
-
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for ik differentiation
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
- FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
- int nx_p, int ny_p, int nz_p, int nft,
- int nxlo_ft, int nylo_ft, int nzlo_ft,
- int nxhi_ft, int nyhi_ft, int nzhi_ft,
- int nxlo_i, int nylo_i, int nzlo_i,
- int nxhi_i, int nyhi_i, int nzhi_i,
- double& egy, double* gfn,
- double* kx, double* ky, double* kz,
- double* kx2, double* ky2, double* kz2,
- FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
- double* vir, double** vcoeff, double** vcoeff2,
- FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
- FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-
-
-{
- int i,j,k,n;
- double eng;
-
- // transform charge/dispersion density (r -> k)
- n = 0;
- for (i = 0; i < nft; i++) {
- wk1[n++] = dfft[i];
- wk1[n++] = ZEROF;
- }
-
- ft1->compute(wk1,wk1,1);
-
- // if requested, compute energy and virial contribution
-
- double scaleinv = 1.0/(nx_p*ny_p*nz_p);
- double s2 = scaleinv*scaleinv;
-
- if (eflag_global || vflag_global) {
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nft; i++) {
- eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
- for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
- if (eflag_global) egy += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nft; i++) {
- egy +=
- s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
- n += 2;
- }
- }
- }
-
- // scale by 1/total-grid-pts to get rho(k)
- // multiply by Green's function to get V(k)
-
- n = 0;
- for (i = 0; i < nft; i++) {
- wk1[n++] *= scaleinv * gfn[i];
- wk1[n++] *= scaleinv * gfn[i];
- }
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x & y direction gradient
-
- n = 0;
- for (k = nzlo_ft; k <= nzhi_ft; k++)
- for (j = nylo_ft; j <= nyhi_ft; j++)
- for (i = nxlo_ft; i <= nxhi_ft; i++) {
- wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
- wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- vx_brick[k][j][i] = wk2[n++];
- vy_brick[k][j][i] = wk2[n++];
- }
-
- if (!eflag_atom) {
- // z direction gradient only
-
- n = 0;
- for (k = nzlo_ft; k <= nzhi_ft; k++)
- for (j = nylo_ft; j <= nyhi_ft; j++)
- for (i = nxlo_ft; i <= nxhi_ft; i++) {
- wk2[n] = kz[k]*wk1[n+1];
- wk2[n+1] = -kz[k]*wk1[n];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- vz_brick[k][j][i] = wk2[n];
- n += 2;
- }
-
- }
-
- else {
- // z direction gradient & per-atom energy
-
- n = 0;
- for (k = nzlo_ft; k <= nzhi_ft; k++)
- for (j = nylo_ft; j <= nyhi_ft; j++)
- for (i = nxlo_ft; i <= nxhi_ft; i++) {
- wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
- wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- vz_brick[k][j][i] = wk2[n++];
- u_pa[k][j][i] = wk2[n++];;
- }
- }
-
- if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
- nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
- v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for ad differentiation
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
- FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
- int nx_p, int ny_p, int nz_p, int nft,
- int nxlo_ft, int nylo_ft, int nzlo_ft,
- int nxhi_ft, int nyhi_ft, int nzhi_ft,
- int nxlo_i, int nylo_i, int nzlo_i,
- int nxhi_i, int nyhi_i, int nzhi_i,
- double& egy, double* gfn,
- double* vir, double** vcoeff, double** vcoeff2,
- FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
- FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-
-
-{
- int i,j,k,n;
- double eng;
-
- // transform charge/dispersion density (r -> k)
- n = 0;
- for (i = 0; i < nft; i++) {
- wk1[n++] = dfft[i];
- wk1[n++] = ZEROF;
- }
-
- ft1->compute(wk1,wk1,1);
-
- // if requested, compute energy and virial contribution
-
- double scaleinv = 1.0/(nx_p*ny_p*nz_p);
- double s2 = scaleinv*scaleinv;
-
- if (eflag_global || vflag_global) {
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nft; i++) {
- eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
- for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
- if (eflag_global) egy += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nft; i++) {
- egy +=
- s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
- n += 2;
- }
- }
- }
-
- // scale by 1/total-grid-pts to get rho(k)
- // multiply by Green's function to get V(k)
-
- n = 0;
- for (i = 0; i < nft; i++) {
- wk1[n++] *= scaleinv * gfn[i];
- wk1[n++] *= scaleinv * gfn[i];
- }
-
-
- n = 0;
- for (k = nzlo_ft; k <= nzhi_ft; k++)
- for (j = nylo_ft; j <= nyhi_ft; j++)
- for (i = nxlo_ft; i <= nxhi_ft; i++) {
- wk2[n] = wk1[n];
- wk2[n+1] = wk1[n+1];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- u_pa[k][j][i] = wk2[n++];
- n++;
- }
-
-
- if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
- nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
- v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
-
-}
-
-/* ----------------------------------------------------------------------
- Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2,
- double** vcoeff, double** vcoeff2, int nft,
- int nxlo_i, int nylo_i, int nzlo_i,
- int nxhi_i, int nyhi_i, int nzhi_i,
- FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
- FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-{
- //v0 & v1 term
- int n, i, j, k;
- n = 0;
- for (i = 0; i < nft; i++) {
- wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1];
- wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- v0_pa[k][j][i] = wk2[n++];
- v1_pa[k][j][i] = wk2[n++];
- }
-
- //v2 & v3 term
-
- n = 0;
- for (i = 0; i < nft; i++) {
- wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0];
- wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- v2_pa[k][j][i] = wk2[n++];
- v3_pa[k][j][i] = wk2[n++];
- }
-
- //v4 & v5 term
-
- n = 0;
- for (i = 0; i < nft; i++) {
- wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2];
- wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2];
- n += 2;
- }
-
- ft2->compute(wk2,wk2,-1);
-
- n = 0;
- for (k = nzlo_i; k <= nzhi_i; k++)
- for (j = nylo_i; j <= nyhi_i; j++)
- for (i = nxlo_i; i <= nxhi_i; i++) {
- v4_pa[k][j][i] = wk2[n++];
- v5_pa[k][j][i] = wk2[n++];
- }
-
-}
-
-/* ----------------------------------------------------------------------
- Poisson solver for one mesh with 2 different dispersion densities
- for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
- FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
- FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
- FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
- FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
- FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
- FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-
-{
- int i,j,k,n;
- double eng;
-
- double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
- // transform charge/dispersion density (r -> k)
- // only one tansform required when energies and pressures do not
- // need to be calculated
- if (eflag_global + vflag_global == 0) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] = dfft_1[i];
- work1_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- }
- // two transforms are required when energies and pressures are
- // calculated
- else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n] = dfft_1[i];
- work2_6[n++] = ZEROF;
- work1_6[n] = ZEROF;
- work2_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- fft1_6->compute(work2_6,work2_6,1);
-
- double s2 = scaleinv*scaleinv;
-
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
- for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
- if (eflag_global)energy_6 += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- energy_6 +=
- 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
- n += 2;
- }
- }
- // unify the two transformed vectors for efficient calculations later
- for ( i = 0; i < 2*nfft_6; i++) {
- work1_6[i] += work2_6[i];
- }
- }
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] *= scaleinv * greensfn_6[i];
- work1_6[n++] *= scaleinv * greensfn_6[i];
- }
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vxbrick_1[k][j][i] = work2_6[n++];
- vxbrick_2[k][j][i] = work2_6[n++];
- }
-
- // y direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vybrick_1[k][j][i] = work2_6[n++];
- vybrick_2[k][j][i] = work2_6[n++];
- }
-
- // z direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vzbrick_1[k][j][i] = work2_6[n++];
- vzbrick_2[k][j][i] = work2_6[n++];
- }
-
- //Per-atom energy
-
- if (eflag_atom) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n];
- work2_6[n+1] = work1_6[n+1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- u_pa_1[k][j][i] = work2_6[n++];
- u_pa_2[k][j][i] = work2_6[n++];
- }
- }
-
- if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
- v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
-}
-
-
-/* ----------------------------------------------------------------------
- Poisson solver for one mesh with 2 different dispersion densities
- for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
- FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
- FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
- FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
- FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
-{
- int i,j,k,n;
- double eng;
-
- double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
- // transform charge/dispersion density (r -> k)
- // only one tansform required when energies and pressures do not
- // need to be calculated
- if (eflag_global + vflag_global == 0) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] = dfft_1[i];
- work1_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- }
-
-
- // two transforms are required when energies and pressures are
- // calculated
- else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n] = dfft_1[i];
- work2_6[n++] = ZEROF;
- work1_6[n] = ZEROF;
- work2_6[n++] = dfft_2[i];
- }
-
-
- fft1_6->compute(work1_6,work1_6,1);
- fft1_6->compute(work2_6,work2_6,1);
-
- double s2 = scaleinv*scaleinv;
-
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
- for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
- if (eflag_global)energy_6 += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- energy_6 +=
- s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
- n += 2;
- }
- }
- // unify the two transformed vectors for efficient calculations later
- for ( i = 0; i < 2*nfft_6; i++) {
- work1_6[i] += work2_6[i];
- }
- }
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] *= scaleinv * greensfn_6[i];
- work1_6[n++] *= scaleinv * greensfn_6[i];
- }
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
- vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- // y direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vybrick_1[k][j][i] = B[n1]*work2_6[n++];
- vybrick_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- // z direction gradient
-
- n = 0;
- for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
- for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
- for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
- work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
- work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
- vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Per-atom energy
-
- if (eflag_atom) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n];
- work2_6[n+1] = work1_6[n+1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
- u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
- }
- }
-
- if (vflag_atom) poisson_none_peratom(n1,n2,
- v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
- v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
-}
-
-/* ----------------------------------------------------------------------
- Poisson solver for one mesh with 2 different dispersion densities
- for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
- FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
- FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
- FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
- FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-
-{
- int i,j,k,n;
- double eng;
-
- double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
- // transform charge/dispersion density (r -> k)
- // only one tansform required when energies and pressures do not
- // need to be calculated
- if (eflag_global + vflag_global == 0) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] = dfft_1[i];
- work1_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- }
- // two transforms are required when energies and pressures are
- // calculated
- else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n] = dfft_1[i];
- work2_6[n++] = ZEROF;
- work1_6[n] = ZEROF;
- work2_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- fft1_6->compute(work2_6,work2_6,1);
-
- double s2 = scaleinv*scaleinv;
-
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
- for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
- if (eflag_global)energy_6 += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- energy_6 +=
- 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
- n += 2;
- }
- }
- // unify the two transformed vectors for efficient calculations later
- for ( i = 0; i < 2*nfft_6; i++) {
- work1_6[i] += work2_6[i];
- }
- }
-
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] *= scaleinv * greensfn_6[i];
- work1_6[n++] *= scaleinv * greensfn_6[i];
- }
-
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n];
- work2_6[n+1] = work1_6[n+1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- u_pa_1[k][j][i] = work2_6[n++];
- u_pa_2[k][j][i] = work2_6[n++];
- }
-
- if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
- v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
-}
-
-/* ----------------------------------------------------------------------
- Poisson solver for one mesh with 2 different dispersion densities
- for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
- FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
- FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
- FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
-{
- int i,j,k,n;
- double eng;
-
- double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
- // transform charge/dispersion density (r -> k)
- // only one tansform required when energies and pressures do not
- // need to be calculated
- if (eflag_global + vflag_global == 0) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] = dfft_1[i];
- work1_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- }
- // two transforms are required when energies and pressures are
- // calculated
- else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n] = dfft_1[i];
- work2_6[n++] = ZEROF;
- work1_6[n] = ZEROF;
- work2_6[n++] = dfft_2[i];
- }
-
- fft1_6->compute(work1_6,work1_6,1);
- fft1_6->compute(work2_6,work2_6,1);
-
- double s2 = scaleinv*scaleinv;
-
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
- for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
- if (eflag_global)energy_6 += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- energy_6 +=
- s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
- n += 2;
- }
- }
- // unify the two transformed vectors for efficient calculations later
- for ( i = 0; i < 2*nfft_6; i++) {
- work1_6[i] += work2_6[i];
- }
- }
-
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work1_6[n++] *= scaleinv * greensfn_6[i];
- work1_6[n++] *= scaleinv * greensfn_6[i];
- }
-
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n];
- work2_6[n+1] = work1_6[n+1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- u_pa_1[k][j][i] = B[n1]*work2_6[n++];
- u_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- if (vflag_atom) poisson_none_peratom(n1,n2,
- v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
- v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
-}
-
-/* ----------------------------------------------------------------------
- Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
- FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
- FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
- FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-{
- //Compute first virial term v0
- int n, i, j, k;
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][0];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v0_pa_1[k][j][i] = work2_6[n++];
- v0_pa_2[k][j][i] = work2_6[n++];
- }
-
- //Compute second virial term v1
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][1];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v1_pa_1[k][j][i] = work2_6[n++];
- v1_pa_2[k][j][i] = work2_6[n++];
- }
-
- //Compute third virial term v2
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][2];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v2_pa_1[k][j][i] = work2_6[n++];
- v2_pa_2[k][j][i] = work2_6[n++];
- }
-
- //Compute fourth virial term v3
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][0];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v3_pa_1[k][j][i] = work2_6[n++];
- v3_pa_2[k][j][i] = work2_6[n++];
- }
-
- //Compute fifth virial term v4
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][1];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v4_pa_1[k][j][i] = work2_6[n++];
- v4_pa_2[k][j][i] = work2_6[n++];
- }
-
- //Compute last virial term v5
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][2];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v5_pa_1[k][j][i] = work2_6[n++];
- v5_pa_2[k][j][i] = work2_6[n++];
- }
-}
-
-/* ----------------------------------------------------------------------
- Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_peratom(int n1, int n2,
- FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
- FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
- FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
- FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-{
- //Compute first virial term v0
- int n, i, j, k;
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][0];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Compute second virial term v1
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][1];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Compute third virial term v2
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg_6[i][2];
- work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Compute fourth virial term v3
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][0];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Compute fifth virial term v4
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][1];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-
- //Compute last virial term v5
-
- n = 0;
- for (i = 0; i < nfft_6; i++) {
- work2_6[n] = work1_6[n]*vg2_6[i][2];
- work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
- n += 2;
- }
-
- fft2_6->compute(work2_6,work2_6,-1);
-
- n = 0;
- for (k = nzlo_in_6; k <= nzhi_in_6; k++)
- for (j = nylo_in_6; j <= nyhi_in_6; j++)
- for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
- v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
- v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
- for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_ik()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx,eky,ekz;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- ekx -= x0*vdx_brick[mz][my][mx];
- eky -= x0*vdy_brick[mz][my][mx];
- ekz -= x0*vdz_brick[mz][my][mx];
- }
- }
- }
-
- // convert E-field to force
-
- const double qfactor = force->qqrd2e * scale * q[i];
- f[i][0] += qfactor*ekx;
- f[i][1] += qfactor*eky;
- if (slabflag != 2) f[i][2] += qfactor*ekz;
- }
-}
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
- for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_ad()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz;
- FFT_SCALAR ekx,eky,ekz;
- double s1,s2,s3;
- double sf = 0.0;
-
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double hx_inv = nx_pppm/xprd;
- double hy_inv = ny_pppm/yprd;
- double hz_inv = nz_pppm/zprd_slab;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
- compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
- eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
- ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
- }
- }
- }
- ekx *= hx_inv;
- eky *= hy_inv;
- ekz *= hz_inv;
- // convert E-field to force and substract self forces
- const double qfactor = force->qqrd2e * scale;
-
- s1 = x[i][0]*hx_inv;
- s2 = x[i][1]*hy_inv;
- s3 = x[i][2]*hz_inv;
- sf = sf_coeff[0]*sin(2*MY_PI*s1);
- sf += sf_coeff[1]*sin(4*MY_PI*s1);
- sf *= 2*q[i]*q[i];
- f[i][0] += qfactor*(ekx*q[i] - sf);
-
- sf = sf_coeff[2]*sin(2*MY_PI*s2);
- sf += sf_coeff[3]*sin(4*MY_PI*s2);
- sf *= 2*q[i]*q[i];
- f[i][1] += qfactor*(eky*q[i] - sf);
-
-
- sf = sf_coeff[4]*sin(2*MY_PI*s3);
- sf += sf_coeff[5]*sin(4*MY_PI*s3);
- sf *= 2*q[i]*q[i];
- if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_peratom()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
- u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
- if (vflag_atom) {
- v0 += x0*v0_brick[mz][my][mx];
- v1 += x0*v1_brick[mz][my][mx];
- v2 += x0*v2_brick[mz][my][mx];
- v3 += x0*v3_brick[mz][my][mx];
- v4 += x0*v4_brick[mz][my][mx];
- v5 += x0*v5_brick[mz][my][mx];
- }
- }
- }
- }
-
- // convert E-field to force
-
- const double qfactor = 0.5*force->qqrd2e * scale * q[i];
-
- if (eflag_atom) eatom[i] += u_pa*qfactor;
- if (vflag_atom) {
- vatom[i][0] += v0*qfactor;
- vatom[i][1] += v1*qfactor;
- vatom[i][2] += v2*qfactor;
- vatom[i][3] += v3*qfactor;
- vatom[i][4] += v4*qfactor;
- vatom[i][5] += v5*qfactor;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for geometric mixing rule
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_ik()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx,eky,ekz;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- ekx -= x0*vdx_brick_g[mz][my][mx];
- eky -= x0*vdy_brick_g[mz][my][mx];
- ekz -= x0*vdz_brick_g[mz][my][mx];
- }
- }
- }
-
- // convert E-field to force
- type = atom->type[i];
- lj = B[type];
- f[i][0] += lj*ekx;
- f[i][1] += lj*eky;
- if (slabflag != 2) f[i][2] += lj*ekz;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for geometric mixing rule for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_ad()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz;
- FFT_SCALAR ekx,eky,ekz;
- double s1,s2,s3;
- double sf = 0.0;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double hx_inv = nx_pppm_6/xprd;
- double hy_inv = ny_pppm_6/yprd;
- double hz_inv = nz_pppm_6/zprd_slab;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
- eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
- ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
- }
- }
- }
- ekx *= hx_inv;
- eky *= hy_inv;
- ekz *= hz_inv;
-
- // convert E-field to force
- type = atom->type[i];
- lj = B[type];
-
- s1 = x[i][0]*hx_inv;
- s2 = x[i][1]*hy_inv;
- s3 = x[i][2]*hz_inv;
-
- sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
- sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
- sf *= 2*lj*lj;
- f[i][0] += ekx*lj - sf;
-
- sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
- sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
- sf *= 2*lj*lj;
- f[i][1] += eky*lj - sf;
-
-
- sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
- sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
- sf *= 2*lj*lj;
- if (slabflag != 2) f[i][2] += ekz*lj - sf;
-
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for geometric mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_peratom()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
- u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];
- if (vflag_atom) {
- v0 += x0*v0_brick_g[mz][my][mx];
- v1 += x0*v1_brick_g[mz][my][mx];
- v2 += x0*v2_brick_g[mz][my][mx];
- v3 += x0*v3_brick_g[mz][my][mx];
- v4 += x0*v4_brick_g[mz][my][mx];
- v5 += x0*v5_brick_g[mz][my][mx];
- }
- }
- }
- }
-
- // convert E-field to force
- type = atom->type[i];
- lj = B[type]*0.5;
-
- if (eflag_atom) eatom[i] += u_pa*lj;
- if (vflag_atom) {
- vatom[i][0] += v0*lj;
- vatom[i][1] += v1*lj;
- vatom[i][2] += v2*lj;
- vatom[i][3] += v3*lj;
- vatom[i][4] += v4*lj;
- vatom[i][5] += v5*lj;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule and ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_ik()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
- FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
- FFT_SCALAR ekx6, eky6, ekz6;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- ekx0 = eky0 = ekz0 = ZEROF;
- ekx1 = eky1 = ekz1 = ZEROF;
- ekx2 = eky2 = ekz2 = ZEROF;
- ekx3 = eky3 = ekz3 = ZEROF;
- ekx4 = eky4 = ekz4 = ZEROF;
- ekx5 = eky5 = ekz5 = ZEROF;
- ekx6 = eky6 = ekz6 = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- ekx0 -= x0*vdx_brick_a0[mz][my][mx];
- eky0 -= x0*vdy_brick_a0[mz][my][mx];
- ekz0 -= x0*vdz_brick_a0[mz][my][mx];
- ekx1 -= x0*vdx_brick_a1[mz][my][mx];
- eky1 -= x0*vdy_brick_a1[mz][my][mx];
- ekz1 -= x0*vdz_brick_a1[mz][my][mx];
- ekx2 -= x0*vdx_brick_a2[mz][my][mx];
- eky2 -= x0*vdy_brick_a2[mz][my][mx];
- ekz2 -= x0*vdz_brick_a2[mz][my][mx];
- ekx3 -= x0*vdx_brick_a3[mz][my][mx];
- eky3 -= x0*vdy_brick_a3[mz][my][mx];
- ekz3 -= x0*vdz_brick_a3[mz][my][mx];
- ekx4 -= x0*vdx_brick_a4[mz][my][mx];
- eky4 -= x0*vdy_brick_a4[mz][my][mx];
- ekz4 -= x0*vdz_brick_a4[mz][my][mx];
- ekx5 -= x0*vdx_brick_a5[mz][my][mx];
- eky5 -= x0*vdy_brick_a5[mz][my][mx];
- ekz5 -= x0*vdz_brick_a5[mz][my][mx];
- ekx6 -= x0*vdx_brick_a6[mz][my][mx];
- eky6 -= x0*vdy_brick_a6[mz][my][mx];
- ekz6 -= x0*vdz_brick_a6[mz][my][mx];
- }
- }
- }
- // convert D-field to force
- type = atom->type[i];
- lj0 = B[7*type+6];
- lj1 = B[7*type+5];
- lj2 = B[7*type+4];
- lj3 = B[7*type+3];
- lj4 = B[7*type+2];
- lj5 = B[7*type+1];
- lj6 = B[7*type];
- f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
- f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
- if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule for the ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_ad()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
- FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
- FFT_SCALAR ekx6, eky6, ekz6;
-
- double s1,s2,s3;
- double sf = 0.0;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double hx_inv = nx_pppm_6/xprd;
- double hy_inv = ny_pppm_6/yprd;
- double hz_inv = nz_pppm_6/zprd_slab;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
- ekx0 = eky0 = ekz0 = ZEROF;
- ekx1 = eky1 = ekz1 = ZEROF;
- ekx2 = eky2 = ekz2 = ZEROF;
- ekx3 = eky3 = ekz3 = ZEROF;
- ekx4 = eky4 = ekz4 = ZEROF;
- ekx5 = eky5 = ekz5 = ZEROF;
- ekx6 = eky6 = ekz6 = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
- y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
- z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
-
- ekx0 += x0*u_brick_a0[mz][my][mx];
- eky0 += y0*u_brick_a0[mz][my][mx];
- ekz0 += z0*u_brick_a0[mz][my][mx];
-
- ekx1 += x0*u_brick_a1[mz][my][mx];
- eky1 += y0*u_brick_a1[mz][my][mx];
- ekz1 += z0*u_brick_a1[mz][my][mx];
-
- ekx2 += x0*u_brick_a2[mz][my][mx];
- eky2 += y0*u_brick_a2[mz][my][mx];
- ekz2 += z0*u_brick_a2[mz][my][mx];
-
- ekx3 += x0*u_brick_a3[mz][my][mx];
- eky3 += y0*u_brick_a3[mz][my][mx];
- ekz3 += z0*u_brick_a3[mz][my][mx];
-
- ekx4 += x0*u_brick_a4[mz][my][mx];
- eky4 += y0*u_brick_a4[mz][my][mx];
- ekz4 += z0*u_brick_a4[mz][my][mx];
-
- ekx5 += x0*u_brick_a5[mz][my][mx];
- eky5 += y0*u_brick_a5[mz][my][mx];
- ekz5 += z0*u_brick_a5[mz][my][mx];
-
- ekx6 += x0*u_brick_a6[mz][my][mx];
- eky6 += y0*u_brick_a6[mz][my][mx];
- ekz6 += z0*u_brick_a6[mz][my][mx];
- }
- }
- }
-
- ekx0 *= hx_inv;
- eky0 *= hy_inv;
- ekz0 *= hz_inv;
-
- ekx1 *= hx_inv;
- eky1 *= hy_inv;
- ekz1 *= hz_inv;
-
- ekx2 *= hx_inv;
- eky2 *= hy_inv;
- ekz2 *= hz_inv;
-
- ekx3 *= hx_inv;
- eky3 *= hy_inv;
- ekz3 *= hz_inv;
-
- ekx4 *= hx_inv;
- eky4 *= hy_inv;
- ekz4 *= hz_inv;
-
- ekx5 *= hx_inv;
- eky5 *= hy_inv;
- ekz5 *= hz_inv;
-
- ekx6 *= hx_inv;
- eky6 *= hy_inv;
- ekz6 *= hz_inv;
-
- // convert D-field to force
- type = atom->type[i];
- lj0 = B[7*type+6];
- lj1 = B[7*type+5];
- lj2 = B[7*type+4];
- lj3 = B[7*type+3];
- lj4 = B[7*type+2];
- lj5 = B[7*type+1];
- lj6 = B[7*type];
-
- s1 = x[i][0]*hx_inv;
- s2 = x[i][1]*hy_inv;
- s3 = x[i][2]*hz_inv;
-
- sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
- sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
- sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
- f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
-
- sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
- sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
- sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
- f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
-
- sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
- sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
- sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
- if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_peratom()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
- FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
- FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
- FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
- FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
- FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
- FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- int type;
- double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
- u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
- u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
- u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
- u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
- u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
- u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
- u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- if (eflag_atom) {
- u_pa0 += x0*u_brick_a0[mz][my][mx];
- u_pa1 += x0*u_brick_a1[mz][my][mx];
- u_pa2 += x0*u_brick_a2[mz][my][mx];
- u_pa3 += x0*u_brick_a3[mz][my][mx];
- u_pa4 += x0*u_brick_a4[mz][my][mx];
- u_pa5 += x0*u_brick_a5[mz][my][mx];
- u_pa6 += x0*u_brick_a6[mz][my][mx];
- }
- if (vflag_atom) {
- v00 += x0*v0_brick_a0[mz][my][mx];
- v10 += x0*v1_brick_a0[mz][my][mx];
- v20 += x0*v2_brick_a0[mz][my][mx];
- v30 += x0*v3_brick_a0[mz][my][mx];
- v40 += x0*v4_brick_a0[mz][my][mx];
- v50 += x0*v5_brick_a0[mz][my][mx];
- v01 += x0*v0_brick_a1[mz][my][mx];
- v11 += x0*v1_brick_a1[mz][my][mx];
- v21 += x0*v2_brick_a1[mz][my][mx];
- v31 += x0*v3_brick_a1[mz][my][mx];
- v41 += x0*v4_brick_a1[mz][my][mx];
- v51 += x0*v5_brick_a1[mz][my][mx];
- v02 += x0*v0_brick_a2[mz][my][mx];
- v12 += x0*v1_brick_a2[mz][my][mx];
- v22 += x0*v2_brick_a2[mz][my][mx];
- v32 += x0*v3_brick_a2[mz][my][mx];
- v42 += x0*v4_brick_a2[mz][my][mx];
- v52 += x0*v5_brick_a2[mz][my][mx];
- v03 += x0*v0_brick_a3[mz][my][mx];
- v13 += x0*v1_brick_a3[mz][my][mx];
- v23 += x0*v2_brick_a3[mz][my][mx];
- v33 += x0*v3_brick_a3[mz][my][mx];
- v43 += x0*v4_brick_a3[mz][my][mx];
- v53 += x0*v5_brick_a3[mz][my][mx];
- v04 += x0*v0_brick_a4[mz][my][mx];
- v14 += x0*v1_brick_a4[mz][my][mx];
- v24 += x0*v2_brick_a4[mz][my][mx];
- v34 += x0*v3_brick_a4[mz][my][mx];
- v44 += x0*v4_brick_a4[mz][my][mx];
- v54 += x0*v5_brick_a4[mz][my][mx];
- v05 += x0*v0_brick_a5[mz][my][mx];
- v15 += x0*v1_brick_a5[mz][my][mx];
- v25 += x0*v2_brick_a5[mz][my][mx];
- v35 += x0*v3_brick_a5[mz][my][mx];
- v45 += x0*v4_brick_a5[mz][my][mx];
- v55 += x0*v5_brick_a5[mz][my][mx];
- v06 += x0*v0_brick_a6[mz][my][mx];
- v16 += x0*v1_brick_a6[mz][my][mx];
- v26 += x0*v2_brick_a6[mz][my][mx];
- v36 += x0*v3_brick_a6[mz][my][mx];
- v46 += x0*v4_brick_a6[mz][my][mx];
- v56 += x0*v5_brick_a6[mz][my][mx];
- }
- }
- }
- }
- // convert D-field to force
- type = atom->type[i];
- lj0 = B[7*type+6]*0.5;
- lj1 = B[7*type+5]*0.5;
- lj2 = B[7*type+4]*0.5;
- lj3 = B[7*type+3]*0.5;
- lj4 = B[7*type+2]*0.5;
- lj5 = B[7*type+1]*0.5;
- lj6 = B[7*type]*0.5;
-
-
- if (eflag_atom)
- eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 +
- u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
- if (vflag_atom) {
- vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
- v04*lj4 + v05*lj5 + v06*lj6;
- vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
- v14*lj4 + v15*lj5 + v16*lj6;
- vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
- v24*lj4 + v25*lj5 + v26*lj6;
- vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
- v34*lj4 + v35*lj5 + v36*lj6;
- vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
- v44*lj4 + v45*lj5 + v46*lj6;
- vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
- v54*lj4 + v55*lj5 + v56*lj6;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule and ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_ik()
-{
- int i,k,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR *ekx, *eky, *ekz;
-
- ekx = new FFT_SCALAR[nsplit];
- eky = new FFT_SCALAR[nsplit];
- ekz = new FFT_SCALAR[nsplit];
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- for (k = 0; k < nsplit; k++)
- ekx[k] = eky[k] = ekz[k] = ZEROF;
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- for (k = 0; k < nsplit; k++) {
- ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
- eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
- ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
- }
- }
- }
- }
- // convert D-field to force
- type = atom->type[i];
- for (k = 0; k < nsplit; k++) {
- lj = B[nsplit*type + k];
- f[i][0] += lj*ekx[k];
- f[i][1] +=lj*eky[k];
- if (slabflag != 2) f[i][2] +=lj*ekz[k];
- }
- }
-
- delete [] ekx;
- delete [] eky;
- delete [] ekz;
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule for the ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_ad()
-{
- int i,k,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR *ekx, *eky, *ekz;
-
- ekx = new FFT_SCALAR[nsplit];
- eky = new FFT_SCALAR[nsplit];
- ekz = new FFT_SCALAR[nsplit];
-
-
- double s1,s2,s3;
- double sf1,sf2,sf3;
- double sf = 0.0;
- double *prd;
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double hx_inv = nx_pppm_6/xprd;
- double hy_inv = ny_pppm_6/yprd;
- double hz_inv = nz_pppm_6/zprd_slab;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- double **f = atom->f;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
- compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
- for (k = 0; k < nsplit; k++)
- ekx[k] = eky[k] = ekz[k] = ZEROF;
-
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
- y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
- z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
-
- for (k = 0; k < nsplit; k++) {
- ekx[k] += x0*u_brick_none[k][mz][my][mx];
- eky[k] += y0*u_brick_none[k][mz][my][mx];
- ekz[k] += z0*u_brick_none[k][mz][my][mx];
- }
- }
- }
- }
-
- for (k = 0; k < nsplit; k++) {
- ekx[k] *= hx_inv;
- eky[k] *= hy_inv;
- ekz[k] *= hz_inv;
- }
-
- // convert D-field to force
- type = atom->type[i];
-
- s1 = x[i][0]*hx_inv;
- s2 = x[i][1]*hy_inv;
- s3 = x[i][2]*hz_inv;
-
- sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
- sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);
-
- sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
- sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);
-
- sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
- sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);
-
- for (k = 0; k < nsplit; k++) {
- lj = B[nsplit*type + k];
-
- sf = sf1*B[k]*2*lj*lj;
- f[i][0] += lj*ekx[k] - sf;
-
-
- sf = sf2*B[k]*2*lj*lj;
- f[i][1] += lj*eky[k] - sf;
-
- sf = sf3*B[k]*2*lj*lj;
- if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
- }
- }
-
- delete [] ekx;
- delete [] eky;
- delete [] ekz;
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get dispersion field & force on my particles
- for arithmetic mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_peratom()
-{
- int i,k,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;
-
- u_pa = new FFT_SCALAR[nsplit];
- v0 = new FFT_SCALAR[nsplit];
- v1 = new FFT_SCALAR[nsplit];
- v2 = new FFT_SCALAR[nsplit];
- v3 = new FFT_SCALAR[nsplit];
- v4 = new FFT_SCALAR[nsplit];
- v5 = new FFT_SCALAR[nsplit];
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of dispersion field on particle
-
- double **x = atom->x;
- int type;
- double lj;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
-
- nx = part2grid_6[i][0];
- ny = part2grid_6[i][1];
- nz = part2grid_6[i][2];
- dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
- dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
- dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
- compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
- for (k = 0; k < nsplit; k++)
- u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;
-
- for (n = nlower_6; n <= nupper_6; n++) {
- mz = n+nz;
- z0 = rho1d_6[2][n];
- for (m = nlower_6; m <= nupper_6; m++) {
- my = m+ny;
- y0 = z0*rho1d_6[1][m];
- for (l = nlower_6; l <= nupper_6; l++) {
- mx = l+nx;
- x0 = y0*rho1d_6[0][l];
- if (eflag_atom) {
- for (k = 0; k < nsplit; k++)
- u_pa[k] += x0*u_brick_none[k][mz][my][mx];
- }
- if (vflag_atom) {
- for (k = 0; k < nsplit; k++) {
- v0[k] += x0*v0_brick_none[k][mz][my][mx];
- v1[k] += x0*v1_brick_none[k][mz][my][mx];
- v2[k] += x0*v2_brick_none[k][mz][my][mx];
- v3[k] += x0*v3_brick_none[k][mz][my][mx];
- v4[k] += x0*v4_brick_none[k][mz][my][mx];
- v5[k] += x0*v5_brick_none[k][mz][my][mx];
- }
- }
- }
- }
- }
- // convert D-field to force
- type = atom->type[i];
- for (k = 0; k < nsplit; k++) {
- lj = B[nsplit*type + k]*0.5;
-
- if (eflag_atom) {
- eatom[i] += u_pa[k]*lj;
- }
- if (vflag_atom) {
- vatom[i][0] += v0[k]*lj;
- vatom[i][1] += v1[k]*lj;
- vatom[i][2] += v2[k]*lj;
- vatom[i][3] += v3[k]*lj;
- vatom[i][4] += v4[k]*lj;
- vatom[i][5] += v5[k]*lj;
- }
- }
- }
-
- delete [] u_pa;
- delete [] v0;
- delete [] v1;
- delete [] v2;
- delete [] v3;
- delete [] v4;
- delete [] v5;
-}
-
-/* ----------------------------------------------------------------------
- pack values to buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- switch (flag) {
-
- // Coulomb interactions
-
- case FORWARD_IK: {
- FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = xsrc[list[i]];
- buf[n++] = ysrc[list[i]];
- buf[n++] = zsrc[list[i]];
- }
- break;
- }
-
- case FORWARD_AD: {
- FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
- break;
- }
-
- case FORWARD_IK_PERATOM: {
- FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) buf[n++] = esrc[list[i]];
- if (vflag_atom) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM: {
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- break;
- }
-
- // Dispersion interactions, geometric mixing
-
- case FORWARD_IK_G: {
- FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = xsrc[list[i]];
- buf[n++] = ysrc[list[i]];
- buf[n++] = zsrc[list[i]];
- }
- break;
- }
-
- case FORWARD_AD_G: {
- FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
- break;
- }
-
- case FORWARD_IK_PERATOM_G: {
- FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) buf[n++] = esrc[list[i]];
- if (vflag_atom) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_G: {
- FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- break;
- }
-
- // Dispersion interactions, arithmetic mixing
-
- case FORWARD_IK_A: {
- FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- buf[n++] = xsrc0[list[i]];
- buf[n++] = ysrc0[list[i]];
- buf[n++] = zsrc0[list[i]];
-
- buf[n++] = xsrc1[list[i]];
- buf[n++] = ysrc1[list[i]];
- buf[n++] = zsrc1[list[i]];
-
- buf[n++] = xsrc2[list[i]];
- buf[n++] = ysrc2[list[i]];
- buf[n++] = zsrc2[list[i]];
-
- buf[n++] = xsrc3[list[i]];
- buf[n++] = ysrc3[list[i]];
- buf[n++] = zsrc3[list[i]];
-
- buf[n++] = xsrc4[list[i]];
- buf[n++] = ysrc4[list[i]];
- buf[n++] = zsrc4[list[i]];
-
- buf[n++] = xsrc5[list[i]];
- buf[n++] = ysrc5[list[i]];
- buf[n++] = zsrc5[list[i]];
-
- buf[n++] = xsrc6[list[i]];
- buf[n++] = ysrc6[list[i]];
- buf[n++] = zsrc6[list[i]];
- }
- break;
- }
-
- case FORWARD_AD_A: {
- FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- buf[n++] = src0[list[i]];
- buf[n++] = src1[list[i]];
- buf[n++] = src2[list[i]];
- buf[n++] = src3[list[i]];
- buf[n++] = src4[list[i]];
- buf[n++] = src5[list[i]];
- buf[n++] = src6[list[i]];
- }
- break;
- }
-
- case FORWARD_IK_PERATOM_A: {
- FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) {
- buf[n++] = esrc0[list[i]];
- buf[n++] = esrc1[list[i]];
- buf[n++] = esrc2[list[i]];
- buf[n++] = esrc3[list[i]];
- buf[n++] = esrc4[list[i]];
- buf[n++] = esrc5[list[i]];
- buf[n++] = esrc6[list[i]];
- }
- if (vflag_atom) {
- buf[n++] = v0src0[list[i]];
- buf[n++] = v1src0[list[i]];
- buf[n++] = v2src0[list[i]];
- buf[n++] = v3src0[list[i]];
- buf[n++] = v4src0[list[i]];
- buf[n++] = v5src0[list[i]];
-
- buf[n++] = v0src1[list[i]];
- buf[n++] = v1src1[list[i]];
- buf[n++] = v2src1[list[i]];
- buf[n++] = v3src1[list[i]];
- buf[n++] = v4src1[list[i]];
- buf[n++] = v5src1[list[i]];
-
- buf[n++] = v0src2[list[i]];
- buf[n++] = v1src2[list[i]];
- buf[n++] = v2src2[list[i]];
- buf[n++] = v3src2[list[i]];
- buf[n++] = v4src2[list[i]];
- buf[n++] = v5src2[list[i]];
-
- buf[n++] = v0src3[list[i]];
- buf[n++] = v1src3[list[i]];
- buf[n++] = v2src3[list[i]];
- buf[n++] = v3src3[list[i]];
- buf[n++] = v4src3[list[i]];
- buf[n++] = v5src3[list[i]];
-
- buf[n++] = v0src4[list[i]];
- buf[n++] = v1src4[list[i]];
- buf[n++] = v2src4[list[i]];
- buf[n++] = v3src4[list[i]];
- buf[n++] = v4src4[list[i]];
- buf[n++] = v5src4[list[i]];
-
- buf[n++] = v0src5[list[i]];
- buf[n++] = v1src5[list[i]];
- buf[n++] = v2src5[list[i]];
- buf[n++] = v3src5[list[i]];
- buf[n++] = v4src5[list[i]];
- buf[n++] = v5src5[list[i]];
-
- buf[n++] = v0src6[list[i]];
- buf[n++] = v1src6[list[i]];
- buf[n++] = v2src6[list[i]];
- buf[n++] = v3src6[list[i]];
- buf[n++] = v4src6[list[i]];
- buf[n++] = v5src6[list[i]];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_A: {
- FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- buf[n++] = v0src0[list[i]];
- buf[n++] = v1src0[list[i]];
- buf[n++] = v2src0[list[i]];
- buf[n++] = v3src0[list[i]];
- buf[n++] = v4src0[list[i]];
- buf[n++] = v5src0[list[i]];
-
- buf[n++] = v0src1[list[i]];
- buf[n++] = v1src1[list[i]];
- buf[n++] = v2src1[list[i]];
- buf[n++] = v3src1[list[i]];
- buf[n++] = v4src1[list[i]];
- buf[n++] = v5src1[list[i]];
-
- buf[n++] = v0src2[list[i]];
- buf[n++] = v1src2[list[i]];
- buf[n++] = v2src2[list[i]];
- buf[n++] = v3src2[list[i]];
- buf[n++] = v4src2[list[i]];
- buf[n++] = v5src2[list[i]];
-
- buf[n++] = v0src3[list[i]];
- buf[n++] = v1src3[list[i]];
- buf[n++] = v2src3[list[i]];
- buf[n++] = v3src3[list[i]];
- buf[n++] = v4src3[list[i]];
- buf[n++] = v5src3[list[i]];
-
- buf[n++] = v0src4[list[i]];
- buf[n++] = v1src4[list[i]];
- buf[n++] = v2src4[list[i]];
- buf[n++] = v3src4[list[i]];
- buf[n++] = v4src4[list[i]];
- buf[n++] = v5src4[list[i]];
-
- buf[n++] = v0src5[list[i]];
- buf[n++] = v1src5[list[i]];
- buf[n++] = v2src5[list[i]];
- buf[n++] = v3src5[list[i]];
- buf[n++] = v4src5[list[i]];
- buf[n++] = v5src5[list[i]];
-
- buf[n++] = v0src6[list[i]];
- buf[n++] = v1src6[list[i]];
- buf[n++] = v2src6[list[i]];
- buf[n++] = v3src6[list[i]];
- buf[n++] = v4src6[list[i]];
- buf[n++] = v5src6[list[i]];
- }
- break;
- }
-
- // Dispersion interactions, no mixing
-
- case FORWARD_IK_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = xsrc[list[i]];
- buf[n++] = ysrc[list[i]];
- buf[n++] = zsrc[list[i]];
- }
- }
- break;
- }
-
- case FORWARD_AD_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- buf[n++] = src[list[i]];
- }
- break;
- }
-
- case FORWARD_IK_PERATOM_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) buf[n++] = esrc[list[i]];
- if (vflag_atom) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = v0src[list[i]];
- buf[n++] = v1src[list[i]];
- buf[n++] = v2src[list[i]];
- buf[n++] = v3src[list[i]];
- buf[n++] = v4src[list[i]];
- buf[n++] = v5src[list[i]];
- }
- }
- break;
- }
-
- }
-}
-
-/* ----------------------------------------------------------------------
- unpack another proc's own values from buf and set own ghost values
-------------------------------------------------------------------------- */
-
-void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- switch (flag) {
-
- // Coulomb interactions
-
- case FORWARD_IK: {
- FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- xdest[list[i]] = buf[n++];
- ydest[list[i]] = buf[n++];
- zdest[list[i]] = buf[n++];
- }
- break;
- }
-
- case FORWARD_AD: {
- FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] = buf[n++];
- break;
- }
-
- case FORWARD_IK_PERATOM: {
- FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) esrc[list[i]] = buf[n++];
- if (vflag_atom) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM: {
- FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
- FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- break;
- }
-
- // Disperion interactions, geometric mixing
-
- case FORWARD_IK_G: {
- FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- xdest[list[i]] = buf[n++];
- ydest[list[i]] = buf[n++];
- zdest[list[i]] = buf[n++];
- }
- break;
- }
-
- case FORWARD_AD_G: {
- FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] = buf[n++];
- break;
- }
-
- case FORWARD_IK_PERATOM_G: {
- FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) esrc[list[i]] = buf[n++];
- if (vflag_atom) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_G: {
- FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- break;
- }
-
- // Disperion interactions, arithmetic mixing
-
- case FORWARD_IK_A: {
- FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- xdest0[list[i]] = buf[n++];
- ydest0[list[i]] = buf[n++];
- zdest0[list[i]] = buf[n++];
-
- xdest1[list[i]] = buf[n++];
- ydest1[list[i]] = buf[n++];
- zdest1[list[i]] = buf[n++];
-
- xdest2[list[i]] = buf[n++];
- ydest2[list[i]] = buf[n++];
- zdest2[list[i]] = buf[n++];
-
- xdest3[list[i]] = buf[n++];
- ydest3[list[i]] = buf[n++];
- zdest3[list[i]] = buf[n++];
-
- xdest4[list[i]] = buf[n++];
- ydest4[list[i]] = buf[n++];
- zdest4[list[i]] = buf[n++];
-
- xdest5[list[i]] = buf[n++];
- ydest5[list[i]] = buf[n++];
- zdest5[list[i]] = buf[n++];
-
- xdest6[list[i]] = buf[n++];
- ydest6[list[i]] = buf[n++];
- zdest6[list[i]] = buf[n++];
- }
- break;
- }
-
- case FORWARD_AD_A: {
- FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- dest0[list[i]] = buf[n++];
- dest1[list[i]] = buf[n++];
- dest2[list[i]] = buf[n++];
- dest3[list[i]] = buf[n++];
- dest4[list[i]] = buf[n++];
- dest5[list[i]] = buf[n++];
- dest6[list[i]] = buf[n++];
- }
- break;
- }
-
- case FORWARD_IK_PERATOM_A: {
- FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) {
- esrc0[list[i]] = buf[n++];
- esrc1[list[i]] = buf[n++];
- esrc2[list[i]] = buf[n++];
- esrc3[list[i]] = buf[n++];
- esrc4[list[i]] = buf[n++];
- esrc5[list[i]] = buf[n++];
- esrc6[list[i]] = buf[n++];
- }
- if (vflag_atom) {
- v0src0[list[i]] = buf[n++];
- v1src0[list[i]] = buf[n++];
- v2src0[list[i]] = buf[n++];
- v3src0[list[i]] = buf[n++];
- v4src0[list[i]] = buf[n++];
- v5src0[list[i]] = buf[n++];
-
- v0src1[list[i]] = buf[n++];
- v1src1[list[i]] = buf[n++];
- v2src1[list[i]] = buf[n++];
- v3src1[list[i]] = buf[n++];
- v4src1[list[i]] = buf[n++];
- v5src1[list[i]] = buf[n++];
-
- v0src2[list[i]] = buf[n++];
- v1src2[list[i]] = buf[n++];
- v2src2[list[i]] = buf[n++];
- v3src2[list[i]] = buf[n++];
- v4src2[list[i]] = buf[n++];
- v5src2[list[i]] = buf[n++];
-
- v0src3[list[i]] = buf[n++];
- v1src3[list[i]] = buf[n++];
- v2src3[list[i]] = buf[n++];
- v3src3[list[i]] = buf[n++];
- v4src3[list[i]] = buf[n++];
- v5src3[list[i]] = buf[n++];
-
- v0src4[list[i]] = buf[n++];
- v1src4[list[i]] = buf[n++];
- v2src4[list[i]] = buf[n++];
- v3src4[list[i]] = buf[n++];
- v4src4[list[i]] = buf[n++];
- v5src4[list[i]] = buf[n++];
-
- v0src5[list[i]] = buf[n++];
- v1src5[list[i]] = buf[n++];
- v2src5[list[i]] = buf[n++];
- v3src5[list[i]] = buf[n++];
- v4src5[list[i]] = buf[n++];
- v5src5[list[i]] = buf[n++];
-
- v0src6[list[i]] = buf[n++];
- v1src6[list[i]] = buf[n++];
- v2src6[list[i]] = buf[n++];
- v3src6[list[i]] = buf[n++];
- v4src6[list[i]] = buf[n++];
- v5src6[list[i]] = buf[n++];
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_A: {
- FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
- for (int i = 0; i < nlist; i++) {
- v0src0[list[i]] = buf[n++];
- v1src0[list[i]] = buf[n++];
- v2src0[list[i]] = buf[n++];
- v3src0[list[i]] = buf[n++];
- v4src0[list[i]] = buf[n++];
- v5src0[list[i]] = buf[n++];
-
- v0src1[list[i]] = buf[n++];
- v1src1[list[i]] = buf[n++];
- v2src1[list[i]] = buf[n++];
- v3src1[list[i]] = buf[n++];
- v4src1[list[i]] = buf[n++];
- v5src1[list[i]] = buf[n++];
-
- v0src2[list[i]] = buf[n++];
- v1src2[list[i]] = buf[n++];
- v2src2[list[i]] = buf[n++];
- v3src2[list[i]] = buf[n++];
- v4src2[list[i]] = buf[n++];
- v5src2[list[i]] = buf[n++];
-
- v0src3[list[i]] = buf[n++];
- v1src3[list[i]] = buf[n++];
- v2src3[list[i]] = buf[n++];
- v3src3[list[i]] = buf[n++];
- v4src3[list[i]] = buf[n++];
- v5src3[list[i]] = buf[n++];
-
- v0src4[list[i]] = buf[n++];
- v1src4[list[i]] = buf[n++];
- v2src4[list[i]] = buf[n++];
- v3src4[list[i]] = buf[n++];
- v4src4[list[i]] = buf[n++];
- v5src4[list[i]] = buf[n++];
-
- v0src5[list[i]] = buf[n++];
- v1src5[list[i]] = buf[n++];
- v2src5[list[i]] = buf[n++];
- v3src5[list[i]] = buf[n++];
- v4src5[list[i]] = buf[n++];
- v5src5[list[i]] = buf[n++];
-
- v0src6[list[i]] = buf[n++];
- v1src6[list[i]] = buf[n++];
- v2src6[list[i]] = buf[n++];
- v3src6[list[i]] = buf[n++];
- v4src6[list[i]] = buf[n++];
- v5src6[list[i]] = buf[n++];
- }
- break;
- }
-
- // Disperion interactions, geometric mixing
-
- case FORWARD_IK_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- xdest[list[i]] = buf[n++];
- ydest[list[i]] = buf[n++];
- zdest[list[i]] = buf[n++];
- }
- }
- break;
- }
-
- case FORWARD_AD_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] = buf[n++];
- }
- break;
- }
-
- case FORWARD_IK_PERATOM_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- if (eflag_atom) esrc[list[i]] = buf[n++];
- if (vflag_atom) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
- }
- break;
- }
-
- case FORWARD_AD_PERATOM_NONE: {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- v0src[list[i]] = buf[n++];
- v1src[list[i]] = buf[n++];
- v2src[list[i]] = buf[n++];
- v3src[list[i]] = buf[n++];
- v4src[list[i]] = buf[n++];
- v5src[list[i]] = buf[n++];
- }
- }
- break;
- }
-
- }
-}
-
-/* ----------------------------------------------------------------------
- pack ghost values into buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- //Coulomb interactions
-
- if (flag == REVERSE_RHO) {
- FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
-
- //Dispersion interactions, geometric mixing
-
- } else if (flag == REVERSE_RHO_G) {
- FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- buf[i] = src[list[i]];
-
- //Dispersion interactions, arithmetic mixing
-
- } else if (flag == REVERSE_RHO_A) {
- FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = src0[list[i]];
- buf[n++] = src1[list[i]];
- buf[n++] = src2[list[i]];
- buf[n++] = src3[list[i]];
- buf[n++] = src4[list[i]];
- buf[n++] = src5[list[i]];
- buf[n++] = src6[list[i]];
- }
-
- //Dispersion interactions, no mixing
-
- } else if (flag == REVERSE_RHO_NONE) {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- buf[n++] = src[list[i]];
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- unpack another proc's ghost values from buf and add to own values
-------------------------------------------------------------------------- */
-
-void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
- int n = 0;
-
- //Coulomb interactions
-
- if (flag == REVERSE_RHO) {
- FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] += buf[i];
-
- //Dispersion interactions, geometric mixing
-
- } else if (flag == REVERSE_RHO_G) {
- FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] += buf[i];
-
- //Dispersion interactions, arithmetic mixing
-
- } else if (flag == REVERSE_RHO_A) {
- FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
- FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++) {
- dest0[list[i]] += buf[n++];
- dest1[list[i]] += buf[n++];
- dest2[list[i]] += buf[n++];
- dest3[list[i]] += buf[n++];
- dest4[list[i]] += buf[n++];
- dest5[list[i]] += buf[n++];
- dest6[list[i]] += buf[n++];
- }
-
- //Dispersion interactions, no mixing
-
- } else if (flag == REVERSE_RHO_NONE) {
- for (int k = 0; k < nsplit_alloc; k++) {
- FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
- for (int i = 0; i < nlist; i++)
- dest[list[i]] += buf[n++];
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
-------------------------------------------------------------------------- */
-
-void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
- // loop thru all possible factorizations of nprocs
- // surf = surface area of largest proc sub-domain
- // innermost if test minimizes surface area and surface/volume ratio
-
- int bestsurf = 2 * (nx + ny);
- int bestboxx = 0;
- int bestboxy = 0;
-
- int boxx,boxy,surf,ipx,ipy;
-
- ipx = 1;
- while (ipx <= nprocs) {
- if (nprocs % ipx == 0) {
- ipy = nprocs/ipx;
- boxx = nx/ipx;
- if (nx % ipx) boxx++;
- boxy = ny/ipy;
- if (ny % ipy) boxy++;
- surf = boxx + boxy;
- if (surf < bestsurf ||
- (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
- bestsurf = surf;
- bestboxx = boxx;
- bestboxy = boxy;
- *px = ipx;
- *py = ipy;
- }
- }
- ipx++;
- }
-}
-
-/* ----------------------------------------------------------------------
- charge assignment into rho1d
- dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
- const FFT_SCALAR &dz, int ord,
- FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
-{
- int k,l;
- FFT_SCALAR r1,r2,r3;
-
- for (k = (1-ord)/2; k <= ord/2; k++) {
- r1 = r2 = r3 = ZEROF;
-
- for (l = ord-1; l >= 0; l--) {
- r1 = rho_c[l][k] + r1*dx;
- r2 = rho_c[l][k] + r2*dy;
- r3 = rho_c[l][k] + r3*dz;
- }
- r1d[0][k] = r1;
- r1d[1][k] = r2;
- r1d[2][k] = r3;
- }
-}
-
-/* ----------------------------------------------------------------------
- charge assignment into drho1d
- dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
- const FFT_SCALAR &dz, int ord,
- FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
-{
- int k,l;
- FFT_SCALAR r1,r2,r3;
-
- for (k = (1-ord)/2; k <= ord/2; k++) {
- r1 = r2 = r3 = ZEROF;
-
- for (l = ord-2; l >= 0; l--) {
- r1 = drho_c[l][k] + r1*dx;
- r2 = drho_c[l][k] + r2*dy;
- r3 = drho_c[l][k] + r3*dz;
- }
- dr1d[0][k] = r1;
- dr1d[1][k] = r2;
- dr1d[2][k] = r3;
- }
-}
-
-/* ----------------------------------------------------------------------
- generate coeffients for the weight function of order n
-
- (n-1)
- Wn(x) = Sum wn(k,x) , Sum is over every other integer
- k=-(n-1)
- For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
- k is odd integers if n is even and even integers if n is odd
- ---
- | n-1
- | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
- wn(k,x) = < l=0
- |
- | 0 otherwise
- ---
- a coeffients are packed into the array rho_coeff to eliminate zeros
- rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff,
- int ord)
-{
- int j,k,l,m;
- FFT_SCALAR s;
-
- FFT_SCALAR **a;
- memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
-
- for (k = -ord; k <= ord; k++)
- for (l = 0; l < ord; l++)
- a[l][k] = 0.0;
-
- a[0][0] = 1.0;
- for (j = 1; j < ord; j++) {
- for (k = -j; k <= j; k += 2) {
- s = 0.0;
- for (l = 0; l < j; l++) {
- a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
- s += powf(0.5,(float) l+1) *
- (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
- s += pow(0.5,(double) l+1) *
- (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
- }
- a[0][k] = s;
- }
- }
-
- m = (1-ord)/2;
- for (k = -(ord-1); k < ord; k += 2) {
- for (l = 0; l < ord; l++)
- coeff[l][m] = a[l][k];
- for (l = 1; l < ord; l++)
- dcoeff[l-1][m] = l*a[l][k];
- m++;
- }
-
- memory->destroy2d_offset(a,-ord);
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPMDisp::slabcorr(int eflag)
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int nlocal = atom->nlocal;
-
- double dipole = 0.0;
- for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
- // sum local contributions to get global dipole moment
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
- // need to make non-neutral systems and/or
- // per-atom energy translationally invariant
-
- double dipole_r2 = 0.0;
- if (eflag_atom || fabs(qsum) > SMALL) {
- for (int i = 0; i < nlocal; i++)
- dipole_r2 += q[i]*x[i][2]*x[i][2];
-
- // sum local contributions
-
- double tmp;
- MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2 = tmp;
- }
-
- // compute corrections
-
- const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
- qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) energy_1 += qscale * e_slabcorr;
-
- // per-atom energy
-
- if (eflag_atom) {
- double efact = qscale * MY_2PI/volume;
- for (int i = 0; i < nlocal; i++)
- eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
- qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
- }
-
- // add on force corrections
-
- double ffact = qscale * (-4.0*MY_PI/volume);
- double **f = atom->f;
-
- for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
- perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMDisp::timing_1d(int n, double &time1d)
-{
- double time1,time2;
- int mixing = 1;
- if (function[2]) mixing = 4;
- if (function[3]) mixing = nsplit_alloc/2;
-
- if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
- if (function[1] + function[2] + function[3])
- for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- if (function[0]) {
- for (int i = 0; i < n; i++) {
- fft1->timing1d(work1,nfft_both,1);
- fft2->timing1d(work1,nfft_both,-1);
- if (differentiation_flag != 1){
- fft2->timing1d(work1,nfft_both,-1);
- fft2->timing1d(work1,nfft_both,-1);
- }
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time1d = time2 - time1;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- if (function[1] + function[2] + function[3]) {
- for (int i = 0; i < n; i++) {
- fft1_6->timing1d(work1_6,nfft_both_6,1);
- fft2_6->timing1d(work1_6,nfft_both_6,-1);
- if (differentiation_flag != 1){
- fft2_6->timing1d(work1_6,nfft_both_6,-1);
- fft2_6->timing1d(work1_6,nfft_both_6,-1);
- }
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time1d += (time2 - time1)*mixing;
-
- if (differentiation_flag) return 2;
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMDisp::timing_3d(int n, double &time3d)
-{
- double time1,time2;
- int mixing = 1;
- if (function[2]) mixing = 4;
- if (function[3]) mixing = nsplit_alloc/2;
-
- if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
- if (function[1] + function[2] + function[3])
- for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
-
-
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- if (function[0]) {
- for (int i = 0; i < n; i++) {
- fft1->compute(work1,work1,1);
- fft2->compute(work1,work1,-1);
- if (differentiation_flag != 1) {
- fft2->compute(work1,work1,-1);
- fft2->compute(work1,work1,-1);
- }
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time3d = time2 - time1;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- if (function[1] + function[2] + function[3]) {
- for (int i = 0; i < n; i++) {
- fft1_6->compute(work1_6,work1_6,1);
- fft2_6->compute(work1_6,work1_6,-1);
- if (differentiation_flag != 1) {
- fft2_6->compute(work1_6,work1_6,-1);
- fft2_6->compute(work1_6,work1_6,-1);
- }
- }
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time3d += (time2 - time1) * mixing;
-
- if (differentiation_flag) return 2;
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPMDisp::memory_usage()
-{
- double bytes = nmax*3 * sizeof(double);
- int mixing = 1;
- int diff = 3; //depends on differentiation
- int per = 7; //depends on per atom calculations
- if (differentiation_flag) {
- diff = 1;
- per = 6;
- }
- if (!evflag_atom) per = 0;
- if (function[2]) mixing = 7;
- if (function[3]) mixing = nsplit_alloc;
-
- if (function[0]) {
- int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
- bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory
- bytes += 6 * nfft_both * sizeof(double); // vg
- bytes += nfft_both * sizeof(double); // greensfn
- bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2
- bytes += cg->memory_usage();
- }
-
- if (function[1] + function[2] + function[3]) {
- int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) *
- (nzhi_out_6-nzlo_out_6+1);
- bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks
- bytes += 6 * nfft_both_6 * sizeof(double); // vg
- bytes += nfft_both_6 * sizeof(double); // greensfn
- bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2
- bytes += cg_6->memory_usage();
- }
- return bytes;
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Rolf Isele-Holder (Aachen University)
+ Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm_disp.h"
+#include "math_const.h"
+#include "atom.h"
+#include "comm.h"
+#include "commgrid.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXORDER 7 // init() errors out if order or order_6 exceeds this
+#define OFFSET 16384 // init() rejects any grid dimension >= OFFSET as too large
+#define SMALL 0.00001 // |qsum| above this is reported as a non-neutral system
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+
+enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // values compared with the pair style's "ewald_mix"; GEOMETRIC is the fallback in init()
+enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; // flag for pack_reverse()/unpack_reverse(): Coulomb, geometric, arithmetic, no mixing
+enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, // flags selecting which bricks a forward exchange copies
+ FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, // presumably _G = geometric mixing, as for REVERSE_RHO_G
+ FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, // presumably _A = arithmetic mixing, as for REVERSE_RHO_A
+ FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; // _NONE cases work on the *_brick_none arrays
+
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f // float literals when FFT_SINGLE is defined
+#define ONEF 1.0f
+#else
+#define ZEROF 0.0 // double literals otherwise
+#define ONEF 1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{ // constructor: read the accuracy argument, set flags, and NULL every pointer listed below
+ if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); // arg[0] is read below, so one argument is mandatory
+
+ triclinic_support = 0; // NOTE(review): presumably consulted by triclinic_check() in init() - confirm
+ pppmflag = dispersionflag = 1;
+ accuracy_relative = fabs(force->numeric(FLERR,arg[0])); // arg[0] parsed as a number; its sign is discarded
+
+ nfactors = 3;
+ factors = new int[nfactors]; // released with delete [] in the destructor
+ factors[0] = 2; // NOTE(review): 2,3,5 look like the prime factors allowed in grid sizes - confirm
+ factors[1] = 3;
+ factors[2] = 5;
+
+ MPI_Comm_rank(world,&me); // rank of this proc in world
+ MPI_Comm_size(world,&nprocs); // number of procs in world
+
+ csumflag = 0;
+ B = NULL; // B, cii, csumi are released with delete [] in the destructor
+ cii = NULL;
+ csumi = NULL;
+ peratom_allocate_flag = 0;
+
+ density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; // unsuffixed bricks: Coulomb interactions
+ density_fft = NULL;
+ u_brick = v0_brick = v1_brick = v2_brick = v3_brick =
+ v4_brick = v5_brick = NULL;
+
+ density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; // _g: dispersion, geometric mixing
+ density_fft_g = NULL;
+ u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g =
+ v4_brick_g = v5_brick_g = NULL;
+
+ density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; // _a0 .. _a6: dispersion, arithmetic mixing (seven sets)
+ density_fft_a0 = NULL;
+ u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 =
+ v4_brick_a0 = v5_brick_a0 = NULL;
+
+ density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
+ density_fft_a1 = NULL;
+ u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 =
+ v4_brick_a1 = v5_brick_a1 = NULL;
+
+ density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
+ density_fft_a2 = NULL;
+ u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 =
+ v4_brick_a2 = v5_brick_a2 = NULL;
+
+ density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
+ density_fft_a3 = NULL;
+ u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 =
+ v4_brick_a3 = v5_brick_a3 = NULL;
+
+ density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
+ density_fft_a4 = NULL;
+ u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 =
+ v4_brick_a4 = v5_brick_a4 = NULL;
+
+ density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
+ density_fft_a5 = NULL;
+ u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 =
+ v4_brick_a5 = v5_brick_a5 = NULL;
+
+ density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
+ density_fft_a6 = NULL;
+ u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 =
+ v4_brick_a6 = v5_brick_a6 = NULL;
+
+ density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; // _none: dispersion, no mixing (indexed by k < nsplit_alloc elsewhere)
+ density_fft_none = NULL;
+ u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none =
+ v4_brick_none = v5_brick_none = NULL;
+
+ greensfn = NULL; // from here on, names ending in _6 are the dispersion counterparts of the Coulomb members
+ greensfn_6 = NULL;
+ work1 = work2 = NULL;
+ work1_6 = work2_6 = NULL;
+ vg = NULL;
+ vg2 = NULL;
+ vg_6 = NULL;
+ vg2_6 = NULL;
+ fkx = fky = fkz = NULL;
+ fkx2 = fky2 = fkz2 = NULL;
+ fkx_6 = fky_6 = fkz_6 = NULL;
+ fkx2_6 = fky2_6 = fkz2_6 = NULL;
+
+ sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 =
+ sf_precoeff5 = sf_precoeff6 = NULL;
+ sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 =
+ sf_precoeff5_6 = sf_precoeff6_6 = NULL;
+
+ gf_b = NULL;
+ gf_b_6 = NULL;
+ rho1d = rho_coeff = NULL;
+ drho1d = drho_coeff = NULL;
+ rho1d_6 = rho_coeff_6 = NULL;
+ drho1d_6 = drho_coeff_6 = NULL;
+ fft1 = fft2 = NULL;
+ fft1_6 = fft2_6 = NULL;
+ remap = NULL;
+ remap_6 = NULL;
+
+ nmax = 0;
+ part2grid = NULL; // part2grid arrays are released with memory->destroy() in the destructor
+ part2grid_6 = NULL;
+
+ cg = NULL;
+ cg_peratom = NULL;
+ cg_6 = NULL;
+ cg_peratom_6 = NULL;
+
+ memset(function, 0, EWALD_FUNCS*sizeof(int)); // no interaction selected yet; init() clears this again and sets the active entries
+}
+
+/* ----------------------------------------------------------------------
+ free all memory: new[] arrays, deallocate*() storage, part2grid arrays
+------------------------------------------------------------------------- */
+
+PPPMDisp::~PPPMDisp()
+{
+ delete [] factors; // allocated with new int[nfactors] in the constructor
+ delete [] B; // delete [] on a NULL pointer is a no-op, so never-allocated arrays are fine
+ B = NULL;
+ delete [] cii;
+ cii = NULL;
+ delete [] csumi;
+ csumi = NULL;
+ deallocate(); // same pair of calls init() makes before re-allocating
+ deallocate_peratom();
+ memory->destroy(part2grid);
+ memory->destroy(part2grid_6);
+ part2grid = part2grid_6 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+ called once before run
+------------------------------------------------------------------------- */
+
+void PPPMDisp::init()
+{
+ if (me == 0) {
+ if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
+ if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
+ }
+
+ triclinic_check();
+ if (domain->dimension == 2)
+ error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
+
+ if (slabflag == 0 && domain->nonperiodic > 0)
+ error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
+ if (slabflag == 1) {
+ if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+ domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+ error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
+ }
+
+ if (order > MAXORDER || order_6 > MAXORDER) {
+ char str[128];
+ sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
+ error->all(FLERR,str);
+ }
+
+ // free all arrays previously allocated
+
+ deallocate();
+ deallocate_peratom();
+
+ // set scale
+
+ scale = 1.0;
+
+ triclinic = domain->triclinic;
+
+ // check whether cutoff and pair style are set
+
+ pair_check();
+
+ int tmp;
+ Pair *pair = force->pair;
+ int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
+ double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
+ double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
+ if (!(ptr||*p_cutoff||*p_cutoff_lj))
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ cutoff = *p_cutoff;
+ cutoff_lj = *p_cutoff_lj;
+
+ double tmp2;
+ MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world);
+
+ // check out which types of potentials will have to be calculated
+
+ int ewald_order = ptr ? *((int *) ptr) : 1<<1;
+ int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
+ memset(function, 0, EWALD_FUNCS*sizeof(int));
+ for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order
+ if (ewald_order&(1<pair_style);
+ error->all(FLERR,str);
+ }
+ function[k] = 1;
+ }
+
+
+ // warn, if function[0] is not set but charge attribute is set!
+ if (!function[0] && atom->q_flag && me == 0) {
+ char str[128];
+ sprintf(str, "Charges are set, but coulombic solver is not used");
+ error->warning(FLERR, str);
+ }
+
+ // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral
+
+ if (function[0]) {
+ if (!atom->q_flag)
+ error->all(FLERR,"Kspace style with selected options "
+ "requires atom attribute q");
+
+ qsum = qsqsum = 0.0;
+ for (int i = 0; i < atom->nlocal; i++) {
+ qsum += atom->q[i];
+ qsqsum += atom->q[i]*atom->q[i];
+
+ }
+
+ double tmp;
+ MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsum = tmp;
+ MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ qsqsum = tmp;
+
+ if (qsqsum == 0.0)
+ error->all(FLERR,"Cannot use kspace solver with selected options "
+ "on system with no charge");
+ if (fabs(qsum) > SMALL && me == 0) {
+ char str[128];
+ sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+ error->warning(FLERR,str);
+ }
+ }
+
+ // if kspace is TIP4P, extract TIP4P params from pair style
+ // bond/angle are not yet init(), so insure equilibrium request is valid
+
+ qdist = 0.0;
+
+ if (tip4pflag) {
+ int itmp;
+ double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+ int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+ int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+ int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+ int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+ if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+ error->all(FLERR,"KSpace style is incompatible with Pair style");
+ qdist = *p_qdist;
+ typeO = *p_typeO;
+ typeH = *p_typeH;
+ int typeA = *p_typeA;
+ int typeB = *p_typeB;
+
+ if (force->angle == NULL || force->bond == NULL)
+ error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+ if (typeA < 1 || typeA > atom->nangletypes ||
+ force->angle->setflag[typeA] == 0)
+ error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
+ if (typeB < 1 || typeB > atom->nbondtypes ||
+ force->bond->setflag[typeB] == 0)
+ error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
+ double theta = force->angle->equilibrium_angle(typeA);
+ double blen = force->bond->equilibrium_distance(typeB);
+ alpha = qdist / (cos(0.5*theta) * blen);
+ }
+
+
+ // initialize the pair style to get the coefficients
+ neighrequest_flag = 0;
+ pair->init();
+ neighrequest_flag = 1;
+ init_coeffs();
+
+ //if g_ewald and g_ewald_6 have not been specified, set some initial value
+ // to avoid problems when calculating the energies!
+
+ if (!gewaldflag) g_ewald = 1;
+ if (!gewaldflag_6) g_ewald_6 = 1;
+
+ // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+ if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+ else accuracy = accuracy_relative * two_charge_force;
+
+ int (*procneigh)[2] = comm->procneigh;
+
+ int iteration = 0;
+ if (function[0]) {
+ CommGrid *cgtmp = NULL;
+ while (order >= minorder) {
+
+ if (iteration && me == 0)
+ error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
+ "b/c stencil extends beyond neighbor processor");
+ iteration++;
+
+ // set grid for dispersion interaction and coulomb interactions
+
+ set_grid();
+
+ if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+ error->all(FLERR,"PPPMDisp Coulomb grid is too large");
+
+ set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
+ nxlo_fft, nylo_fft, nzlo_fft,
+ nxhi_fft, nyhi_fft, nzhi_fft,
+ nxlo_in, nylo_in, nzlo_in,
+ nxhi_in, nyhi_in, nzhi_in,
+ nxlo_out, nylo_out, nzlo_out,
+ nxhi_out, nyhi_out, nzhi_out,
+ nlower, nupper,
+ ngrid, nfft, nfft_both,
+ shift, shiftone, order);
+
+ if (overlap_allowed) break;
+
+ cgtmp = new CommGrid(lmp, world,1,1,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_out,nxhi_out,nylo_out,nyhi_out,
+ nzlo_out,nzhi_out,
+ procneigh[0][0],procneigh[0][1],procneigh[1][0],
+ procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+ cgtmp->ghost_notify();
+ if (!cgtmp->ghost_overlap()) break;
+ delete cgtmp;
+
+ order--;
+ }
+
+ if (order < minorder)
+ error->all(FLERR,
+ "Coulomb PPPMDisp order has been reduced below minorder");
+ if (cgtmp) delete cgtmp;
+
+ // adjust g_ewald
+
+ if (!gewaldflag) adjust_gewald();
+
+ // calculate the final accuracy
+
+ double acc = final_accuracy();
+
+ // print stats
+
+ int ngrid_max,nfft_both_max,nbuf_max;
+ MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+ MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+ if (me == 0) {
+ #ifdef FFT_SINGLE
+ const char fft_prec[] = "single";
+ #else
+ const char fft_prec[] = "double";
+ #endif
+
+ if (screen) {
+ fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald);
+ fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+ fprintf(screen," Coulomb stencil order = %d\n",order);
+ fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n",
+ acc);
+ fprintf(screen," Coulomb estimated relative force accuracy = %g\n",
+ acc/two_charge_force);
+ fprintf(screen," using %s precision FFTs\n",fft_prec);
+ fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
+ ngrid_max, nfft_both_max);
+ }
+ if (logfile) {
+ fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald);
+ fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+ fprintf(logfile," Coulomb stencil order = %d\n",order);
+ fprintf(logfile,
+ " Coulomb estimated absolute RMS force accuracy = %g\n",
+ acc);
+ fprintf(logfile," Coulomb estimated relative force accuracy = %g\n",
+ acc/two_charge_force);
+ fprintf(logfile," using %s precision FFTs\n",fft_prec);
+ fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
+ ngrid_max, nfft_both_max);
+ }
+ }
+ }
+
+ iteration = 0;
+ if (function[1] + function[2] + function[3]) {
+ CommGrid *cgtmp = NULL;
+ while (order_6 >= minorder) {
+
+ if (iteration && me == 0)
+ error->warning(FLERR,"Reducing PPPMDisp dispersion order "
+ "b/c stencil extends beyond neighbor processor");
+ iteration++;
+
+ set_grid_6();
+
+ if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
+ error->all(FLERR,"PPPMDisp Dispersion grid is too large");
+
+ set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+ nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ nxlo_in_6, nylo_in_6, nzlo_in_6,
+ nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ nxlo_out_6, nylo_out_6, nzlo_out_6,
+ nxhi_out_6, nyhi_out_6, nzhi_out_6,
+ nlower_6, nupper_6,
+ ngrid_6, nfft_6, nfft_both_6,
+ shift_6, shiftone_6, order_6);
+
+ if (overlap_allowed) break;
+
+ cgtmp = new CommGrid(lmp,world,1,1,
+ nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
+ nzlo_in_6,nzhi_in_6,
+ nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
+ nzlo_out_6,nzhi_out_6,
+ procneigh[0][0],procneigh[0][1],procneigh[1][0],
+ procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+ cgtmp->ghost_notify();
+ if (!cgtmp->ghost_overlap()) break;
+ delete cgtmp;
+ order_6--;
+ }
+
+ if (order_6 < minorder)
+ error->all(FLERR,"Dispersion PPPMDisp order has been "
+ "reduced below minorder");
+ if (cgtmp) delete cgtmp;
+
+ // adjust g_ewald_6
+
+ if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6)
+ adjust_gewald_6();
+
+ // calculate the final accuracy
+
+ double acc, acc_real, acc_kspace;
+ final_accuracy_6(acc, acc_real, acc_kspace);
+
+
+ // print stats
+
+ int ngrid_max,nfft_both_max,nbuf_max;
+ MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+ MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+ if (me == 0) {
+ #ifdef FFT_SINGLE
+ const char fft_prec[] = "single";
+ #else
+ const char fft_prec[] = "double";
+ #endif
+
+ if (screen) {
+ fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6);
+ fprintf(screen," Dispersion grid = %d %d %d\n",
+ nx_pppm_6,ny_pppm_6,nz_pppm_6);
+ fprintf(screen," Dispersion stencil order = %d\n",order_6);
+ fprintf(screen," Dispersion estimated absolute "
+ "RMS force accuracy = %g\n",acc);
+ fprintf(screen," Dispersion estimated absolute "
+ "real space RMS force accuracy = %g\n",acc_real);
+ fprintf(screen," Dispersion estimated absolute "
+ "kspace RMS force accuracy = %g\n",acc_kspace);
+ fprintf(screen," Dispersion estimated relative force accuracy = %g\n",
+ acc/two_charge_force);
+ fprintf(screen," using %s precision FFTs\n",fft_prec);
+ fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n",
+ ngrid_max,nfft_both_max);
+ }
+ if (logfile) {
+ fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6);
+ fprintf(logfile," Dispersion grid = %d %d %d\n",
+ nx_pppm_6,ny_pppm_6,nz_pppm_6);
+ fprintf(logfile," Dispersion stencil order = %d\n",order_6);
+ fprintf(logfile," Dispersion estimated absolute "
+ "RMS force accuracy = %g\n",acc);
+ fprintf(logfile," Dispersion estimated absolute "
+ "real space RMS force accuracy = %g\n",acc_real);
+ fprintf(logfile," Dispersion estimated absolute "
+ "kspace RMS force accuracy = %g\n",acc_kspace);
+ fprintf(logfile," Disperion estimated relative force accuracy = %g\n",
+ acc/two_charge_force);
+ fprintf(logfile," using %s precision FFTs\n",fft_prec);
+ fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n",
+ ngrid_max,nfft_both_max);
+ }
+ }
+ }
+
+ // allocate K-space dependent memory
+
+ allocate();
+
+ // pre-compute Green's function denomiator expansion
+ // pre-compute 1d charge distribution coefficients
+
+ if (function[0]) {
+ compute_gf_denom(gf_b, order);
+ compute_rho_coeff(rho_coeff, drho_coeff, order);
+ cg->ghost_notify();
+ cg->setup();
+ if (differentiation_flag == 1)
+ compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
+ nxlo_fft, nylo_fft, nzlo_fft,
+ nxhi_fft, nyhi_fft, nzhi_fft,
+ sf_precoeff1, sf_precoeff2, sf_precoeff3,
+ sf_precoeff4, sf_precoeff5, sf_precoeff6);
+ }
+ if (function[1] + function[2] + function[3]) {
+ compute_gf_denom(gf_b_6, order_6);
+ compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
+ cg_6->ghost_notify();
+ cg_6->setup();
+ if (differentiation_flag == 1)
+ compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+ nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
+ sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
+ }
+
+}
+
+/* ----------------------------------------------------------------------
+ adjust PPPM coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void PPPMDisp::setup()
+{
+  // Recompute the box-size-dependent K-space data for every enabled solver:
+  //   function[0]               -> Coulomb grid (unsuffixed members)
+  //   function[1], [2] or [3]   -> dispersion grid ("_6" members)
+  // For each grid this fills the inverse grid spacings, the wave-vector
+  // tables fk*/fk*2 and the virial coefficient tables vg/vg2, then calls
+  // compute_gf*() and, when differentiation_flag == 1, compute_sf_coeff*().
+
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  // wave-vector increment per grid index in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // Coulomb grid: virial coefficients and Green's function
+
+  if (function[0]) {
+
+    delxinv = nx_pppm/xprd;
+    delyinv = ny_pppm/yprd;
+    delzinv = nz_pppm/zprd_slab;
+
+    delvolinv = delxinv*delyinv*delzinv;
+
+    double per;
+    int i, j, k, n;
+
+    // fk*  : wave vector of grid index i
+    // fk*2 : wave vector of the mirrored index (N - i) % N
+    // the integer division 2*i/N folds the upper half of the index range
+    // onto negative multiples of the unit wave vector
+
+    for (i = nxlo_fft; i <= nxhi_fft; i++) {
+      per = i - nx_pppm*(2*i/nx_pppm);
+      fkx[i] = unitkx*per;
+      j = (nx_pppm - i) % nx_pppm;
+      per = j - nx_pppm*(2*j/nx_pppm);
+      fkx2[i] = unitkx*per;
+    }
+
+    for (i = nylo_fft; i <= nyhi_fft; i++) {
+      per = i - ny_pppm*(2*i/ny_pppm);
+      fky[i] = unitky*per;
+      j = (ny_pppm - i) % ny_pppm;
+      per = j - ny_pppm*(2*j/ny_pppm);
+      fky2[i] = unitky*per;
+    }
+
+    for (i = nzlo_fft; i <= nzhi_fft; i++) {
+      per = i - nz_pppm*(2*i/nz_pppm);
+      fkz[i] = unitkz*per;
+      j = (nz_pppm - i) % nz_pppm;
+      per = j - nz_pppm*(2*j/nz_pppm);
+      fkz2[i] = unitkz*per;
+    }
+
+    double sqk,vterm;
+    double gew2inv = 1/(g_ewald*g_ewald);
+    n = 0;
+    for (k = nzlo_fft; k <= nzhi_fft; k++) {
+      for (j = nylo_fft; j <= nyhi_fft; j++) {
+        for (i = nxlo_fft; i <= nxhi_fft; i++) {
+          sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+          if (sqk == 0.0) {
+            // k = 0 point: every coefficient is defined as zero
+            vg[n][0] = 0.0;
+            vg[n][1] = 0.0;
+            vg[n][2] = 0.0;
+            vg[n][3] = 0.0;
+            vg[n][4] = 0.0;
+            vg[n][5] = 0.0;
+            // also assign vg2 so this entry never keeps stale contents
+            vg2[n][0] = 0.0;
+            vg2[n][1] = 0.0;
+            vg2[n][2] = 0.0;
+          } else {
+            vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
+            vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+            vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+            vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+            vg[n][3] = vterm*fkx[i]*fky[j];
+            vg[n][4] = vterm*fkx[i]*fkz[k];
+            vg[n][5] = vterm*fky[j]*fkz[k];
+            vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
+            vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
+            vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
+          }
+          n++;
+        }
+      }
+    }
+    compute_gf();
+    if (differentiation_flag == 1) compute_sf_coeff();
+  }
+
+  // dispersion grid: same quantities, using g_ewald_6 and the "_6" grid
+
+  if (function[1] + function[2] + function[3]) {
+    delxinv_6 = nx_pppm_6/xprd;
+    delyinv_6 = ny_pppm_6/yprd;
+    delzinv_6 = nz_pppm_6/zprd_slab;
+    delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
+
+    double per;
+    int i, j, k, n;
+    for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+      per = i - nx_pppm_6*(2*i/nx_pppm_6);
+      fkx_6[i] = unitkx*per;
+      j = (nx_pppm_6 - i) % nx_pppm_6;
+      per = j - nx_pppm_6*(2*j/nx_pppm_6);
+      fkx2_6[i] = unitkx*per;
+    }
+    for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
+      per = i - ny_pppm_6*(2*i/ny_pppm_6);
+      fky_6[i] = unitky*per;
+      j = (ny_pppm_6 - i) % ny_pppm_6;
+      per = j - ny_pppm_6*(2*j/ny_pppm_6);
+      fky2_6[i] = unitky*per;
+    }
+    for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
+      per = i - nz_pppm_6*(2*i/nz_pppm_6);
+      fkz_6[i] = unitkz*per;
+      j = (nz_pppm_6 - i) % nz_pppm_6;
+      per = j - nz_pppm_6*(2*j/nz_pppm_6);
+      fkz2_6[i] = unitkz*per;
+    }
+    double sqk,vterm;
+    // long double intermediates: nom and denom are differences of terms
+    // built from erfc() and exp() of the same argument
+    long double erft, expt,nom, denom;
+    long double b, bs, bt;
+    double rtpi = sqrt(MY_PI);
+    double gewinv = 1/g_ewald_6;
+    n = 0;
+    for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
+      for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
+        for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+          sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
+          if (sqk == 0.0) {
+            // k = 0 point: every coefficient is defined as zero
+            vg_6[n][0] = 0.0;
+            vg_6[n][1] = 0.0;
+            vg_6[n][2] = 0.0;
+            vg_6[n][3] = 0.0;
+            vg_6[n][4] = 0.0;
+            vg_6[n][5] = 0.0;
+            // also assign vg2_6 so this entry never keeps stale contents
+            vg2_6[n][0] = 0.0;
+            vg2_6[n][1] = 0.0;
+            vg2_6[n][2] = 0.0;
+          } else {
+            b = 0.5*sqrt(sqk)*gewinv;
+            bs = b*b;
+            bt = bs*b;
+            erft = 2*bt*rtpi*erfc(b);
+            expt = exp(-bs);
+            nom = erft - 2*bs*expt;
+            denom = nom + expt;
+            // avoid dividing by zero when the denominator vanishes
+            if (denom == 0) vterm = 3.0/sqk;
+            else vterm = 3.0*nom/(sqk*denom);
+            vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
+            vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
+            vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
+            vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
+            vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
+            vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
+            vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
+            vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
+            vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
+          }
+          n++;
+        }
+      }
+    }
+    compute_gf_6();
+    if (differentiation_flag == 1) compute_sf_coeff_6();
+  }
+}
+
+/* ----------------------------------------------------------------------
+ reset local grid arrays and communication stencils
+ called by fix balance b/c it changed sizes of processor sub-domains
+------------------------------------------------------------------------- */
+
+void PPPMDisp::setup_grid()
+{
+  // solver-selection flags, read once; this routine itself never writes them
+  const int use_coulomb = function[0];
+  const int use_dispersion = function[1] + function[2] + function[3];
+
+  // drop every array that was sized for the previous decomposition
+
+  deallocate();
+  deallocate_peratom();
+
+  // recompute the portion of the global grid owned by this proc
+
+  if (use_coulomb) {
+    set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
+                       nxlo_fft, nylo_fft, nzlo_fft,
+                       nxhi_fft, nyhi_fft, nzhi_fft,
+                       nxlo_in, nylo_in, nzlo_in,
+                       nxhi_in, nyhi_in, nzhi_in,
+                       nxlo_out, nylo_out, nzlo_out,
+                       nxhi_out, nyhi_out, nzhi_out,
+                       nlower, nupper,
+                       ngrid, nfft, nfft_both,
+                       shift, shiftone, order);
+  }
+
+  if (use_dispersion) {
+    set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
+                       nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+                       nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                       nxlo_in_6, nylo_in_6, nzlo_in_6,
+                       nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                       nxlo_out_6, nylo_out_6, nzlo_out_6,
+                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
+                       nlower_6, nupper_6,
+                       ngrid_6, nfft_6, nfft_both_6,
+                       shift_6, shiftone_6, order_6);
+  }
+
+  // re-create the K-space dependent storage
+  // per-atom arrays are left unallocated here; compute() creates them
+  // when they are first needed
+
+  allocate();
+
+  // refresh the ghost-grid communication and reject a stencil that now
+  // overlaps beyond the neighboring proc, unless overlap is permitted
+
+  if (use_coulomb) {
+    cg->ghost_notify();
+    if (!overlap_allowed && cg->ghost_overlap()) {
+      error->all(FLERR,"PPPM grid stencil extends "
+                 "beyond nearest neighbor processor");
+    }
+    cg->setup();
+  }
+
+  if (use_dispersion) {
+    cg_6->ghost_notify();
+    if (!overlap_allowed && cg_6->ghost_overlap()) {
+      error->all(FLERR,"PPPM grid stencil extends "
+                 "beyond nearest neighbor processor");
+    }
+    cg_6->setup();
+  }
+
+  // coefficients that depend on grid size and order but not on volume:
+  // Green's function denominator expansion, 1d charge distribution
+  // coefficients and, for differentiation_flag == 1, the sf pre-coefficients
+
+  if (use_coulomb) {
+    compute_gf_denom(gf_b, order);
+    compute_rho_coeff(rho_coeff, drho_coeff, order);
+    if (differentiation_flag == 1) {
+      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
+                          nxlo_fft, nylo_fft, nzlo_fft,
+                          nxhi_fft, nyhi_fft, nzhi_fft,
+                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
+                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
+    }
+  }
+
+  if (use_dispersion) {
+    compute_gf_denom(gf_b_6, order_6);
+    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
+    if (differentiation_flag == 1) {
+      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
+                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
+                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
+    }
+  }
+
+  // finally the volume-dependent coefficients
+
+  setup();
+}
+
+/* ----------------------------------------------------------------------
+ compute the PPPM long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute(int eflag, int vflag)
+{
+
+ int i;
+ // set up the energy/virial accumulation flags; when neither energy nor
+ // virial is requested, all of them are cleared
+
+ if (eflag || vflag) ev_setup(eflag,vflag);
+ else evflag = evflag_atom = eflag_global = vflag_global =
+ eflag_atom = vflag_atom = 0;
+
+ // first per-atom request: allocate the per-atom grid arrays and set up
+ // their ghost-grid communication (done once, guarded by the flag)
+
+ if (evflag_atom && !peratom_allocate_flag) {
+ allocate_peratom();
+ if (function[0]) {
+ cg_peratom->ghost_notify();
+ cg_peratom->setup();
+ }
+ if (function[1] + function[2] + function[3]) {
+ cg_peratom_6->ghost_notify();
+ cg_peratom_6->setup();
+ }
+ peratom_allocate_flag = 1;
+ }
+
+ // for triclinic boxes convert atoms from box to lamda coords
+ // (converted back at the end of this routine)
+
+ if (triclinic == 0) boxlo = domain->boxlo;
+ else {
+ boxlo = domain->boxlo_lamda;
+ domain->x2lamda(atom->nlocal);
+ }
+ // extend size of per-atom arrays if necessary
+
+ if (atom->nlocal > nmax) {
+
+ if (function[0]) memory->destroy(part2grid);
+ if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
+ nmax = atom->nmax;
+ if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
+ if (function[1] + function[2] + function[3])
+ memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
+ }
+
+
+ // reset the accumulators; "_1" holds the function[0] (Coulomb) part and
+ // "_6" the dispersion part
+
+ energy = 0.0;
+ energy_1 = 0.0;
+ energy_6 = 0.0;
+ if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
+
+ // find grid points for all my particles
+ // distribute particles' charges/dispersion coefficients on the grid
+ // communication between processors and remapping to fft
+ // solution of Poisson's equation in k-space and back-transformation
+ // communication between processors
+ // calculation of forces
+
+ if (function[0]) {
+
+ // perform calculations for coulomb interactions only
+
+ particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
+ nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
+
+ make_rho_c();
+
+ cg->reverse_comm(this,REVERSE_RHO);
+
+ brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+ density_brick, density_fft, work1,remap);
+
+ // differentiation_flag == 1 selects the "ad" solver path,
+ // anything else the "ik" path
+
+ if (differentiation_flag == 1) {
+
+ poisson_ad(work1, work2, density_fft, fft1, fft2,
+ nx_pppm, ny_pppm, nz_pppm, nfft,
+ nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
+ nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+ energy_1, greensfn,
+ virial_1, vg,vg2,
+ u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
+
+ cg->forward_comm(this,FORWARD_AD);
+
+ fieldforce_c_ad();
+
+ // NOTE(review): this AD branch tests vflag_atom, while the arithmetic AD
+ // branch and every IK branch test evflag_atom -- confirm which is intended
+ if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
+
+ } else {
+ poisson_ik(work1, work2, density_fft, fft1, fft2,
+ nx_pppm, ny_pppm, nz_pppm, nfft,
+ nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
+ nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+ energy_1, greensfn,
+ fkx, fky, fkz,fkx2, fky2, fkz2,
+ vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
+ u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
+
+ cg->forward_comm(this, FORWARD_IK);
+
+ fieldforce_c_ik();
+
+ if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
+ }
+ if (evflag_atom) fieldforce_c_peratom();
+ }
+
+ if (function[1]) {
+ // perform calculations for geometric mixing
+ particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+ nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+ make_rho_g();
+
+
+ cg_6->reverse_comm(this, REVERSE_RHO_G);
+
+ brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ density_brick_g, density_fft_g, work1_6,remap_6);
+
+ if (differentiation_flag == 1) {
+
+ poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
+ nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ energy_6, greensfn_6,
+ virial_6, vg_6, vg2_6,
+ u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
+
+ cg_6->forward_comm(this,FORWARD_AD_G);
+
+ fieldforce_g_ad();
+
+ // NOTE(review): vflag_atom here vs. evflag_atom in the IK branch -- confirm
+ if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
+
+ } else {
+ poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
+ nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ energy_6, greensfn_6,
+ fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
+ vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
+ u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
+
+ cg_6->forward_comm(this,FORWARD_IK_G);
+
+ fieldforce_g_ik();
+
+
+ if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
+ }
+ if (evflag_atom) fieldforce_g_peratom();
+ }
+
+ if (function[2]) {
+ // perform calculations for arithmetic mixing
+ // seven density grids a0..a6 are used: a3 is solved on its own and the
+ // remaining ones are solved in pairs (a0,a6), (a1,a5), (a2,a4)
+ particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+ nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+ make_rho_a();
+
+ cg_6->reverse_comm(this, REVERSE_RHO_A);
+
+ brick2fft_a();
+
+ if ( differentiation_flag == 1) {
+
+ poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
+ nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ energy_6, greensfn_6,
+ virial_6, vg_6, vg2_6,
+ u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
+ poisson_2s_ad(density_fft_a0, density_fft_a6,
+ u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
+ u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
+ poisson_2s_ad(density_fft_a1, density_fft_a5,
+ u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
+ u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
+ poisson_2s_ad(density_fft_a2, density_fft_a4,
+ u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
+ u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
+
+ cg_6->forward_comm(this, FORWARD_AD_A);
+
+ fieldforce_a_ad();
+
+ if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
+
+ } else {
+
+ poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
+ nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+ nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+ nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+ energy_6, greensfn_6,
+ fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
+ vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
+ u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
+ poisson_2s_ik(density_fft_a0, density_fft_a6,
+ vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
+ vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
+ u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
+ u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
+ poisson_2s_ik(density_fft_a1, density_fft_a5,
+ vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
+ vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
+ u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
+ u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
+ poisson_2s_ik(density_fft_a2, density_fft_a4,
+ vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
+ vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
+ u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
+ u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
+
+ cg_6->forward_comm(this, FORWARD_IK_A);
+
+ fieldforce_a_ik();
+
+ if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
+ }
+ if (evflag_atom) fieldforce_a_peratom();
+ }
+
+ if (function[3]) {
+ // perform calculations if no mixing rule applies
+ particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+ nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+
+ make_rho_none();
+
+ cg_6->reverse_comm(this, REVERSE_RHO_NONE);
+
+ brick2fft_none();
+
+ if (differentiation_flag == 1) {
+
+ int n = 0;
+ // NOTE(review): the next line is truncated in this copy of the patch -- the
+ // loop condition, the loop body and the start of the forward_comm call are
+ // missing; restore them from the original source before applying
+ for (int k = 0; kforward_comm(this,FORWARD_AD_NONE);
+
+ fieldforce_none_ad();
+
+ // NOTE(review): vflag_atom here vs. evflag_atom in the IK branch -- confirm
+ if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
+
+ } else {
+ int n = 0;
+ // NOTE(review): truncated in the same way as the AD branch above
+ for (int k = 0; kforward_comm(this,FORWARD_IK_NONE);
+
+ fieldforce_none_ik();
+
+
+ if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
+ }
+ if (evflag_atom) fieldforce_none_peratom();
+ }
+
+ // sum energy across procs and add in volume-dependent term
+ // only the Coulomb part (energy_1) is scaled by qscale
+
+ const double qscale = force->qqrd2e * scale;
+ if (eflag_global) {
+ double energy_all;
+ MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+ energy_1 = energy_all;
+ MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+ energy_6 = energy_all;
+
+ energy_1 *= 0.5*volume;
+ energy_6 *= 0.5*volume;
+
+ energy_1 -= g_ewald*qsqsum/MY_PIS +
+ MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+ energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
+ 1.0/12.0*pow(g_ewald_6,6)*csum;
+ energy_1 *= qscale;
+ }
+
+ // sum virial across procs
+ // virial[] is overwritten with the Coulomb part, then the dispersion part
+ // is added; the csumij term is subtracted from the first three components
+
+ if (vflag_global) {
+ double virial_all[6];
+ MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+ for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+ MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+ for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
+ if (function[1]+function[2]+function[3]){
+ double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
+ virial[0] -= a;
+ virial[1] -= a;
+ virial[2] -= a;
+ }
+ }
+
+ // per-atom energy corrections, mirroring the global terms above
+
+ if (eflag_atom) {
+ if (function[0]) {
+ double *q = atom->q;
+ for (i = 0; i < atom->nlocal; i++) {
+ eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
+ }
+ }
+ if (function[1] + function[2] + function[3]) {
+ int tmp;
+ for (i = 0; i < atom->nlocal; i++) {
+ tmp = atom->type[i];
+ eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
+ 1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
+ }
+ }
+ }
+
+ // per-atom virial correction, applied to the first three components only
+
+ if (vflag_atom) {
+ if (function[1] + function[2] + function[3]) {
+ int tmp;
+ for (i = 0; i < atom->nlocal; i++) {
+ tmp = atom->type[i];
+ for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
+ }
+ }
+ }
+
+
+ // 2d slab correction
+
+ if (slabflag) slabcorr(eflag);
+ // total energy = sum of the parts whose solver is enabled
+ if (function[0]) energy += energy_1;
+ if (function[1] + function[2] + function[3]) energy += energy_6;
+
+ // convert atoms back from lamda to box coords
+
+ if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+ initialize coefficients needed for the dispersion density on the grids
+------------------------------------------------------------------------- */
+
+void PPPMDisp::init_coeffs() // local pair coeffs
+{
+  // Build the coefficient table B used to spread dispersion coefficients
+  // onto the grid, and possibly switch the active entry of function[]:
+  //   - for function[2] or function[3] the pair style's "B" matrix is
+  //     diagonalized with qr_alg() and the nsplit eigenvalues above the
+  //     splittol threshold are kept;
+  //   - nsplit == 1 switches to function[1] (geometric mixing);
+  //   - function[2] with nsplit <= 6 switches to function[3].
+  // B is owned by this class; whenever it is released here it is also reset
+  // to NULL, so that a later delete [] can never act on a stale pointer.
+
+  int tmp;
+  int n = atom->ntypes;
+  int converged;
+  delete [] B;
+  B = NULL;
+  if (function[3] + function[2]) { // no mixing rule or arithmetic
+    if (function[2] && me == 0) {
+      if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n");
+      if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n");
+    }
+    // get dispersion coefficients
+    double **b = (double **) force->pair->extract("B",tmp);
+    // allocate data for eigenvalue decomposition
+    double **A;
+    double **Q;
+    memory->create(A,n,n,"pppm/disp:A");
+    memory->create(Q,n,n,"pppm/disp:Q");
+    // copy the coefficients into matrix A (b is indexed from 1, A from 0)
+    for (int i = 1; i <= n; i++)
+      for (int j = 1; j <= n; j++)
+        A[i-1][j-1] = b[i][j];
+    // initialize Q to the identity matrix
+    for (int i = 0; i < n; i++)
+      for (int j = 0; j < n; j++)
+        Q[i][j] = 0.0;
+    for (int i = 0; i < n; i++)
+      Q[i][i] = 1.0;
+    // perform eigenvalue decomposition with QR algorithm
+    // a failed factorization is fatal only when no mixing rule applies
+    converged = qr_alg(A,Q,n);
+    if (function[3] && !converged) {
+      error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
+    }
+    // determine number of used eigenvalues
+    // based on maximum allowed number or cutoff criterion
+    // sort eigenvalues by decreasing magnitude with bubble sort,
+    // swapping the matching columns of Q along with them
+    double t;
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < n-1-i; j++) {
+        if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
+          t = A[j][j];
+          A[j][j] = A[j+1][j+1];
+          A[j+1][j+1] = t;
+          for (int k = 0; k < n; k++) {
+            t = Q[k][j];
+            Q[k][j] = Q[k][j+1];
+            Q[k][j+1] = t;
+          }
+        }
+      }
+    }
+
+    // check which eigenvalue is the first that is smaller
+    // than a specified tolerance
+    // check how many are maximum allowed by the user
+    double amax = fabs(A[0][0]);
+    double acrit = amax*splittol;
+    double bmax = 0;
+    double err = 0;
+    nsplit = 0;
+    for (int i = 0; i < n; i++) {
+      if (fabs(A[i][i]) > acrit) nsplit++;
+      else {
+        bmax = fabs(A[i][i]);
+        break;
+      }
+    }
+
+    // relative size of the largest discarded eigenvalue
+    err = bmax/amax;
+    if (err > 1.0e-4) {
+      char str[128];
+      sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err);
+      error->warning(FLERR, str);
+    }
+    // set B: the first nsplit entries hold the kept eigenvalues, followed by
+    // one group of nsplit eigenvector components per atom type
+    B = new double[nsplit*n+nsplit];
+    for (int i = 0; i< nsplit; i++) {
+      B[i] = A[i][i];
+      for (int j = 0; j < n; j++) {
+        B[nsplit*(j+1) + i] = Q[j][i];
+      }
+    }
+
+    nsplit_alloc = nsplit;
+    if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
+    // check if the function should preferably be [1] or [2] or [3]
+    if (nsplit == 1) {
+      delete [] B;
+      B = NULL;
+      function[3] = 0;
+      function[2] = 0;
+      function[1] = 1;
+      if (me == 0) {
+        if (screen) fprintf(screen," Using geometric mixing for reciprocal space\n");
+        if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n");
+      }
+    }
+    if (function[2] && nsplit <= 6) {
+      if (me == 0) {
+        if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit);
+        if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit);
+      }
+      function[3] = 1;
+      function[2] = 0;
+    }
+    if (function[2] && (nsplit > 6)) {
+      if (me == 0) {
+        if (screen) fprintf(screen," Using 7 structure factors\n");
+        if (logfile) fprintf(logfile," Using 7 structure factors\n");
+      }
+      delete [] B;
+      B = NULL;
+    }
+    if (function[3]) {
+      if (me == 0) {
+        if (screen) fprintf(screen," Using %d structure factors\n",nsplit);
+        if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit);
+      }
+      if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
+    }
+
+    memory->destroy(A);
+    memory->destroy(Q);
+  }
+  if (function[1]) { // geometric 1/r^6
+    double **b = (double **) force->pair->extract("B",tmp);
+    // release any table built above before replacing it
+    delete [] B;
+    B = new double[n+1];
+    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
+  }
+  if (function[2]) { // arithmetic 1/r^6
+    //cannot use epsilon, because this has not been set yet
+    double **epsilon = (double **) force->pair->extract("epsilon",tmp);
+    //cannot use sigma, because this has not been set yet
+    double **sigma = (double **) force->pair->extract("sigma",tmp);
+    if (!(epsilon&&sigma))
+      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
+    // release any table built above before replacing it
+    delete [] B;
+    // seven coefficients per type: 0.25*sqrt(epsilon)*c[j]*sigma^j, j = 0..6
+    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
+    double c[7] = {
+      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
+    for (int i=0; i<=n; ++i) {
+      eps_i = sqrt(epsilon[i][i]);
+      sigma_i = sigma[i][i];
+      sigma_n = 1.0;
+      for (int j=0; j<7; ++j) {
+        *(bi++) = sigma_n*eps_i*c[j]*0.25;
+        sigma_n *= sigma_i;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ Eigenvalue decomposition of a real, symmetric matrix with the QR
+ method (includes transformation to Tridiagonal Matrix + Wilkinson
+ shift)
+------------------------------------------------------------------------- */
+
+int PPPMDisp::qr_alg(double **A, double **Q, int n)
+{
+  // Iteratively diagonalizes the n x n matrix A in place.
+  //   A : in/out, the matrix to factorize; the caller reads the
+  //       eigenvalues from its diagonal afterwards
+  //   Q : in/out, accumulates the applied transformations; the caller
+  //       passes in the identity
+  //   n : matrix dimension
+  // Returns non-zero once check_convergence() accepts the result, and 0 if
+  // countmax iterations pass without that happening.
+
+  // a matrix with fewer than two rows is already diagonal, and the
+  // Wilkinson shift below would index A[n-2] out of bounds
+  if (n < 2) return 1;
+
+  int converged = 0;
+  double an1, an, bn1, d, mue;
+  // allocate some memory for the required operations
+  double **A0,**Qi,**C,**D,**E;
+  // make a copy of A for convergence check
+  memory->create(A0,n,n,"pppm/disp:A0");
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      A0[i][j] = A[i][j];
+
+  // allocate an auxiliary matrix Qi
+  memory->create(Qi,n,n,"pppm/disp:Qi");
+
+  // allocate auxiliary matrices for the matrix multiplication
+  memory->create(C,n,n,"pppm/disp:C");
+  memory->create(D,n,n,"pppm/disp:D");
+  memory->create(E,n,n,"pppm/disp:E");
+
+  // transform Matrix A to Tridiagonal form
+  hessenberg(A,Q,n);
+
+  // start loop for the matrix factorization
+  int count = 0;
+  int countmax = 100000;
+  while (1) {
+    // make a Wilkinson shift, computed from the trailing 2x2 block of A
+    an1 = A[n-2][n-2];
+    an = A[n-1][n-1];
+    bn1 = A[n-2][n-1];
+    d = (an1-an)/2;
+    mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
+    for (int i = 0; i < n; i++)
+      A[i][i] -= mue;
+
+    // perform a QR factorization for a tridiagonal matrix A
+    qr_tri(Qi,A,n);
+
+    // update the matrices
+    // presumably A <- A*Qi and Q <- Q*Qi with C as scratch; mmult() is
+    // defined elsewhere -- confirm
+    mmult(A,Qi,C,n);
+    mmult(Q,Qi,C,n);
+
+    // backward Wilkinson shift
+    for (int i = 0; i < n; i++)
+      A[i][i] += mue;
+
+    // check the convergence
+    converged = check_convergence(A,Q,A0,C,D,E,n);
+    if (converged) break;
+    count = count + 1;
+    if (count == countmax) break;
+  }
+
+  // free allocated memory
+  memory->destroy(Qi);
+  memory->destroy(A0);
+  memory->destroy(C);
+  memory->destroy(D);
+  memory->destroy(E);
+
+  return converged;
+}
+
+/* ----------------------------------------------------------------------
+ Transform a Matrix to Hessenberg form (for symmetric Matrices, the
+ result will be a tridiagonal matrix)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::hessenberg(double **A, double **Q, int n)
+{
+  // Reduces the n x n matrix A in place with a sequence of plane rotations,
+  // each one zeroing an entry A[j][i] with j > i+1. Every rotation is applied
+  // to A from the left and from the right, and to Q from the right.
+  //   A : in/out, matrix to reduce
+  //   Q : in/out, accumulates the rotations
+  //   n : matrix dimension
+
+  double r,a,b,c,s,x1,x2;
+  for (int i = 0; i < n-1; i++) {
+    for (int j = i+2; j < n; j++) {
+      // compute coeffs for the rotation matrix
+      a = A[i+1][i];
+      b = A[j][i];
+      r = sqrt(a*a + b*b);
+      // both entries are zero: A[j][i] is already eliminated, and dividing
+      // by r would put NaN into every entry touched below
+      if (r == 0.0) continue;
+      c = a/r;
+      s = b/r;
+      // update the entries of A with multiplication from the left
+      for (int k = 0; k < n; k++) {
+        x1 = A[i+1][k];
+        x2 = A[j][k];
+        A[i+1][k] = c*x1 + s*x2;
+        A[j][k] = -s*x1 + c*x2;
+      }
+      // update the entries of A and Q with a multiplication from the right
+      for (int k = 0; k < n; k++) {
+        x1 = A[k][i+1];
+        x2 = A[k][j];
+        A[k][i+1] = c*x1 + s*x2;
+        A[k][j] = -s*x1 + c*x2;
+        x1 = Q[k][i+1];
+        x2 = Q[k][j];
+        Q[k][i+1] = c*x1 + s*x2;
+        Q[k][j] = -s*x1 + c*x2;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ QR factorization for a tridiagonal matrix; Result of the factorization
+ is stored in A and Qi
+------------------------------------------------------------------------- */
+
+void PPPMDisp::qr_tri(double** Qi,double** A,int n)
+{
+ // NOTE(review): this copy of the patch is damaged below. The line that
+ // assigns kmax is truncated, and everything after it uses names that are not
+ // declared in qr_tri (Bmax, A0, eps, C, D, E, diff, epsmax, converged), so
+ // the rest of qr_tri and the start of at least one later routine appear to
+ // be missing. Restore them from the original source before applying.
+ double r,a,b,c,s,x1,x2;
+ int j,k,k0,kmax;
+ // make Qi an identity matrix
+ for (int i = 0; i < n; i++)
+ for (int j = 0; j < n; j++)
+ Qi[i][j] = 0.0;
+ for (int i = 0; i < n; i++)
+ Qi[i][i] = 1.0;
+ // loop over main diagonal and first off-diagonal of A
+ for (int i = 0; i < n-1; i++) {
+ j = i+1;
+ // coefficients of the rotation matrix
+ // NOTE(review): r is zero when A[i][i] and A[j][i] are both zero, which
+ // makes the two divisions below 0/0 -- confirm this cannot occur
+ a = A[i][i];
+ b = A[j][i];
+ r = sqrt(a*a + b*b);
+ c = a/r;
+ s = b/r;
+ // update the entries of A and Q
+ k0 = (i-1>0)?i-1:0; // max(i-1,0)
+ // NOTE(review): truncated line -- text between "i+3" and "A0[i][j]" is lost
+ kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]);
+ // absolute tolerance: eps scaled by Bmax
+ double epsabs = eps*Bmax;
+
+ // reconstruct the original matrix
+ // store the diagonal elements in D
+ for (int i = 0; i < n; i++)
+ for (int j = 0; j < n; j++)
+ D[i][j] = 0.0;
+ for (int i = 0; i < n; i++)
+ D[i][i] = A[i][i];
+ // store matrix Q in E
+ for (int i = 0; i < n; i++)
+ for (int j = 0; j < n; j++)
+ E[i][j] = Q[i][j];
+ // E = Q*A
+ mmult(E,D,C,n);
+ // store transpose of Q in D
+ for (int i = 0; i < n; i++)
+ for (int j = 0; j < n; j++)
+ D[i][j] = Q[j][i];
+ // E = Q*A*Q.t
+ mmult(E,D,C,n);
+
+ //compare the original matrix and the final matrix
+ // epsmax ends up as the largest absolute element-wise difference
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ diff = A0[i][j] - E[i][j];
+ epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff));
+ }
+ }
+ // not converged when the reconstruction error exceeds the tolerance
+ if (epsmax > epsabs) converged = 0;
+ return converged;
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order
+   for every enabled function[] entry this creates the work arrays, the
+   density bricks, the u or vdx/vdy/vdz bricks, two FFT3d objects, one
+   Remap and one CommGrid on the matching grid:
+     function[0]: unsuffixed grid bounds and arrays
+     function[1]: _6 grid bounds, *_g bricks
+     function[2]: _6 grid bounds, *_a0 ... *_a6 bricks
+     function[3]: _6 grid bounds, 4d *_none bricks of extent nsplit_alloc
+   function[1], [2] and [3] assign the same _6 members (work1_6, fft1_6,
+   remap_6, cg_6, ...), so presumably at most one of them is enabled at a
+   time -- TODO confirm against the code that sets function[]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::allocate()
+{
+  // procneigh entries are passed unchanged to every CommGrid created below
+  int (*procneigh)[2] = comm->procneigh;
+
+  if (function[0]) {
+    // arrays and grid objects for the unsuffixed grid
+    memory->create(work1,2*nfft_both,"pppm/disp:work1");
+    memory->create(work2,2*nfft_both,"pppm/disp:work2");
+
+    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
+    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
+    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
+
+    memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
+    memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
+    memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
+
+    // arrays whose extents are set by order
+    memory->create(gf_b,order,"pppm/disp:gf_b");
+    memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
+    memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
+    memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:drho1d");
+    memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
+
+    memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
+    memory->create(vg,nfft_both,6,"pppm/disp:vg");
+    memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
+
+    memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:density_brick");
+    if (differentiation_flag == 1) {
+      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:u_brick");
+      memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
+      memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
+      memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
+      memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
+      memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
+      memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
+
+    } else {
+      memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
+      memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
+      memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
+    }
+    memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
+
+    // fft1 gets the FFT bounds for both index ranges, fft2 the FFT bounds
+    // followed by the _in bounds, remap the _in bounds then the FFT bounds
+    int tmp;
+
+    fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     0,0,&tmp);
+
+    fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                     0,0,&tmp);
+
+    remap = new Remap(lmp,world,
+                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                      1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+    // first count = number of bricks allocated above for this case:
+    // u_brick (differentiation_flag == 1) or vdx/vdy/vdz_brick (otherwise)
+
+    const int nbricks = (differentiation_flag == 1) ? 1 : 3;
+    cg = new CommGrid(lmp,world,nbricks,1,
+                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[1]) {
+    // arrays and grid objects for the _6 grid, single *_g bricks
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
+    if (differentiation_flag == 1) {
+      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    } else {
+      memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
+      memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
+      memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
+    }
+    memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
+
+    // FFT, remap and ghost grid objects for the _6 grid
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+    // first count = number of bricks allocated above for this case:
+    // u_brick_g (differentiation_flag == 1) or vdx/vdy/vdz_brick_g (otherwise)
+
+    const int nbricks = (differentiation_flag == 1) ? 1 : 3;
+    cg_6 = new CommGrid(lmp,world,nbricks,1,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[2]) {
+    // arrays and grid objects for the _6 grid, seven bricks _a0 ... _a6
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
+    memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
+    memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
+    memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
+    memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
+    memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
+    memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
+
+    memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
+    memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
+    memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
+    memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
+    memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
+    memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
+    memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
+
+    // u_brick_a* with sf_precoeff*_6, or vdx/vdy/vdz_brick_a*
+    if (differentiation_flag == 1) {
+      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
+      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
+      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
+      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
+      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
+      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
+      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    } else {
+
+      memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
+      memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
+      memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
+
+      memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
+      memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
+      memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
+
+      memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
+      memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
+      memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
+
+      memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
+      memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
+      memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
+
+      memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
+      memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
+      memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
+
+      memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
+      memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
+      memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
+
+      memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
+      memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
+      memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
+    }
+
+    // a single set of FFT, remap and ghost grid objects is created for the
+    // _6 grid, not one set per _a0 ... _a6 brick
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+    // first count = number of bricks allocated above for this case:
+    // 7 u_brick_a* (differentiation_flag == 1) or 21 vd*_brick_a* (otherwise)
+    // second count = number of density_brick_a* arrays
+
+    const int nbricks = (differentiation_flag == 1) ? 7 : 21;
+    cg_6 = new CommGrid(lmp,world,nbricks,7,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[3]) {
+    // arrays and grid objects for the _6 grid, 4d *_none bricks
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
+    if (differentiation_flag == 1) {
+      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    } else {
+      memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
+      memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
+      memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
+    }
+    memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
+
+    // FFT, remap and ghost grid objects for the _6 grid
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+    // first count: nsplit_alloc (u_brick_none case) or 3*nsplit_alloc
+    // (vdx/vdy/vdz_brick_none case); second count: nsplit_alloc
+
+    const int nbricks = (differentiation_flag == 1) ? nsplit_alloc : 3*nsplit_alloc;
+    cg_6 = new CommGrid(lmp,world,nbricks,nsplit_alloc,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order
+   for per atom calculations
+   for every enabled function[] entry this creates six bricks v0 ... v5
+   of the matching kind and one CommGrid on the matching grid:
+     function[0]: unsuffixed grid, v*_brick, cg_peratom
+     function[1]: _6 grid, v*_brick_g, cg_peratom_6
+     function[2]: _6 grid, v*_brick_a0 ... v*_brick_a6, cg_peratom_6
+     function[3]: _6 grid, 4d v*_brick_none, cg_peratom_6
+   the matching u_brick arrays are created here when
+   differentiation_flag != 1; allocate() creates them in the other case
+------------------------------------------------------------------------- */
+
+void PPPMDisp::allocate_peratom()
+{
+  // procneigh entries are passed unchanged to every CommGrid created below
+  int (*procneigh)[2] = comm->procneigh;
+
+  // allocate() creates the u_brick arrays when differentiation_flag == 1;
+  // this routine creates them in every other case
+  const bool need_u_brick = (differentiation_flag != 1);
+
+  if (function[0]) {
+    // unsuffixed grid: u_brick (when needed) and v0_brick ... v5_brick,
+    // all with the _out bounds
+
+    if (need_u_brick)
+      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:u_brick");
+
+    memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v0_brick");
+    memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v1_brick");
+    memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v2_brick");
+    memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v3_brick");
+    memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v4_brick");
+    memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:v5_brick");
+
+    // create the per-atom ghost grid object
+    // first count equals the number of bricks allocated above:
+    // v0_brick ... v5_brick, plus u_brick when it is created in this routine
+
+    const int nbricks = need_u_brick ? 7 : 6;
+    cg_peratom =
+      new CommGrid(lmp,world,nbricks,1,
+                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                   nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+
+
+  if (function[1]) {
+    // _6 grid: u_brick_g (when needed) and v0_brick_g ... v5_brick_g,
+    // all with the _out_6 bounds
+
+    if (need_u_brick)
+      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
+
+    memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
+    memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
+    memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
+    memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
+    memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
+    memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
+
+    // create the per-atom ghost grid object
+    // first count equals the number of bricks allocated above:
+    // v0_brick_g ... v5_brick_g, plus u_brick_g when it is created here
+
+    const int nbricks = need_u_brick ? 7 : 6;
+    cg_peratom_6 =
+      new CommGrid(lmp,world,nbricks,1,
+                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+
+  if (function[2]) {
+    // _6 grid: u_brick_aN (when needed) and v0 ... v5_brick_aN for
+    // N = 0 ... 6, all with the _out_6 bounds
+
+    if (need_u_brick) {
+      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
+      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
+      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
+      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
+      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
+      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
+      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
+    }
+
+    memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
+    memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
+    memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
+    memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
+    memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
+    memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
+
+    memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
+    memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
+    memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
+    memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
+    memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
+    memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
+
+    memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
+    memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
+    memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
+    memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
+    memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
+    memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
+
+    memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
+    memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
+    memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
+    memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
+    memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
+    memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
+
+    memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
+    memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
+    memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
+    memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
+    memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
+    memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
+
+    memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
+    memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
+    memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
+    memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
+    memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
+    memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
+
+    memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
+    memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
+    memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
+    memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
+    memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
+    memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
+
+    // create the per-atom ghost grid object
+    // first count equals the number of bricks allocated above: 6*7 = 42
+    // v-bricks, plus the 7 u_brick_a* arrays when created here (49)
+
+    const int nbricks = need_u_brick ? 49 : 42;
+    cg_peratom_6 =
+      new CommGrid(lmp,world,nbricks,1,
+                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+
+  if (function[3]) {
+    // _6 grid: 4d u_brick_none (when needed) and v0 ... v5_brick_none,
+    // nsplit_alloc as leading extent, then the _out_6 bounds
+
+    if (need_u_brick)
+      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
+
+    memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
+    memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
+    memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
+    memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
+    memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
+    memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
+
+    // create the per-atom ghost grid object
+    // first count: 6*nsplit_alloc for the v-bricks, or 7*nsplit_alloc when
+    // u_brick_none is created in this routine
+
+    const int nbricks = need_u_brick ? 7*nsplit_alloc : 6*nsplit_alloc;
+    cg_peratom_6 =
+      new CommGrid(lmp,world,nbricks,1,
+                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+ deallocate memory that depends on # of K-vectors and order
+ releases the density/field bricks, FFT arrays, wave-vector tables and
+ stencil tables of both grids: un-suffixed members use the Coulomb-grid
+ offsets, the _g/_a0-_a6/_none members use the dispersion-grid (_6) offsets
+ afterwards deletes the FFT, remap and grid-communication objects
+ every pointer is reset to NULL once it has been released
+------------------------------------------------------------------------- */
+
+void PPPMDisp::deallocate()
+{
+ memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); // offsets are passed in z,y,x order
+ memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+ memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+ memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+ memory->destroy(density_fft);
+ density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+ density_fft = NULL;
+ // suffix _g: same set of arrays, released with the dispersion-grid (_6) offsets
+ memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_g);
+ density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
+ density_fft_g = NULL;
+ // suffixes _a0 ... _a6: seven further sets, each handled exactly like _g
+ memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a0);
+ density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
+ density_fft_a0 = NULL;
+
+ memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a1);
+ density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
+ density_fft_a1 = NULL;
+
+ memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a2);
+ density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
+ density_fft_a2 = NULL;
+
+ memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a3);
+ density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
+ density_fft_a3 = NULL;
+
+ memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a4);
+ density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
+ density_fft_a4 = NULL;
+
+ memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a5);
+ density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
+ density_fft_a5 = NULL;
+
+ memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_a6);
+ density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
+ density_fft_a6 = NULL;
+ // suffix _none: these bricks are released with destroy4d_offset instead of destroy3d_offset
+ memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+ memory->destroy(density_fft_none);
+ density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
+ density_fft_none = NULL;
+ // sf_precoeff1-6, followed by their _6 counterparts
+ memory->destroy(sf_precoeff1);
+ memory->destroy(sf_precoeff2);
+ memory->destroy(sf_precoeff3);
+ memory->destroy(sf_precoeff4);
+ memory->destroy(sf_precoeff5);
+ memory->destroy(sf_precoeff6);
+ sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
+
+ memory->destroy(sf_precoeff1_6);
+ memory->destroy(sf_precoeff2_6);
+ memory->destroy(sf_precoeff3_6);
+ memory->destroy(sf_precoeff4_6);
+ memory->destroy(sf_precoeff5_6);
+ memory->destroy(sf_precoeff6_6);
+ sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
+ // greensfn, work and vg arrays of both grids
+ memory->destroy(greensfn);
+ memory->destroy(greensfn_6);
+ memory->destroy(work1);
+ memory->destroy(work2);
+ memory->destroy(work1_6);
+ memory->destroy(work2_6);
+ memory->destroy(vg);
+ memory->destroy(vg2);
+ memory->destroy(vg_6);
+ memory->destroy(vg2_6);
+ greensfn = greensfn_6 = NULL;
+ work1 = work2 = work1_6 = work2_6 = NULL;
+ vg = vg2 = vg_6 = vg2_6 = NULL;
+ // fk* tables: destroy1d_offset receives the lower FFT index of the matching dimension
+ memory->destroy1d_offset(fkx,nxlo_fft);
+ memory->destroy1d_offset(fky,nylo_fft);
+ memory->destroy1d_offset(fkz,nzlo_fft);
+ fkx = fky = fkz = NULL;
+
+ memory->destroy1d_offset(fkx2,nxlo_fft);
+ memory->destroy1d_offset(fky2,nylo_fft);
+ memory->destroy1d_offset(fkz2,nzlo_fft);
+ fkx2 = fky2 = fkz2 = NULL;
+
+ memory->destroy1d_offset(fkx_6,nxlo_fft_6);
+ memory->destroy1d_offset(fky_6,nylo_fft_6);
+ memory->destroy1d_offset(fkz_6,nzlo_fft_6);
+ fkx_6 = fky_6 = fkz_6 = NULL;
+
+ memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
+ memory->destroy1d_offset(fky2_6,nylo_fft_6);
+ memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
+ fkx2_6 = fky2_6 = fkz2_6 = NULL;
+
+ // gf_b and the 2d offset tables; the offsets passed here are computed from order
+ memory->destroy(gf_b);
+ memory->destroy2d_offset(rho1d,-order/2);
+ memory->destroy2d_offset(rho_coeff,(1-order)/2);
+ memory->destroy2d_offset(drho1d,-order/2);
+ memory->destroy2d_offset(drho_coeff, (1-order)/2);
+ gf_b = NULL;
+ rho1d = rho_coeff = drho1d = drho_coeff = NULL;
+ // same tables for the dispersion grid, offsets computed from order_6
+ memory->destroy(gf_b_6);
+ memory->destroy2d_offset(rho1d_6,-order_6/2);
+ memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
+ memory->destroy2d_offset(drho1d_6,-order_6/2);
+ memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
+ gf_b_6 = NULL;
+ rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
+ // objects of the Coulomb grid are released with delete
+ delete fft1;
+ delete fft2;
+ delete remap;
+ delete cg;
+ fft1 = fft2 = NULL;
+ remap = NULL;
+ cg = NULL;
+ // objects of the dispersion grid are released with delete
+ delete fft1_6;
+ delete fft2_6;
+ delete remap_6;
+ delete cg_6;
+ fft1_6 = fft2_6 = NULL;
+ remap_6 = NULL;
+ cg_6 = NULL;
+}
+
+
+/* ----------------------------------------------------------------------
+ deallocate memory that depends on # of K-vectors and order
+ for per atom calculations
+ clears peratom_allocate_flag, releases the u and v0-v5 bricks of every
+ grid variant and deletes the two per-atom CommGrid objects
+ every pointer is reset to NULL once it has been released
+------------------------------------------------------------------------- */
+
+void PPPMDisp::deallocate_peratom()
+{
+ peratom_allocate_flag = 0;
+ // un-suffixed bricks are released with the Coulomb-grid offsets (z,y,x order)
+ memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
+ memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
+ u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+ // suffix _g: from here on the dispersion-grid (_6) offsets are used
+ memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
+ // suffixes _a0 ... _a6: seven further sets, each handled exactly like _g
+ memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
+
+ memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
+
+ memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
+
+ memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
+
+ memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
+
+ memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
+
+ memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
+ // suffix _none: these bricks are released with destroy4d_offset instead of destroy3d_offset
+ memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+ u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
+ // per-atom CommGrid objects are released with delete
+ delete cg_peratom;
+ delete cg_peratom_6;
+ cg_peratom = cg_peratom_6 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
+   for Coulomb interactions
+   g_ewald is estimated from accuracy and cutoff unless gewaldflag is set
+   the grid is refined (spacing shrunk by 5% per pass, at most 500 passes)
+   until the k-space error estimate meets accuracy, unless gridflag is set
+   finally each grid dimension is increased until factorable() accepts it
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_grid()
+{
+  double q2 = qsqsum * force->qqrd2e;
+
+  // use xprd,yprd,zprd even if triclinic so grid size is the same
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  double h, h_x,h_y,h_z;
+  bigint natoms = atom->natoms;
+
+  if (!gewaldflag) {
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0)
+      error->all(FLERR,"KSpace accuracy too large to estimate G vector");
+    g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // reduce it until accuracy target is met
+
+  if (!gridflag) {
+    h = h_x = h_y = h_z = 4.0/g_ewald;
+    int count = 0;
+    while (1) {
+
+      // set grid dimension
+      // explicit <int> target: the quotient is truncated to a grid count
+
+      nx_pppm = static_cast<int> (xprd/h_x);
+      ny_pppm = static_cast<int> (yprd/h_y);
+      nz_pppm = static_cast<int> (zprd_slab/h_z);
+
+      if (nx_pppm <= 1) nx_pppm = 2;
+      if (ny_pppm <= 1) ny_pppm = 2;
+      if (nz_pppm <= 1) nz_pppm = 2;
+
+      // set local grid dimension
+      // the FFT index bounds must be current before compute_qopt() is
+      // called, because the qopt routines loop over nlo_fft..nhi_fft
+
+      int npey_fft,npez_fft;
+      if (nz_pppm >= nprocs) {
+        npey_fft = 1;
+        npez_fft = nprocs;
+      } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+      int me_y = me % npey_fft;
+      int me_z = me / npey_fft;
+
+      nxlo_fft = 0;
+      nxhi_fft = nx_pppm - 1;
+      nylo_fft = me_y*ny_pppm/npey_fft;
+      nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+      nzlo_fft = me_z*nz_pppm/npez_fft;
+      nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+      double qopt = compute_qopt();
+
+      double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
+
+      count++;
+
+      // break loop if the accuracy has been reached or too many loops have been performed
+
+      if (dfkspace <= accuracy) break;
+      if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
+      h *= 0.95;
+      h_x = h_y = h_z = h;
+    }
+  }
+
+  // boost grid size until it is factorable
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+}
+
+/* ----------------------------------------------------------------------
+   set the FFT parameters
+   read only:  nx_p,ny_p,nz_p (global grid size) and ord (stencil order)
+   written:    n*_i = owned sub-brick, n*_o = sub-brick incl. ghost cells,
+               n*_f = section owned in the FFT decomposition,
+               nlow,nupp = stencil extent, sft,sftone = mapping shifts,
+               ng = # of brick points incl. ghosts, nf = # of FFT points,
+               nfb = larger of nf and the # of owned brick points
+   also sets the member boxlo
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
+                                  int& nxlo_f,int& nylo_f,int& nzlo_f,
+                                  int& nxhi_f,int& nyhi_f,int& nzhi_f,
+                                  int& nxlo_i,int& nylo_i,int& nzlo_i,
+                                  int& nxhi_i,int& nyhi_i,int& nzhi_i,
+                                  int& nxlo_o,int& nylo_o,int& nzlo_o,
+                                  int& nxhi_o,int& nyhi_o,int& nzhi_o,
+                                  int& nlow, int& nupp,
+                                  int& ng, int& nf, int& nfb,
+                                  double& sft,double& sftone, int& ord)
+{
+  // global indices of PPPM grid range from 0 to N-1
+  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that I own without ghost cells
+  // for slab PPPM, assign z grid as if it were not extended
+
+  nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
+  nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
+
+  nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
+  nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
+
+  nzlo_i = static_cast<int>
+    (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
+  nzhi_i = static_cast<int>
+    (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
+
+
+  // nlow,nupp = stencil size for mapping particles to PPPM grid
+
+  nlow = -(ord-1)/2;
+  nupp = ord/2;
+
+  // sft values for particle <-> grid mapping
+  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+  if (ord % 2) sft = OFFSET + 0.5;
+  else sft = OFFSET;
+  if (ord % 2) sftone = 0.0;
+  else sftone = 0.5;
+
+  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that my particles can contribute charge to
+  // effectively nlo_in,nhi_in + ghost cells
+  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+  //           position a particle in my box can be at
+  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+  //   qdist = offset due to TIP4P fictitious charge
+  //   convert to triclinic if necessary
+  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+  // for slab PPPM, assign z grid as if it were not extended
+
+  double *prd,*sublo,*subhi;
+
+  if (triclinic == 0) {
+    prd = domain->prd;
+    boxlo = domain->boxlo;
+    sublo = domain->sublo;
+    subhi = domain->subhi;
+  } else {
+    prd = domain->prd_lamda;
+    boxlo = domain->boxlo_lamda;
+    sublo = domain->sublo_lamda;
+    subhi = domain->subhi_lamda;
+  }
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double dist[3];
+  double cuthalf = 0.5*neighbor->skin + qdist;
+  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+  else {
+    dist[0] = cuthalf/domain->prd[0];
+    dist[1] = cuthalf/domain->prd[1];
+    dist[2] = cuthalf/domain->prd[2];
+  }
+
+  int nlo,nhi;
+
+  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
+                          nx_p/xprd + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
+                          nx_p/xprd + sft) - OFFSET;
+  nxlo_o = nlo + nlow;
+  nxhi_o = nhi + nupp;
+
+  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
+                          ny_p/yprd + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
+                          ny_p/yprd + sft) - OFFSET;
+  nylo_o = nlo + nlow;
+  nyhi_o = nhi + nupp;
+
+  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
+                          nz_p/zprd_slab + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
+                          nz_p/zprd_slab + sft) - OFFSET;
+  nzlo_o = nlo + nlow;
+  nzhi_o = nhi + nupp;
+
+  // for slab PPPM, change the grid boundary for processors at +z end
+  //   to include the empty volume between periodically repeating slabs
+  // for slab PPPM, want charge data communicated from -z proc to +z proc,
+  //   but not vice versa, also want field data communicated from +z proc to
+  //   -z proc, but not vice versa
+  // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
+
+  if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
+    nzhi_i = nz_p - 1;
+    nzhi_o = nz_p - 1;
+  }
+
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clump of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_p >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_f = 0;
+  nxhi_f = nx_p - 1;
+  nylo_f = me_y*ny_p/npey_fft;
+  nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
+  nzlo_f = me_z*nz_p/npez_fft;
+  nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
+
+  // PPPM grid for this proc, including ghosts
+
+  ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
+    (nzhi_o-nzlo_o+1);
+
+  // FFT arrays on this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
+    (nzhi_f-nzlo_f+1);
+  int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
+    (nzhi_i-nzlo_i+1);
+  nfb = MAX(nf,nfft_brick);
+
+}
+
+/* ----------------------------------------------------------------------
+   test whether n can be written as a product of entries of factors[]
+   n is divided by the first entry that divides it, repeatedly,
+   until it reaches 1 (success) or no entry divides it (failure)
+   return 1 if yes, 0 if no
+------------------------------------------------------------------------- */
+
+int PPPMDisp::factorable(int n)
+{
+  while (n > 1) {
+    int reduced = 0;
+
+    // stop scanning as soon as one factor has been divided out
+
+    for (int idx = 0; idx < nfactors && !reduced; idx++) {
+      if (n % factors[idx] == 0) {
+        n /= factors[idx];
+        reduced = 1;
+      }
+    }
+
+    // none of the allowed factors divides what is left of n
+
+    if (!reduced) return 0;
+  }
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   adjust g_ewald with a Newton solver applied to f()
+   each step subtracts f()/derivf() from g_ewald
+   returns as soon as |f()| < SMALL
+   raises an error if that does not happen within LARGE steps
+------------------------------------------------------------------------- */
+void PPPMDisp::adjust_gewald()
+{
+  // Newton iteration on g_ewald
+
+  for (int i = 0; i < LARGE; i++) {
+    double dx = f() / derivf();
+    g_ewald -= dx;
+    if (fabs(f()) < SMALL) return;
+  }
+
+  // failed to converge
+  // the message is a fixed string, so it is passed directly
+
+  error->all(FLERR, "Could not compute g_ewald");
+}
+
+/* ----------------------------------------------------------------------
+   residual used by adjust_gewald() and derivf()
+   returns the real-space error estimate minus the k-space error estimate
+   evaluated at the current g_ewald
+   calls compute_qopt(), which performs an MPI_Allreduce
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::f()
+{
+  const double qsq = qsqsum * force->qqrd2e;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+  const bigint nall = atom->natoms;
+
+  // real-space part uses lz, k-space part uses the slab-extended lz_slab
+
+  const double err_real = 2.0*qsq*exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(nall*cutoff*lx*ly*lz);
+
+  const double qopt_all = compute_qopt();
+  const double err_kspace = sqrt(qopt_all/nall)*qsq/(lx*ly*lz_slab);
+
+  return err_real - err_kspace;
+}
+
+/* ----------------------------------------------------------------------
+   numerical derivative of f() with respect to g_ewald
+   forward difference [f(x + h) - f(x)] / h with h = 0.000001
+   g_ewald is shifted temporarily and restored before returning
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::derivf()
+{
+  const double step = 0.000001;
+
+  const double f_here = f();
+
+  // evaluate f() once more with g_ewald moved forward by one step
+
+  const double g_saved = g_ewald;
+  g_ewald += step;
+  const double f_ahead = f();
+  g_ewald = g_saved;
+
+  return (f_ahead - f_here)/step;
+}
+
+/* ----------------------------------------------------------------------
+   final accuracy estimate for the Coulomb part
+   returns sqrt(real-space estimate^2 + k-space estimate^2)
+   at the current g_ewald and grid
+   calls compute_qopt(), which performs an MPI_Allreduce
+------------------------------------------------------------------------- */
+
+double PPPMDisp::final_accuracy()
+{
+  const double qsq = qsqsum * force->qqrd2e;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+  const bigint nall = atom->natoms;
+
+  // real-space part uses lz, k-space part uses the slab-extended lz_slab
+
+  const double err_real = 2.0*qsq * exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(nall*cutoff*lx*ly*lz);
+
+  const double qopt_all = compute_qopt();
+  const double err_kspace = sqrt(qopt_all/nall)*qsq/(lx*ly*lz_slab);
+
+  return sqrt(err_real*err_real + err_kspace*err_kspace);
+}
+
+/* ----------------------------------------------------------------------
+   Calculate the final estimator for the Dispersion accuracy
+   acc_real   (out) = value returned by lj_rspace_error()
+   acc_kspace (out) = k-space estimate built from compute_qopt_6() and csum
+   acc        (out) = sqrt(acc_real^2 + acc_kspace^2)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
+{
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+  bigint natoms = atom->natoms;
+
+  acc_real = lj_rspace_error();
+
+  // compute_qopt_6() already sums the contributions of all procs
+
+  double qopt = compute_qopt_6();
+
+  acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
+
+  acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
+}
+
+/* ----------------------------------------------------------------------
+   qopt for Coulomb interactions
+   picks the ad routine when differentiation_flag == 1, else the ik routine,
+   and returns the sum of the per-proc values over all procs in world
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt()
+{
+  double qopt_local;
+  if (differentiation_flag == 1) qopt_local = compute_qopt_ad();
+  else qopt_local = compute_qopt_ik();
+
+  // collective call: every proc in world must reach this point
+
+  double qopt_global;
+  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
+  return qopt_global;
+}
+
+/* ----------------------------------------------------------------------
+   qopt for Dispersion interactions
+   picks the ad routine when differentiation_flag == 1, else the ik routine,
+   and returns the sum of the per-proc values over all procs in world
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6()
+{
+  double qopt_local;
+  if (differentiation_flag == 1) qopt_local = compute_qopt_6_ad();
+  else qopt_local = compute_qopt_6_ik();
+
+  // collective call: every proc in world must reach this point
+
+  double qopt_global;
+  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
+  return qopt_global;
+}
+
+/* ----------------------------------------------------------------------
+   qopt for the ik differentiation scheme and Coulomb interaction
+   loops over the FFT points nlo_fft..nhi_fft owned by this proc and,
+   for each point with non-zero wave vector, over 5x5x5 shifted images
+   returns this proc's contribution only; compute_qopt() adds up the procs
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_ik()
+{
+  const double *cell = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double lx = cell[0];
+  const double ly = cell[1];
+  const double lz_slab = cell[2]*slab_volfactor;
+
+  const double kx_unit = (2.0*MY_PI/lx);
+  const double ky_unit = (2.0*MY_PI/ly);
+  const double kz_unit = (2.0*MY_PI/lz_slab);
+
+  // images run from -2 to +2 in each dimension
+
+  const int imgx = 2;
+  const int imgy = 2;
+  const int imgz = 2;
+
+  double qopt = 0.0;
+
+  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (int l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+
+        const double sqk = pow(kx_unit*kper,2.0) + pow(ky_unit*lper,2.0) +
+          pow(kz_unit*mper,2.0);
+
+        // the zero wave vector contributes nothing
+
+        if (sqk == 0.0) continue;
+
+        double sum1 = 0.0;
+        double sum2 = 0.0;
+        double sum3 = 0.0;
+
+        for (int ix = -imgx; ix <= imgx; ix++) {
+          const double qx = kx_unit*(kper+nx_pppm*ix);
+          const double sx = exp(-0.25*pow(qx/g_ewald,2.0));
+          const double argx = 0.5*qx*lx/nx_pppm;
+          const double wx = (argx != 0.0) ? pow(sin(argx)/argx,order) : 1.0;
+
+          for (int iy = -imgy; iy <= imgy; iy++) {
+            const double qy = ky_unit*(lper+ny_pppm*iy);
+            const double sy = exp(-0.25*pow(qy/g_ewald,2.0));
+            const double argy = 0.5*qy*ly/ny_pppm;
+            const double wy = (argy != 0.0) ? pow(sin(argy)/argy,order) : 1.0;
+
+            for (int iz = -imgz; iz <= imgz; iz++) {
+              const double qz = kz_unit*(mper+nz_pppm*iz);
+              const double sz = exp(-0.25*pow(qz/g_ewald,2.0));
+              const double argz = 0.5*qz*lz_slab/nz_pppm;
+              const double wz = (argz != 0.0) ? pow(sin(argz)/argz,order) : 1.0;
+
+              const double dot1 = kx_unit*kper*qx + ky_unit*lper*qy + kz_unit*mper*qz;
+              const double dot2 = qx*qx+qy*qy+qz*qz;
+              const double u2 = pow(wx*wy*wz,2.0);
+
+              sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
+              sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
+              sum3 += u2;
+            }
+          }
+        }
+
+        sum2 *= sum2;
+        sum3 *= sum3*sqk;
+        qopt += sum1 - sum2/sum3;
+      }
+    }
+  }
+
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ad differentiation scheme and Coulomb interaction
+   loops over the FFT points nlo_fft..nhi_fft owned by this proc and,
+   for each point with non-zero wave vector, over 5x5x5 shifted images
+   returns this proc's contribution only; compute_qopt() adds up the procs
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_ad()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double u2, sqk;
+  double sum1,sum2,sum3,sum4,dot2;
+
+  // images run from -2 to +2 in each dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
+          pow(unitkz*mper,2.0);
+
+        // the zero wave vector contributes nothing
+
+        if (sqk != 0.0) {
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+          sum4 = 0.0;
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*pow(qx/g_ewald,2.0));
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*pow(qy/g_ewald,2.0));
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*pow(qz/g_ewald,2.0));
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+
+                dot2 = qx*qx+qy*qy+qz*qz;
+                u2 = pow(wx*wy*wz,2.0);
+                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
+                sum2 += sx*sy*sz * u2*4.0*MY_PI;
+                sum3 += u2;
+                sum4 += dot2*u2;
+              }
+            }
+          }
+          sum2 *= sum2;
+          qopt += sum1 - sum2/(sum3*sum4);
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ik differentiation scheme and Dispersion interaction
+   loops over the FFT points nlo_fft_6..nhi_fft_6 owned by this proc and,
+   for each point with non-zero wave vector, over 5x5x5 shifted images
+   returns this proc's contribution only; compute_qopt_6() adds up the procs
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6_ik()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double sqk, u2;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,sum2, sum3;
+  double dot1,dot2, rtdot2, term;
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  double inv2ew = 2*g_ewald_6;
+  inv2ew = 1.0/inv2ew;
+  double rtpi = sqrt(MY_PI);
+
+  // images run from -2 to +2 in each dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+
+    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+
+      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
+          pow(unitkz*mper,2.0);
+
+        // the zero wave vector contributes nothing
+
+        if (sqk != 0.0) {
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm_6*nx);
+            sx = exp(-qx*qx*inv2ew*inv2ew);
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm_6;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm_6*ny);
+              sy = exp(-qy*qy*inv2ew*inv2ew);
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm_6;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm_6*nz);
+                sz = exp(-qz*qz*inv2ew*inv2ew);
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm_6;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                rtdot2 = sqrt(dot2);
+                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
+                  2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
+                term *= g_ewald_6*g_ewald_6*g_ewald_6;
+                u2 = pow(wx*wy*wz,2.0);
+                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
+                sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
+                sum3 += u2;
+              }
+            }
+          }
+          sum2 *= sum2;
+          sum3 *= sum3*sqk;
+          qopt += sum1 -sum2/sum3;
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   qopt for the ad differentiation scheme and Dispersion interaction
+   loops over the FFT points nlo_fft_6..nhi_fft_6 owned by this proc and,
+   for each point with non-zero wave vector, over 5x5x5 shifted images
+   returns this proc's contribution only; compute_qopt_6() adds up the procs
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6_ad()
+{
+  const double *cell = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double lx = cell[0];
+  const double ly = cell[1];
+  const double lz_slab = cell[2]*slab_volfactor;
+
+  const double kx_unit = (2.0*MY_PI/lx);
+  const double ky_unit = (2.0*MY_PI/ly);
+  const double kz_unit = (2.0*MY_PI/lz_slab);
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  const double twice_ew = 2*g_ewald_6;
+  const double inv2ew = 1/twice_ew;
+  const double rtpi = sqrt(MY_PI);
+
+  // images run from -2 to +2 in each dimension
+
+  const int imgx = 2;
+  const int imgy = 2;
+  const int imgz = 2;
+
+  double qopt = 0.0;
+
+  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    const int mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+
+    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      const int lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+
+      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        const int kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+
+        const double sqk = pow(kx_unit*kper,2.0) + pow(ky_unit*lper,2.0) +
+          pow(kz_unit*mper,2.0);
+
+        // the zero wave vector contributes nothing
+
+        if (sqk == 0.0) continue;
+
+        double sum1 = 0.0;
+        double sum2 = 0.0;
+        double sum3 = 0.0;
+        double sum4 = 0.0;
+
+        for (int ix = -imgx; ix <= imgx; ix++) {
+          const double qx = kx_unit*(kper+nx_pppm_6*ix);
+          const double sx = exp(-qx*qx*inv2ew*inv2ew);
+          const double argx = 0.5*qx*lx/nx_pppm_6;
+          const double wx = (argx != 0.0) ? pow(sin(argx)/argx,order_6) : 1.0;
+
+          for (int iy = -imgy; iy <= imgy; iy++) {
+            const double qy = ky_unit*(lper+ny_pppm_6*iy);
+            const double sy = exp(-qy*qy*inv2ew*inv2ew);
+            const double argy = 0.5*qy*ly/ny_pppm_6;
+            const double wy = (argy != 0.0) ? pow(sin(argy)/argy,order_6) : 1.0;
+
+            for (int iz = -imgz; iz <= imgz; iz++) {
+              const double qz = kz_unit*(mper+nz_pppm_6*iz);
+              const double sz = exp(-qz*qz*inv2ew*inv2ew);
+              const double argz = 0.5*qz*lz_slab/nz_pppm_6;
+              const double wz = (argz != 0.0) ? pow(sin(argz)/argz,order_6) : 1.0;
+
+              const double dot2 = qx*qx+qy*qy+qz*qz;
+              const double rtdot2 = sqrt(dot2);
+              double term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
+                2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
+              term *= g_ewald_6*g_ewald_6*g_ewald_6;
+              const double u2 = pow(wx*wy*wz,2.0);
+
+              sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
+              sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
+              sum3 += u2;
+              sum4 += dot2*u2;
+            }
+          }
+        }
+
+        sum2 *= sum2;
+        qopt += sum1 - sum2/(sum3*sum4);
+      }
+    }
+  }
+
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+ set size of FFT grid and g_ewald_6
+ for Dispersion interactions
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_grid_6()
+{
+  // each setup step is skipped when its flag is already set
+
+  if (!csumflag) calc_csum();        // dispersion coefficient sums
+  if (!gewaldflag_6) set_init_g6();  // initial g_ewald_6
+  if (!gridflag_6) set_n_pppm_6();   // grid dimensions
+
+  // increase each grid dimension until factorable() accepts it
+
+  for ( ; !factorable(nx_pppm_6); nx_pppm_6++) ;
+  for ( ; !factorable(ny_pppm_6); ny_pppm_6++) ;
+  for ( ; !factorable(nz_pppm_6); nz_pppm_6++) ;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate the sum of the squared dispersion coefficients and other
+ related quantities required for the calculations
+------------------------------------------------------------------------- */
+
+void PPPMDisp::calc_csum()
+{
+ csumij = 0.0;
+ csum = 0.0;
+
+ int ntypes = atom->ntypes;
+ int i,j,k;
+
+ delete [] cii;
+ cii = new double[ntypes +1];
+ for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
+ delete [] csumi;
+ csumi = new double[ntypes +1];
+ for (i = 0; i<=ntypes; i++) csumi[i] = 0.0;
+ int *neach = new int[ntypes+1];
+ for (i = 0; i<=ntypes; i++) neach[i] = 0;
+
+ //the following variables are needed to distinguish between arithmetic
+ // and geometric mixing
+
+ double mix1; // scales 20/16 to 4
+ int mix2; // shifts the value to the sigma^3 value
+ int mix3; // shifts the value to the right atom type
+ if (function[1]) {
+ for (i = 1; i <= ntypes; i++)
+ cii[i] = B[i]*B[i];
+ int tmp;
+ for (i = 0; i < atom->nlocal; i++) {
+ tmp = atom->type[i];
+ neach[tmp]++;
+ csum += B[tmp]*B[tmp];
+ }
+ }
+ if (function[2]) {
+ for (i = 1; i <= ntypes; i++)
+ cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3];
+ int tmp;
+ for (i = 0; i < atom->nlocal; i++) {
+ tmp = atom->type[i];
+ neach[tmp]++;
+ csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3];
+ }
+ }
+ if (function[3]) {
+ for (i = 1; i <= ntypes; i++)
+ for (j = 0; j < nsplit; j++)
+ cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j];
+ int tmp;
+ for (i = 0; i < atom->nlocal; i++) {
+ tmp = atom->type[i];
+ neach[tmp]++;
+ for (j = 0; j < nsplit; j++)
+ csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j];
+ }
+ }
+
+
+ double tmp2;
+ MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
+ csum = tmp2;
+ csumflag = 1;
+
+ int *neach_all = new int[ntypes+1];
+ MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
+
+ // compute csumij and csumi
+ double d1, d2;
+ if (function[1]){
+ for (i=1; i<=ntypes; i++) {
+ for (j=1; j<=ntypes; j++) {
+ csumi[i] += neach_all[j]*B[i]*B[j];
+ d1 = neach_all[i]*B[i];
+ d2 = neach_all[j]*B[j];
+ csumij += d1*d2;
+ //csumij += neach_all[i]*neach_all[j]*B[i]*B[j];
+ }
+ }
+ }
+ if (function[2]) {
+ for (i=1; i<=ntypes; i++) {
+ for (j=1; j<=ntypes; j++) {
+ for (k=0; k<=6; k++) {
+ csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
+ d1 = neach_all[i]*B[7*i + k];
+ d2 = neach_all[j]*B[7*(j+1)-k-1];
+ csumij += d1*d2;
+ //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
+ }
+ }
+ }
+ }
+ if (function[3]) {
+ for (i=1; i<=ntypes; i++) {
+ for (j=1; j<=ntypes; j++) {
+ for (k=0; kall(FLERR, str);
+
+}
+
+/* ----------------------------------------------------------------------
+ Calculate f(x) for Dispersion interaction
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::f_6()
+{
+  // f = real-space error estimate minus k-space error estimate,
+  // both evaluated at the current g_ewald_6
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz_slab = box[2]*slab_volfactor;
+  bigint natoms = atom->natoms;
+
+  // real-space part is evaluated first, then the k-space part
+
+  const double err_real = lj_rspace_error();
+
+  const double qopt = compute_qopt_6();
+  const double err_kspace = sqrt(qopt/natoms)*csum/(lx*ly*lz_slab);
+
+  return err_real - err_kspace;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate numerical derivative f'(x) using forward difference
+ [f(x + h) - f(x)] / h
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::derivf_6()
+{
+  const double step = 0.000001;   // finite-difference step added to g_ewald_6
+
+  // f at the current g_ewald_6
+
+  const double f_here = f_6();
+
+  // f at g_ewald_6 + step; the member is restored afterwards
+
+  const double g_saved = g_ewald_6;
+  g_ewald_6 += step;
+  const double f_shifted = f_6();
+  g_ewald_6 = g_saved;
+
+  return (f_shifted - f_here)/step;
+}
+
+
+/* ----------------------------------------------------------------------
+ calculate an initial value for g_ewald_6
+ ---------------------------------------------------------------------- */
+
+void PPPMDisp::set_init_g6()
+{
+  // make initial g_ewald_6 estimate
+  // based on desired accuracy and real space cutoff
+
+  // procedure:
+  //   start with g_ewald_6 = 1/cutoff_lj
+  //   df_real = real-space error estimate minus target accuracy
+  //   while df_real > 0, multiply g_ewald_6 by 2
+  //   while df_real < 0, divide g_ewald_6 by 2
+  //   perform bisection between the last two values of g_ewald_6
+
+  // a user-defined real-space accuracy overrides the global accuracy
+
+  double acc_rspace = accuracy;
+  if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
+
+  g_ewald_6 = 1.0/cutoff_lj;
+
+  // start the bracket at the initial value so that gmin/gmax below are
+  // well defined even if neither search loop runs (df_real exactly 0)
+
+  double g_ewald_old = g_ewald_6;
+
+  double df_real = lj_rspace_error() - acc_rspace;
+  int counter = 0;
+
+  while (df_real > 0 && counter < LARGE) {
+    counter++;
+    g_ewald_old = g_ewald_6;
+    g_ewald_6 *= 2;
+    df_real = lj_rspace_error() - acc_rspace;
+  }
+
+  while (df_real < 0 && counter < LARGE) {
+    counter++;
+    g_ewald_old = g_ewald_6;
+    g_ewald_6 *= 0.5;
+    df_real = lj_rspace_error() - acc_rspace;
+  }
+
+  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
+
+  // bisection on [gmin,gmax]: df_real < 0 moves the upper bound down,
+  // otherwise the lower bound moves up
+
+  double gmin = MIN(g_ewald_6, g_ewald_old);
+  double gmax = MAX(g_ewald_6, g_ewald_old);
+  g_ewald_6 = gmin + 0.5*(gmax-gmin);
+  counter = 0;
+  while (gmax-gmin > SMALL && counter < LARGE) {
+    counter++;
+    df_real = lj_rspace_error() - acc_rspace;
+    if (df_real < 0) gmax = g_ewald_6;
+    else gmin = g_ewald_6;
+    g_ewald_6 = gmin + 0.5*(gmax-gmin);
+  }
+  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
+
+}
+
+/* ----------------------------------------------------------------------
+ calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
+ ---------------------------------------------------------------------- */
+
+void PPPMDisp::set_n_pppm_6()
+{
+  // sets nx_pppm_6, ny_pppm_6, nz_pppm_6 and the FFT index bounds
+  // n{x,y,z}{lo,hi}_fft_6 by shrinking a common grid spacing until the
+  // k-space error estimate is <= the requested k-space accuracy
+
+  bigint natoms = atom->natoms;
+
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  double h, h_x,h_y,h_z;
+
+  // a user-defined k-space accuracy overrides the global accuracy
+
+  double acc_kspace = accuracy;
+  if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;
+
+  // initial value for the grid spacing
+  h = h_x = h_y = h_z = 4.0/g_ewald_6;
+  // decrease grid spacing until required precision is obtained
+  int count = 0;
+  while (1) {
+
+    // set grid dimension (truncated to int, at least 2 per dimension)
+    nx_pppm_6 = static_cast<int> (xprd/h_x);
+    ny_pppm_6 = static_cast<int> (yprd/h_y);
+    nz_pppm_6 = static_cast<int> (zprd_slab/h_z);
+
+    if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
+    if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
+    if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
+
+    // set local grid dimension
+    int npey_fft,npez_fft;
+    if (nz_pppm_6 >= nprocs) {
+      npey_fft = 1;
+      npez_fft = nprocs;
+    } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
+
+    int me_y = me % npey_fft;
+    int me_z = me / npey_fft;
+
+    // compute_qopt_6() loops over these bounds
+    nxlo_fft_6 = 0;
+    nxhi_fft_6 = nx_pppm_6 - 1;
+    nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
+    nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
+    nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
+    nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
+
+    double qopt = compute_qopt_6();
+
+    double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
+
+    count++;
+
+    // break loop if the accuracy has been reached or too many loops have been performed
+    if (df_kspace <= acc_kspace) break;
+    if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
+    h *= 0.95;
+    h_x = h_y = h_z = h;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ calculate the real space error for dispersion interactions
+ ---------------------------------------------------------------------- */
+
+double PPPMDisp::lj_rspace_error()
+{
+  // box lengths are read from domain->xprd etc directly
+  // (no prd/prd_lamda switch in this routine)
+
+  bigint natoms = atom->natoms;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+
+  // rc_g2 = (cutoff_lj*g_ewald_6)^2
+
+  double rc_g2 = (cutoff_lj*g_ewald_6);
+  rc_g2 *= rc_g2;
+  const double rc_g2_inv = 1.0/rc_g2;
+
+  // the last factor is a cubic polynomial in 1/rc_g2, in Horner form
+
+  return csum/sqrt(natoms*lx*ly*lz_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
+    exp(-rc_g2)*(1+rc_g2_inv*(3+rc_g2_inv*(6+rc_g2_inv*6)));
+}
+
+
+/* ----------------------------------------------------------------------
+ Compute the modified (Hockney-Eastwood) Coulomb Green's function
+ ---------------------------------------------------------------------- */
+
+void PPPMDisp::compute_gf()
+{
+  // fills greensfn[] for this proc's section of the Coulomb FFT grid,
+  // x index fastest; also updates the member "volume"
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz = box[2];
+  const double lz_slab = lz*slab_volfactor;
+  volume = lx * ly * lz_slab;
+
+  const double kunit_x = (2.0*MY_PI/lx);
+  const double kunit_y = (2.0*MY_PI/ly);
+  const double kunit_z = (2.0*MY_PI/lz_slab);
+
+  int idx = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+    const double qz = kunit_z*mper;
+    const double snz = sin(0.5*qz*lz_slab/nz_pppm);
+    const double snz2 = snz*snz;
+    const double sz = exp(-0.25*pow(qz/g_ewald,2.0));
+    const double argz = 0.5*qz*lz_slab/nz_pppm;
+    double wz = 1.0;
+    if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+    wz *= wz;
+
+    for (int l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+      const double qy = kunit_y*lper;
+      const double sny = sin(0.5*qy*ly/ny_pppm);
+      const double sny2 = sny*sny;
+      const double sy = exp(-0.25*pow(qy/g_ewald,2.0));
+      const double argy = 0.5*qy*ly/ny_pppm;
+      double wy = 1.0;
+      if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+      wy *= wy;
+
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+        const double qx = kunit_x*kper;
+        const double snx = sin(0.5*qx*lx/nx_pppm);
+        const double snx2 = snx*snx;
+        const double sx = exp(-0.25*pow(qx/g_ewald,2.0));
+        const double argx = 0.5*qx*lx/nx_pppm;
+        double wx = 1.0;
+        if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+        wx *= wx;
+
+        const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
+
+        // the k = 0 entry is set to zero
+
+        if (sqk == 0.0) {
+          greensfn[idx++] = 0.0;
+          continue;
+        }
+
+        const double numerator = 4.0*MY_PI/sqk;
+        const double denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
+        greensfn[idx++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ compute self force coefficients for ad-differentiation scheme
+ and Coulomb interaction
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
+                                   int nxlo_ft, int nylo_ft, int nzlo_ft,
+                                   int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                                   double *sf_pre1, double *sf_pre2, double *sf_pre3,
+                                   double *sf_pre4, double *sf_pre5, double *sf_pre6)
+{
+  // for every grid point in (nxlo_ft:nxhi_ft, nylo_ft:nyhi_ft, nzlo_ft:nzhi_ft)
+  // store six sums of weight products in sf_pre1..sf_pre6, x index fastest
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz = box[2];
+  const double lz_slab = lz*slab_volfactor;
+
+  const double kunit_x = (2.0*MY_PI/lx);
+  const double kunit_y = (2.0*MY_PI/ly);
+  const double kunit_z = (2.0*MY_PI/lz_slab);
+
+  // w?0, w?1, w?2 = weights at image i, i+1 and i+2 of the k-vector,
+  // stored at slot i+2 for i = -nb..nb
+
+  double wx0[5],wy0[5],wz0[5];
+  double wx1[5],wy1[5],wz1[5];
+  double wx2[5],wy2[5],wz2[5];
+
+  const int nb = 2;
+
+  int idx = 0;
+  for (int m = nzlo_ft; m <= nzhi_ft; m++) {
+    const int mper = m - nzp*(2*m/nzp);
+
+    for (int l = nylo_ft; l <= nyhi_ft; l++) {
+      const int lper = l - nyp*(2*l/nyp);
+
+      for (int k = nxlo_ft; k <= nxhi_ft; k++) {
+        const int kper = k - nxp*(2*k/nxp);
+
+        for (int i = -nb; i <= nb; i++) {
+          const int slot = i+2;
+          double arg;
+
+          // x direction
+
+          const double qx0 = kunit_x*(kper+nxp*i);
+          const double qx1 = kunit_x*(kper+nxp*(i+1));
+          const double qx2 = kunit_x*(kper+nxp*(i+2));
+          wx0[slot] = 1.0;
+          wx1[slot] = 1.0;
+          wx2[slot] = 1.0;
+          arg = 0.5*qx0*lx/nxp;
+          if (arg != 0.0) wx0[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qx1*lx/nxp;
+          if (arg != 0.0) wx1[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qx2*lx/nxp;
+          if (arg != 0.0) wx2[slot] = pow(sin(arg)/arg,ord);
+
+          // y direction
+
+          const double qy0 = kunit_y*(lper+nyp*i);
+          const double qy1 = kunit_y*(lper+nyp*(i+1));
+          const double qy2 = kunit_y*(lper+nyp*(i+2));
+          wy0[slot] = 1.0;
+          wy1[slot] = 1.0;
+          wy2[slot] = 1.0;
+          arg = 0.5*qy0*ly/nyp;
+          if (arg != 0.0) wy0[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qy1*ly/nyp;
+          if (arg != 0.0) wy1[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qy2*ly/nyp;
+          if (arg != 0.0) wy2[slot] = pow(sin(arg)/arg,ord);
+
+          // z direction
+
+          const double qz0 = kunit_z*(mper+nzp*i);
+          const double qz1 = kunit_z*(mper+nzp*(i+1));
+          const double qz2 = kunit_z*(mper+nzp*(i+2));
+          wz0[slot] = 1.0;
+          wz1[slot] = 1.0;
+          wz2[slot] = 1.0;
+          arg = 0.5*qz0*lz_slab/nzp;
+          if (arg != 0.0) wz0[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qz1*lz_slab/nzp;
+          if (arg != 0.0) wz1[slot] = pow(sin(arg)/arg,ord);
+          arg = 0.5*qz2*lz_slab/nzp;
+          if (arg != 0.0) wz2[slot] = pow(sin(arg)/arg,ord);
+        }
+
+        // accumulate products of the unshifted weight with each shifted one
+
+        double acc1 = 0.0, acc2 = 0.0, acc3 = 0.0;
+        double acc4 = 0.0, acc5 = 0.0, acc6 = 0.0;
+
+        for (int jx = 0; jx <= 4; jx++) {
+          for (int jy = 0; jy <= 4; jy++) {
+            for (int jz = 0; jz <= 4; jz++) {
+              const double u0 = wx0[jx]*wy0[jy]*wz0[jz];
+              const double u1 = wx1[jx]*wy0[jy]*wz0[jz];
+              const double u2 = wx2[jx]*wy0[jy]*wz0[jz];
+              const double u3 = wx0[jx]*wy1[jy]*wz0[jz];
+              const double u4 = wx0[jx]*wy2[jy]*wz0[jz];
+              const double u5 = wx0[jx]*wy0[jy]*wz1[jz];
+              const double u6 = wx0[jx]*wy0[jy]*wz2[jz];
+
+              acc1 += u0*u1;
+              acc2 += u0*u2;
+              acc3 += u0*u3;
+              acc4 += u0*u4;
+              acc5 += u0*u5;
+              acc6 += u0*u6;
+            }
+          }
+        }
+
+        // store values
+
+        sf_pre1[idx] = acc1;
+        sf_pre2[idx] = acc2;
+        sf_pre3[idx] = acc3;
+        sf_pre4[idx] = acc4;
+        sf_pre5[idx] = acc5;
+        sf_pre6[idx] = acc6;
+        idx++;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ Compute the modified (hockney-eastwood) dispersion green function
+ ---------------------------------------------------------------------- */
+
+void PPPMDisp::compute_gf_6()
+{
+  // fills greensfn_6[] for this proc's section of the dispersion FFT grid,
+  // x index fastest
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz = box[2];
+  const double lz_slab = lz*slab_volfactor;
+
+  const double kunit_x = (2.0*MY_PI/lx);
+  const double kunit_y = (2.0*MY_PI/ly);
+  const double kunit_z = (2.0*MY_PI/lz_slab);
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  double inv2ew = 2*g_ewald_6;
+  inv2ew = 1/inv2ew;
+  const double rtpi = sqrt(MY_PI);
+
+  // k-independent prefactor
+
+  const double numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
+
+  int idx = 0;
+  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    const int mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+    const double qz = kunit_z*mper;
+    const double snz = sin(0.5*kunit_z*mper*lz_slab/nz_pppm_6);
+    const double snz2 = snz*snz;
+    const double sz = exp(-qz*qz*inv2ew*inv2ew);
+    const double argz = 0.5*qz*lz_slab/nz_pppm_6;
+    double wz = 1.0;
+    if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
+    wz *= wz;
+
+    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      const int lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+      const double qy = kunit_y*lper;
+      const double sny = sin(0.5*kunit_y*lper*ly/ny_pppm_6);
+      const double sny2 = sny*sny;
+      const double sy = exp(-qy*qy*inv2ew*inv2ew);
+      const double argy = 0.5*qy*ly/ny_pppm_6;
+      double wy = 1.0;
+      if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
+      wy *= wy;
+
+      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        const int kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+        const double qx = kunit_x*kper;
+        const double snx = sin(0.5*kunit_x*kper*lx/nx_pppm_6);
+        const double snx2 = snx*snx;
+        const double sx = exp(-qx*qx*inv2ew*inv2ew);
+        const double argx = 0.5*qx*lx/nx_pppm_6;
+        double wx = 1.0;
+        if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
+        wx *= wx;
+
+        const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
+
+        // the k = 0 entry is set to zero
+
+        if (sqk == 0.0) {
+          greensfn_6[idx++] = 0.0;
+          continue;
+        }
+
+        const double denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
+        const double rtsqk = sqrt(sqk);
+        const double term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
+          2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
+        greensfn_6[idx++] = numerator*term*wx*wy*wz/denominator;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ compute self force coefficients for ad-differentiation scheme
+ and Coulomb interaction
+------------------------------------------------------------------------- */
+void PPPMDisp::compute_sf_coeff()
+{
+  // sf_coeff[0..5] = prefactor * sum over the Coulomb FFT grid of
+  // sf_precoeffN * greensfn, summed across all procs;
+  // also updates the member "volume"
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz = box[2];
+  const double lz_slab = lz*slab_volfactor;
+  volume = lx * ly * lz_slab;
+
+  for (int c = 0; c < 6; c++) sf_coeff[c] = 0.0;
+
+  // local sums over this proc's grid points
+
+  int idx = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++)
+    for (int l = nylo_fft; l <= nyhi_fft; l++)
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        sf_coeff[0] += sf_precoeff1[idx]*greensfn[idx];
+        sf_coeff[1] += sf_precoeff2[idx]*greensfn[idx];
+        sf_coeff[2] += sf_precoeff3[idx]*greensfn[idx];
+        sf_coeff[3] += sf_precoeff4[idx]*greensfn[idx];
+        sf_coeff[4] += sf_precoeff5[idx]*greensfn[idx];
+        sf_coeff[5] += sf_precoeff6[idx]*greensfn[idx];
+        idx++;
+      }
+
+  // Compute the coefficients for the self-force correction:
+  // even entries get the per-dimension prefactor, odd entries twice that
+
+  double prex = MY_PI/volume;
+  double prey = prex;
+  double prez = prex;
+  prex *= nx_pppm/lx;
+  prey *= ny_pppm/ly;
+  prez *= nz_pppm/lz_slab;
+
+  sf_coeff[0] *= prex;
+  sf_coeff[1] *= prex*2;
+  sf_coeff[2] *= prey;
+  sf_coeff[3] *= prey*2;
+  sf_coeff[4] *= prez;
+  sf_coeff[5] *= prez*2;
+
+  // communicate values with other procs
+
+  double total[6];
+  MPI_Allreduce(sf_coeff,total,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int c = 0; c < 6; c++) sf_coeff[c] = total[c];
+}
+
+/* ----------------------------------------------------------------------
+ compute self force coefficients for ad-differentiation scheme
+ and Dispersion interaction
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_sf_coeff_6()
+{
+  // sf_coeff_6[0..5] = prefactor * sum over the dispersion FFT grid of
+  // sf_precoeffN_6 * greensfn_6, summed across all procs;
+  // also updates the member "volume"
+
+  double *box;
+  if (triclinic == 0) box = domain->prd;
+  else box = domain->prd_lamda;
+
+  const double lx = box[0];
+  const double ly = box[1];
+  const double lz = box[2];
+  const double lz_slab = lz*slab_volfactor;
+  volume = lx * ly * lz_slab;
+
+  for (int c = 0; c < 6; c++) sf_coeff_6[c] = 0.0;
+
+  // local sums over this proc's grid points
+
+  int idx = 0;
+  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++)
+    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++)
+      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        sf_coeff_6[0] += sf_precoeff1_6[idx]*greensfn_6[idx];
+        sf_coeff_6[1] += sf_precoeff2_6[idx]*greensfn_6[idx];
+        sf_coeff_6[2] += sf_precoeff3_6[idx]*greensfn_6[idx];
+        sf_coeff_6[3] += sf_precoeff4_6[idx]*greensfn_6[idx];
+        sf_coeff_6[4] += sf_precoeff5_6[idx]*greensfn_6[idx];
+        sf_coeff_6[5] += sf_precoeff6_6[idx]*greensfn_6[idx];
+        idx++;
+      }
+
+  // perform multiplication with prefactors:
+  // even entries get the per-dimension prefactor, odd entries twice that
+
+  double prex = MY_PI/volume;
+  double prey = prex;
+  double prez = prex;
+  prex *= nx_pppm_6/lx;
+  prey *= ny_pppm_6/ly;
+  prez *= nz_pppm_6/lz_slab;
+
+  sf_coeff_6[0] *= prex;
+  sf_coeff_6[1] *= prex*2;
+  sf_coeff_6[2] *= prey;
+  sf_coeff_6[3] *= prey*2;
+  sf_coeff_6[4] *= prez;
+  sf_coeff_6[5] *= prez*2;
+
+  // communicate values with other procs
+
+  double total[6];
+  MPI_Allreduce(sf_coeff_6,total,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int c = 0; c < 6; c++) sf_coeff_6[c] = total[c];
+
+}
+
+/* ----------------------------------------------------------------------
+ denominator for Hockney-Eastwood Green's function
+ of x,y,z = sin(kx*deltax/2), etc
+
+ inf n-1
+ S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l
+ j=-inf l=0
+
+ = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x)
+ gf_b = denominator expansion coeffs
+------------------------------------------------------------------------- */
+
+double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
+{
+  // evaluate the polynomial with coefficients g_b[0..ord-1] at x, y and z
+  // by Horner's rule, highest coefficient first
+
+  double poly_x = 0.0;
+  double poly_y = 0.0;
+  double poly_z = 0.0;
+
+  int l = ord;
+  while (--l >= 0) {
+    poly_x = g_b[l] + poly_x*x;
+    poly_y = g_b[l] + poly_y*y;
+    poly_z = g_b[l] + poly_z*z;
+  }
+
+  // result is the squared product of the three polynomial values
+
+  const double prod = poly_x*poly_y*poly_z;
+  return prod*prod;
+}
+
+/* ----------------------------------------------------------------------
+ pre-compute Green's function denominator expansion coeffs, Gamma(2n)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_gf_denom(double* gf, int ord)
+{
+  // start from gf = (1, 0, ..., 0)
+
+  gf[0] = 1.0;
+  for (int l = 1; l < ord; l++) gf[l] = 0.0;
+
+  // recurrence over m; entries are updated from high index to low so that
+  // gf[l-1] still holds its value from the previous m
+
+  for (int m = 1; m < ord; m++) {
+    int l = m;
+    while (l > 0) {
+      gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
+      l--;
+    }
+    // here l == 0
+    gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
+  }
+
+  // scale by 1/(2*ord-1)!
+
+  bigint factorial = 1;
+  for (int k = 1; k < 2*ord; k++) factorial *= k;
+  const double gaminv = 1.0/factorial;
+  for (int l = 0; l < ord; l++) gf[l] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+ ghost-swap to accumulate full density in brick decomposition
+ remap density from 3d brick decomposition to FFTdecomposition
+ for coulomb interaction or dispersion interaction with geometric
+ mixing
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
+                         int nxhi_i, int nyhi_i, int nzhi_i,
+                         FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
+                         LAMMPS_NS::Remap* rmp)
+{
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  // but this works optimally on only double values, not complex values
+
+  // pack (nxlo_i:nxhi_i, nylo_i:nyhi_i, nzlo_i:nzhi_i) of dbrick
+  // contiguously into dfft, x index fastest
+
+  FFT_SCALAR *out = dfft;
+  for (int iz = nzlo_i; iz <= nzhi_i; iz++) {
+    for (int iy = nylo_i; iy <= nyhi_i; iy++) {
+      for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
+        *out = dbrick[iz][iy][ix];
+        out++;
+      }
+    }
+  }
+
+  // dfft is both input and output of the remap
+
+  rmp->perform(dfft,dfft,work);
+}
+
+
+/* ----------------------------------------------------------------------
+ ghost-swap to accumulate full density in brick decomposition
+ remap density from 3d brick decomposition to FFTdecomposition
+ for dispersion with arithmetic mixing rule
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft_a()
+{
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  // but this works optimally on only double values, not complex values
+
+  // all seven a0..a6 grids are packed in one sweep, x index fastest
+
+  int idx = 0;
+  for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
+    for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
+      for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
+        density_fft_a0[idx] = density_brick_a0[iz][iy][ix];
+        density_fft_a1[idx] = density_brick_a1[iz][iy][ix];
+        density_fft_a2[idx] = density_brick_a2[iz][iy][ix];
+        density_fft_a3[idx] = density_brick_a3[iz][iy][ix];
+        density_fft_a4[idx] = density_brick_a4[iz][iy][ix];
+        density_fft_a5[idx] = density_brick_a5[iz][iy][ix];
+        density_fft_a6[idx] = density_brick_a6[iz][iy][ix];
+        idx++;
+      }
+    }
+  }
+
+  // remap each grid in place; work1_6 is passed to every call
+
+  remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
+  remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
+  remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
+  remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
+  remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
+  remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
+  remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
+
+}
+
+/* ----------------------------------------------------------------------
+ ghost-swap to accumulate full density in brick decomposition
+ remap density from 3d brick decomposition to FFTdecomposition
+ for dispersion with special case
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft_none()
+{
+  int k,n,ix,iy,iz;
+
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  // but this works optimally on only double values, not complex values
+
+  // NOTE(review): the loop bounds and the copy statement were reconstructed
+  // from a damaged source line, by analogy with brick2fft_a() and
+  // make_rho_none() -- confirm against the upstream file
+
+  for (k = 0; k < nsplit_alloc; k++) {
+    n = 0;
+    for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
+      for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
+        for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++)
+          density_fft_none[k][n++] = density_brick_none[k][iz][iy][ix];
+  }
+
+  // remap each grid in place
+
+  for (k = 0; k < nsplit_alloc; k++)
+    remap_6->perform(density_fft_none[k],density_fft_none[k],work1_6);
+}
+
+/* ----------------------------------------------------------------------
+ find center grid pt for each of my particles
+ check that full stencil for the particle will fit in my 3d brick
+ store central grid pt indices in part2grid array
+------------------------------------------------------------------------- */
+
+void PPPMDisp::particle_map(double delx, double dely, double delz,
+                            double sft, int** p2g, int nup, int nlow,
+                            int nxlo, int nylo, int nzlo,
+                            int nxhi, int nyhi, int nzhi)
+{
+  // delx,dely,delz multiply the atom's offset from boxlo, sft is added
+  //   before truncation to int
+  // p2g[i][0..2] receives the grid indices of local atom i
+  // nlow,nup = stencil offsets added to the grid index for the range check
+  // (nxlo:nxhi, nylo:nyhi, nzlo:nzhi) = index range the stencil must fit in
+
+  int nx,ny,nz;
+
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  int flag = 0;
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
+    ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
+    nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
+
+    p2g[i][0] = nx;
+    p2g[i][1] = ny;
+    p2g[i][2] = nz;
+
+    // check that entire stencil around nx,ny,nz will fit in my 3d brick
+
+    if (nx+nlow < nxlo || nx+nup > nxhi ||
+        ny+nlow < nylo || ny+nup > nyhi ||
+        nz+nlow < nzlo || nz+nup > nzhi)
+      flag = 1;
+  }
+
+  // the error is raised only after all local atoms have been mapped
+
+  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
+}
+
+
+void PPPMDisp::particle_map_c(double delx, double dely, double delz,
+                              double sft, int** p2g, int nup, int nlow,
+                              int nxlo, int nylo, int nzlo,
+                              int nxhi, int nyhi, int nzhi)
+{
+  // forwards every argument unchanged to particle_map()
+
+  particle_map(delx, dely, delz,
+               sft, p2g, nup, nlow,
+               nxlo, nylo, nzlo,
+               nxhi, nyhi, nzhi);
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_c()
+{
+  // clear 3d density array
+
+  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (gx,gy,gz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    const int gx = part2grid[i][0];
+    const int gy = part2grid[i][1];
+    const int gz = part2grid[i][2];
+    const FFT_SCALAR dx = gx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR dy = gy+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR dz = gz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+
+    // weight is built up as charge/volume factor * z * y * x stencil values
+
+    const FFT_SCALAR z0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const int mz = n+gz;
+      const FFT_SCALAR y0 = z0*rho1d[2][n];
+      for (int m = nlower; m <= nupper; m++) {
+        const int my = m+gy;
+        const FFT_SCALAR x0 = y0*rho1d[1][m];
+        for (int l = nlower; l <= nupper; l++) {
+          const int mx = l+gx;
+          density_brick[mz][my][mx] += x0*rho1d[0][l];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = dispersion "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid --- geometric mixing
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_g()
+{
+  // clear 3d density array
+
+  memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+
+  // loop over my particles, add their contribution to nearby grid points
+  // (gx,gy,gz) = global coords of grid pt to "lower left" of particle
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    const int gx = part2grid_6[i][0];
+    const int gy = part2grid_6[i][1];
+    const int gz = part2grid_6[i][2];
+    const FFT_SCALAR dx = gx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    const FFT_SCALAR dy = gy+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    const FFT_SCALAR dz = gz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+    // the per-type coefficient B[type] plays the role of the charge
+
+    const int itype = atom->type[i];
+    const FFT_SCALAR z0 = delvolinv_6 * B[itype];
+    for (int n = nlower_6; n <= nupper_6; n++) {
+      const int mz = n+gz;
+      const FFT_SCALAR y0 = z0*rho1d_6[2][n];
+      for (int m = nlower_6; m <= nupper_6; m++) {
+        const int my = m+gy;
+        const FFT_SCALAR x0 = y0*rho1d_6[1][m];
+        for (int l = nlower_6; l <= nupper_6; l++) {
+          const int mx = l+gx;
+          density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = dispersion "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid --- arithmetic mixing
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_a()
+{
+  // clear the seven 3d density arrays a0..a6
+
+  const size_t nbytes = ngrid_6*sizeof(FFT_SCALAR);
+  memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+  memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,nbytes);
+
+  // loop over my particles, add their contribution to nearby grid points
+  // (gx,gy,gz) = global coords of grid pt to "lower left" of particle
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // the stencil weights are shared by all seven grids
+
+    const int gx = part2grid_6[i][0];
+    const int gy = part2grid_6[i][1];
+    const int gz = part2grid_6[i][2];
+    const FFT_SCALAR dx = gx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    const FFT_SCALAR dy = gy+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    const FFT_SCALAR dz = gz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+    // B holds 7 coefficients per atom type, starting at index 7*type
+
+    const int itype = atom->type[i];
+    const FFT_SCALAR z0 = delvolinv_6;
+    for (int n = nlower_6; n <= nupper_6; n++) {
+      const int mz = n+gz;
+      const FFT_SCALAR y0 = z0*rho1d_6[2][n];
+      for (int m = nlower_6; m <= nupper_6; m++) {
+        const int my = m+gy;
+        const FFT_SCALAR x0 = y0*rho1d_6[1][m];
+        for (int l = nlower_6; l <= nupper_6; l++) {
+          const int mx = l+gx;
+          const FFT_SCALAR w = x0*rho1d_6[0][l];
+          density_brick_a0[mz][my][mx] += w*B[7*itype];
+          density_brick_a1[mz][my][mx] += w*B[7*itype+1];
+          density_brick_a2[mz][my][mx] += w*B[7*itype+2];
+          density_brick_a3[mz][my][mx] += w*B[7*itype+3];
+          density_brick_a4[mz][my][mx] += w*B[7*itype+4];
+          density_brick_a5[mz][my][mx] += w*B[7*itype+5];
+          density_brick_a6[mz][my][mx] += w*B[7*itype+6];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   spread the dispersion coefficients of my particles onto my 3d brick
+   case without a mixing rule: one grid density_brick_none[k] per k
+   a particle of type t adds weight*B[nsplit*t+k] to grid k at every
+   point of its interpolation stencil (nlower_6:nupper_6 per dimension)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_none()
+{
+  // zero all nsplit_alloc density grids, ngrid_6 values each
+
+  for (int ig = 0; ig < nsplit_alloc; ig++)
+    memset(&(density_brick_none[ig][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+           ngrid_6*sizeof(FFT_SCALAR));
+
+  double **pos = atom->x;
+  const int nmine = atom->nlocal;
+
+  for (int ip = 0; ip < nmine; ip++) {
+
+    // (gx,gy,gz) = grid point this particle is assigned to
+    // (fx,fy,fz) = offset from that grid point, in grid units
+
+    const int gx = part2grid_6[ip][0];
+    const int gy = part2grid_6[ip][1];
+    const int gz = part2grid_6[ip][2];
+    FFT_SCALAR fx = gx+shiftone_6 - (pos[ip][0]-boxlo[0])*delxinv_6;
+    FFT_SCALAR fy = gy+shiftone_6 - (pos[ip][1]-boxlo[1])*delyinv_6;
+    FFT_SCALAR fz = gz+shiftone_6 - (pos[ip][2]-boxlo[2])*delzinv_6;
+
+    // 1d weights of the stencil, one row of rho1d_6 per dimension
+
+    compute_rho1d(fx,fy,fz, order_6, rho_coeff_6, rho1d_6);
+
+    const int itype = atom->type[ip];
+
+    // weights stay FFT_SCALAR at every level so rounding is unchanged
+    // in single precision builds
+
+    FFT_SCALAR wvol = delvolinv_6;
+    for (int oz = nlower_6; oz <= nupper_6; oz++) {
+      const int mz = oz+gz;
+      FFT_SCALAR wz = wvol*rho1d_6[2][oz];
+      for (int oy = nlower_6; oy <= nupper_6; oy++) {
+        const int my = oy+gy;
+        FFT_SCALAR wzy = wz*rho1d_6[1][oy];
+        for (int ox = nlower_6; ox <= nupper_6; ox++) {
+          const int mx = ox+gx;
+          FFT_SCALAR wgt = wzy*rho1d_6[0][ox];
+
+          // only the first nsplit grids receive contributions
+
+          for (int ig = 0; ig < nsplit; ig++)
+            density_brick_none[ig][mz][my][mx] += wgt*B[nsplit*itype + ig];
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ik differentiation
+   wk1,wk2 = work arrays of interleaved (re,im) pairs, 2*nft values each
+   dfft = nft real density values, copied into the real parts of wk1
+   ft1 = FFT used for the r -> k transform, ft2 = FFT used to go back
+   nx_p,ny_p,nz_p = grid dimensions, only used for the 1/N normalization
+   n?lo_ft:n?hi_ft = index ranges of the k-space loops (kx,ky,kz lookups)
+   n?lo_i:n?hi_i = index ranges written in the output bricks
+   egy = global energy, accumulated if eflag_global is set
+   gfn = Green's function, one value per FFT point
+   kx..kz, kx2..kz2 = wave vector tables used for the gradients
+   vx_brick,vy_brick,vz_brick = output gradient grids
+   vir = 6 global virial components, accumulated if vflag_global is set
+   vcoeff,vcoeff2 = virial coefficients per FFT point
+   u_pa = per-atom energy grid, only written if eflag_atom is set
+   v0_pa..v5_pa = per-atom virial grids, only written if vflag_atom is set
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
+                          FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
+                          int nx_p, int ny_p, int nz_p, int nft,
+                          int nxlo_ft, int nylo_ft, int nzlo_ft,
+                          int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                          int nxlo_i, int nylo_i, int nzlo_i,
+                          int nxhi_i, int nyhi_i, int nzhi_i,
+                          double& egy, double* gfn,
+                          double* kx, double* ky, double* kz,
+                          double* kx2, double* ky2, double* kz2,
+                          FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
+                          double* vir, double** vcoeff, double** vcoeff2,
+                          FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                          FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge/dispersion density (r -> k)
+  // density goes into the real parts, imaginary parts are zero
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] = dfft[i];
+    wk1[n++] = ZEROF;
+  }
+
+  ft1->compute(wk1,wk1,1);
+
+  // if requested, compute energy and virial contribution
+  // grid-point count is formed in double: the int product nx_p*ny_p*nz_p
+  // overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_p*ny_p*nz_p);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
+        if (eflag_global) egy += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        egy +=
+          s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] *= scaleinv * gfn[i];
+    wk1[n++] *= scaleinv * gfn[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vx_brick,vy_brick,vz_brick arrays
+
+  // x & y direction gradient, both obtained from a single transform:
+  // the x term is multiplied by -i, the y term is not, and after the
+  // transform vx_brick is read from the real and vy_brick from the
+  // imaginary parts
+
+  n = 0;
+  for (k = nzlo_ft; k <= nzhi_ft; k++)
+    for (j = nylo_ft; j <= nyhi_ft; j++)
+      for (i = nxlo_ft; i <= nxhi_ft; i++) {
+        wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
+        wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
+        n += 2;
+      }
+
+  ft2->compute(wk2,wk2,-1);
+
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        vx_brick[k][j][i] = wk2[n++];
+        vy_brick[k][j][i] = wk2[n++];
+      }
+
+  if (!eflag_atom) {
+    // z direction gradient only
+    // only the real parts of the transformed data are kept
+
+    n = 0;
+    for (k = nzlo_ft; k <= nzhi_ft; k++)
+      for (j = nylo_ft; j <= nyhi_ft; j++)
+        for (i = nxlo_ft; i <= nxhi_ft; i++) {
+          wk2[n] = kz[k]*wk1[n+1];
+          wk2[n+1] = -kz[k]*wk1[n];
+          n += 2;
+        }
+
+    ft2->compute(wk2,wk2,-1);
+
+    n = 0;
+    for (k = nzlo_i; k <= nzhi_i; k++)
+      for (j = nylo_i; j <= nyhi_i; j++)
+        for (i = nxlo_i; i <= nxhi_i; i++) {
+          vz_brick[k][j][i] = wk2[n];
+          n += 2;
+        }
+
+  } else {
+    // z direction gradient & per-atom energy, packed into one transform:
+    // vz_brick is read from the real, u_pa from the imaginary parts
+    // NOTE(review): this branch uses 0.5*(kz[k]-kz2[k]) where the branch
+    // above uses kz[k] - confirm both give the same z gradient
+
+    n = 0;
+    for (k = nzlo_ft; k <= nzhi_ft; k++)
+      for (j = nylo_ft; j <= nyhi_ft; j++)
+        for (i = nxlo_ft; i <= nxhi_ft; i++) {
+          wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
+          wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
+          n += 2;
+        }
+
+    ft2->compute(wk2,wk2,-1);
+
+    n = 0;
+    for (k = nzlo_i; k <= nzhi_i; k++)
+      for (j = nylo_i; j <= nyhi_i; j++)
+        for (i = nxlo_i; i <= nxhi_i; i++) {
+          vz_brick[k][j][i] = wk2[n++];
+          u_pa[k][j][i] = wk2[n++];
+        }
+  }
+
+  // per-atom virial grids are built from V(k), which is still in wk1
+
+  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
+                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
+                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ad differentiation
+   wk1,wk2 = work arrays of interleaved (re,im) pairs, 2*nft values each
+   dfft = nft real density values, copied into the real parts of wk1
+   ft1 = FFT used for the r -> k transform, ft2 = FFT used to go back
+   nx_p,ny_p,nz_p = grid dimensions, only used for the 1/N normalization
+   n?lo_ft:n?hi_ft = index ranges of the k-space copy loop
+   n?lo_i:n?hi_i = index ranges written in the output bricks
+   egy = global energy, accumulated if eflag_global is set
+   gfn = Green's function, one value per FFT point
+   vir = 6 global virial components, accumulated if vflag_global is set
+   vcoeff,vcoeff2 = virial coefficients per FFT point
+   u_pa = output grid of the transformed V(k), always written
+   v0_pa..v5_pa = per-atom virial grids, only written if vflag_atom is set
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
+                          FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
+                          int nx_p, int ny_p, int nz_p, int nft,
+                          int nxlo_ft, int nylo_ft, int nzlo_ft,
+                          int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                          int nxlo_i, int nylo_i, int nzlo_i,
+                          int nxhi_i, int nyhi_i, int nzhi_i,
+                          double& egy, double* gfn,
+                          double* vir, double** vcoeff, double** vcoeff2,
+                          FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                          FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge/dispersion density (r -> k)
+  // density goes into the real parts, imaginary parts are zero
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] = dfft[i];
+    wk1[n++] = ZEROF;
+  }
+
+  ft1->compute(wk1,wk1,1);
+
+  // if requested, compute energy and virial contribution
+  // grid-point count is formed in double: the int product nx_p*ny_p*nz_p
+  // overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_p*ny_p*nz_p);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
+        if (eflag_global) egy += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        egy +=
+          s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] *= scaleinv * gfn[i];
+    wk1[n++] *= scaleinv * gfn[i];
+  }
+
+  // copy V(k) to wk2 so that wk1 stays intact for poisson_peratom
+
+  n = 0;
+  for (k = nzlo_ft; k <= nzhi_ft; k++)
+    for (j = nylo_ft; j <= nyhi_ft; j++)
+      for (i = nxlo_ft; i <= nxhi_ft; i++) {
+        wk2[n] = wk1[n];
+        wk2[n+1] = wk1[n+1];
+        n += 2;
+      }
+
+  ft2->compute(wk2,wk2,-1);
+
+  // keep the real parts only, imaginary parts are skipped
+
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        u_pa[k][j][i] = wk2[n++];
+        n++;
+      }
+
+  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
+                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
+                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
+}
+
+/* ----------------------------------------------------------------------
+   Fourier transforms for the per-atom virial grids
+   wk1 = input, nft complex values as interleaved (re,im) pairs, unchanged
+   wk2 = scratch array, overwritten by each of the three transforms
+   each transform yields two grids: every complex value of wk1 is
+   multiplied by (a + i*b), and after ft2->compute(..,-1) the first
+   grid is read from the real and the second from the imaginary parts
+     v0_pa,v1_pa: a = vcoeff[.][0],  b = vcoeff[.][1]
+     v2_pa,v3_pa: a = vcoeff[.][2],  b = vcoeff2[.][0]
+     v4_pa,v5_pa: a = vcoeff2[.][1], b = vcoeff2[.][2]
+   n?lo_i:n?hi_i = index ranges written in the output grids
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2,
+                               double** vcoeff, double** vcoeff2, int nft,
+                               int nxlo_i, int nylo_i, int nzlo_i,
+                               int nxhi_i, int nyhi_i, int nzhi_i,
+                               FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                               FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+{
+  int ip,re,im,idx,ix,iy,iz;
+
+  // v0 & v1 term
+
+  for (ip = 0; ip < nft; ip++) {
+    re = 2*ip;
+    im = re+1;
+    wk2[re] = wk1[re]*vcoeff[ip][0] - wk1[im]*vcoeff[ip][1];
+    wk2[im] = wk1[im]*vcoeff[ip][0] + wk1[re]*vcoeff[ip][1];
+  }
+
+  ft2->compute(wk2,wk2,-1);
+
+  idx = 0;
+  for (iz = nzlo_i; iz <= nzhi_i; iz++)
+    for (iy = nylo_i; iy <= nyhi_i; iy++)
+      for (ix = nxlo_i; ix <= nxhi_i; ix++) {
+        v0_pa[iz][iy][ix] = wk2[idx++];
+        v1_pa[iz][iy][ix] = wk2[idx++];
+      }
+
+  // v2 & v3 term
+
+  for (ip = 0; ip < nft; ip++) {
+    re = 2*ip;
+    im = re+1;
+    wk2[re] = wk1[re]*vcoeff[ip][2] - wk1[im]*vcoeff2[ip][0];
+    wk2[im] = wk1[im]*vcoeff[ip][2] + wk1[re]*vcoeff2[ip][0];
+  }
+
+  ft2->compute(wk2,wk2,-1);
+
+  idx = 0;
+  for (iz = nzlo_i; iz <= nzhi_i; iz++)
+    for (iy = nylo_i; iy <= nyhi_i; iy++)
+      for (ix = nxlo_i; ix <= nxhi_i; ix++) {
+        v2_pa[iz][iy][ix] = wk2[idx++];
+        v3_pa[iz][iy][ix] = wk2[idx++];
+      }
+
+  // v4 & v5 term
+
+  for (ip = 0; ip < nft; ip++) {
+    re = 2*ip;
+    im = re+1;
+    wk2[re] = wk1[re]*vcoeff2[ip][1] - wk1[im]*vcoeff2[ip][2];
+    wk2[im] = wk1[im]*vcoeff2[ip][1] + wk1[re]*vcoeff2[ip][2];
+  }
+
+  ft2->compute(wk2,wk2,-1);
+
+  idx = 0;
+  for (iz = nzlo_i; iz <= nzhi_i; iz++)
+    for (iy = nylo_i; iy <= nyhi_i; iy++)
+      for (ix = nxlo_i; ix <= nxhi_i; ix++) {
+        v4_pa[iz][iy][ix] = wk2[idx++];
+        v5_pa[iz][iy][ix] = wk2[idx++];
+      }
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ik scheme
+   dfft_1,dfft_2 = the two real densities, nfft_6 values each
+   both are carried through the transforms as one complex array in
+   work1_6 (dfft_1 -> real parts, dfft_2 -> imaginary parts), so every
+   transform back yields one grid for each density
+   v?brick_1,v?brick_2 = output gradient grids (real / imaginary parts)
+   u_pa_1,u_pa_2 = per-atom energy grids, only written if eflag_atom
+   v0_pa_1..v5_pa_2 = per-atom virial grids, only written if vflag_atom
+   energy_6 and virial_6 are accumulated if eflag_global/vflag_global
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                             FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
+                             FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
+                             FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                             FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                             FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                             FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  int i,j,k,n;
+  double eng;
+
+  // grid-point count is formed in double: a product of the three grid
+  // dimensions in int overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  // need to be calculated
+
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+
+  // two transforms are required when energies and pressures are
+  // calculated: dfft_1 in the real parts of work1_6, dfft_2 in the
+  // imaginary parts of work2_6, everything else zero
+
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+
+    // energy per FFT point is a cross term of the two transforms
+
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+        if (eflag_global) energy_6 += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        energy_6 +=
+          2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+        n += 2;
+      }
+    }
+
+    // unify the two transformed vectors for efficient calculations later
+
+    for (i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by Green's function
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of the v?brick_1 and v?brick_2 arrays
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vxbrick_1[k][j][i] = work2_6[n++];
+        vxbrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vybrick_1[k][j][i] = work2_6[n++];
+        vybrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vzbrick_1[k][j][i] = work2_6[n++];
+        vzbrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // per-atom energy: transform a copy of work1_6 back unmodified
+
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work2_6[n] = work1_6[n];
+      work2_6[n+1] = work1_6[n+1];
+      n += 2;
+    }
+
+    fft2_6->compute(work2_6,work2_6,-1);
+
+    n = 0;
+    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+      for (j = nylo_in_6; j <= nyhi_in_6; j++)
+        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+          u_pa_1[k][j][i] = work2_6[n++];
+          u_pa_2[k][j][i] = work2_6[n++];
+        }
+  }
+
+  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
+                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
+}
+
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ik scheme, case when mixing rules don't apply
+   n1,n2 = indices of the two densities; B[n1] and B[n2] scale every
+     output grid and weight the two energy terms
+   dfft_1,dfft_2 = the two real densities, nfft_6 values each
+   both are carried through the transforms as one complex array in
+   work1_6 (dfft_1 -> real parts, dfft_2 -> imaginary parts), so every
+   transform back yields one grid for each density
+   v?brick_1,v?brick_2 = output gradient grids (real / imaginary parts)
+   u_pa = per-atom energy grids, [n1] and [n2] written if eflag_atom
+   v0_pa..v5_pa = per-atom virial grids, [n1] and [n2] written if vflag_atom
+   energy_6 and virial_6 are accumulated if eflag_global/vflag_global
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                               FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
+                               FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
+                               FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
+                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  // grid-point count is formed in double: a product of the three grid
+  // dimensions in int overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  // need to be calculated
+
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+
+  // two transforms are required when energies and pressures are
+  // calculated: dfft_1 in the real parts of work1_6, dfft_2 in the
+  // imaginary parts of work2_6, everything else zero
+
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+
+    // energy per FFT point: squared magnitude of each transform,
+    // weighted by B[n1] and B[n2] respectively
+
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+        if (eflag_global) energy_6 += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        energy_6 +=
+          s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+        n += 2;
+      }
+    }
+
+    // unify the two transformed vectors for efficient calculations later
+
+    for (i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by Green's function
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of the v?brick_1 and v?brick_2 arrays
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
+        vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vybrick_1[k][j][i] = B[n1]*work2_6[n++];
+        vybrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+        work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
+        work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
+        n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
+        vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // per-atom energy: transform a copy of work1_6 back unmodified
+
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work2_6[n] = work1_6[n];
+      work2_6[n+1] = work1_6[n+1];
+      n += 2;
+    }
+
+    fft2_6->compute(work2_6,work2_6,-1);
+
+    n = 0;
+    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+      for (j = nylo_in_6; j <= nyhi_in_6; j++)
+        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+          u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
+          u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
+        }
+  }
+
+  if (vflag_atom) poisson_none_peratom(n1,n2,
+                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
+                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ad scheme
+   dfft_1,dfft_2 = the two real densities, nfft_6 values each
+   both are carried through the transforms as one complex array in
+   work1_6 (dfft_1 -> real parts, dfft_2 -> imaginary parts)
+   u_pa_1,u_pa_2 = output grids, always written (real / imaginary parts)
+   v0_pa_1..v5_pa_2 = per-atom virial grids, only written if vflag_atom
+   energy_6 and virial_6 are accumulated if eflag_global/vflag_global
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                             FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                             FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                             FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                             FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  int i,j,k,n;
+  double eng;
+
+  // grid-point count is formed in double: a product of the three grid
+  // dimensions in int overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  // need to be calculated
+
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+
+  // two transforms are required when energies and pressures are
+  // calculated: dfft_1 in the real parts of work1_6, dfft_2 in the
+  // imaginary parts of work2_6, everything else zero
+
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+
+    // energy per FFT point is a cross term of the two transforms
+
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+        if (eflag_global) energy_6 += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        energy_6 +=
+          2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+        n += 2;
+      }
+    }
+
+    // unify the two transformed vectors for efficient calculations later
+
+    for (i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by Green's function
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // transform a copy back, work1_6 stays intact for poisson_2s_peratom
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n];
+    work2_6[n+1] = work1_6[n+1];
+    n += 2;
+  }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        u_pa_1[k][j][i] = work2_6[n++];
+        u_pa_2[k][j][i] = work2_6[n++];
+      }
+
+  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
+                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ad scheme, case when mixing rules don't apply
+   n1,n2 = indices of the two densities; B[n1] and B[n2] scale every
+     output grid and weight the two energy terms
+   dfft_1,dfft_2 = the two real densities, nfft_6 values each
+   both are carried through the transforms as one complex array in
+   work1_6 (dfft_1 -> real parts, dfft_2 -> imaginary parts)
+   u_pa_1,u_pa_2 = output grids, always written (real / imaginary parts)
+   v0_pa..v5_pa = per-atom virial grids, [n1] and [n2] written if vflag_atom
+   energy_6 and virial_6 are accumulated if eflag_global/vflag_global
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
+                               FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
+                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  // grid-point count is formed in double: a product of the three grid
+  // dimensions in int overflows once the mesh has more than INT_MAX points
+
+  double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  // need to be calculated
+
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+
+  // two transforms are required when energies and pressures are
+  // calculated: dfft_1 in the real parts of work1_6, dfft_2 in the
+  // imaginary parts of work2_6, everything else zero
+
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+
+    // energy per FFT point: squared magnitude of each transform,
+    // weighted by B[n1] and B[n2] respectively
+
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+        if (eflag_global) energy_6 += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+        energy_6 +=
+          s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+        n += 2;
+      }
+    }
+
+    // unify the two transformed vectors for efficient calculations later
+
+    for (i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by Green's function
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // transform a copy back, work1_6 stays intact for poisson_none_peratom
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n];
+    work2_6[n+1] = work1_6[n+1];
+    n += 2;
+  }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        u_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        u_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  if (vflag_atom) poisson_none_peratom(n1,n2,
+                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
+                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
+}
+
+/* ----------------------------------------------------------------------
+   Fourier transforms for the per-atom virial grids of two densities
+   that share one complex array (work1_6)
+   for each of the six virial components, work1_6 is multiplied by a
+   real coefficient per FFT point, transformed with fft2_6, and the
+   real parts are stored in v?_pa_1, the imaginary parts in v?_pa_2
+   components 0-2 use columns 0-2 of vg_6, 3-5 use columns 0-2 of vg2_6
+   work2_6 is used as scratch space, work1_6 is not modified
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                                  FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                                  FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                                  FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  // output grids in the order the components are computed (v0 ... v5)
+
+  FFT_SCALAR ***grid_1[6] = {v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1};
+  FFT_SCALAR ***grid_2[6] = {v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2};
+
+  for (int comp = 0; comp < 6; comp++) {
+
+    // column of vg_6 (comp < 3) or vg2_6 (comp >= 3) for this component
+
+    const int col = (comp < 3) ? comp : comp-3;
+
+    int n = 0;
+    for (int ip = 0; ip < nfft_6; ip++) {
+      work2_6[n] = work1_6[n]*((comp < 3) ? vg_6[ip][col] : vg2_6[ip][col]);
+      work2_6[n+1] = work1_6[n+1]*((comp < 3) ? vg_6[ip][col] : vg2_6[ip][col]);
+      n += 2;
+    }
+
+    fft2_6->compute(work2_6,work2_6,-1);
+
+    // real parts belong to the first, imaginary parts to the second density
+
+    FFT_SCALAR ***out_1 = grid_1[comp];
+    FFT_SCALAR ***out_2 = grid_2[comp];
+
+    n = 0;
+    for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
+      for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
+        for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
+          out_1[iz][iy][ix] = work2_6[n++];
+          out_2[iz][iy][ix] = work2_6[n++];
+        }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Fourier transforms for the per-atom virial grids of two densities
+   that share one complex array (work1_6), case without mixing rule
+   for each of the six virial components, work1_6 is multiplied by a
+   real coefficient per FFT point, transformed with fft2_6, and the
+   real parts times B[n1] are stored in v?_pa_1, the imaginary parts
+   times B[n2] in v?_pa_2
+   components 0-2 use columns 0-2 of vg_6, 3-5 use columns 0-2 of vg2_6
+   work2_6 is used as scratch space, work1_6 is not modified
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_peratom(int n1, int n2,
+                                    FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                                    FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                                    FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                                    FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  // output grids in the order the components are computed (v0 ... v5)
+
+  FFT_SCALAR ***grid_1[6] = {v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1};
+  FFT_SCALAR ***grid_2[6] = {v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2};
+
+  for (int comp = 0; comp < 6; comp++) {
+
+    // column of vg_6 (comp < 3) or vg2_6 (comp >= 3) for this component
+
+    const int col = (comp < 3) ? comp : comp-3;
+
+    int n = 0;
+    for (int ip = 0; ip < nfft_6; ip++) {
+      work2_6[n] = work1_6[n]*((comp < 3) ? vg_6[ip][col] : vg2_6[ip][col]);
+      work2_6[n+1] = work1_6[n+1]*((comp < 3) ? vg_6[ip][col] : vg2_6[ip][col]);
+      n += 2;
+    }
+
+    fft2_6->compute(work2_6,work2_6,-1);
+
+    // real parts belong to density n1, imaginary parts to density n2
+
+    FFT_SCALAR ***out_1 = grid_1[comp];
+    FFT_SCALAR ***out_2 = grid_2[comp];
+
+    n = 0;
+    for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
+      for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
+        for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
+          out_1[iz][iy][ix] = B[n1]*work2_6[n++];
+          out_2[iz][iy][ix] = B[n2]*work2_6[n++];
+        }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate the gradient grids vdx_brick,vdy_brick,vdz_brick to the
+   positions of my charges and add the resulting force to atom->f
+   for ik scheme
+   the z force component is skipped when slabflag == 2
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_ik()
+{
+  double *charge = atom->q;
+  double **pos = atom->x;
+  double **frc = atom->f;
+
+  const int nmine = atom->nlocal;
+
+  for (int ip = 0; ip < nmine; ip++) {
+
+    // (gx,gy,gz) = grid point this charge is assigned to
+    // (fx,fy,fz) = offset from that grid point, in grid units
+
+    const int gx = part2grid[ip][0];
+    const int gy = part2grid[ip][1];
+    const int gz = part2grid[ip][2];
+    FFT_SCALAR fx = gx+shiftone - (pos[ip][0]-boxlo[0])*delxinv;
+    FFT_SCALAR fy = gy+shiftone - (pos[ip][1]-boxlo[1])*delyinv;
+    FFT_SCALAR fz = gz+shiftone - (pos[ip][2]-boxlo[2])*delzinv;
+
+    // 1d weights of the stencil, one row of rho1d per dimension
+
+    compute_rho1d(fx,fy,fz, order, rho_coeff, rho1d);
+
+    // (ex,ey,ez) = 3 components of E-field on particle
+    // (mx,my,mz) = grid coords of moving stencil pt
+
+    FFT_SCALAR ex = ZEROF;
+    FFT_SCALAR ey = ZEROF;
+    FFT_SCALAR ez = ZEROF;
+
+    for (int oz = nlower; oz <= nupper; oz++) {
+      const int mz = oz+gz;
+      FFT_SCALAR wz = rho1d[2][oz];
+      for (int oy = nlower; oy <= nupper; oy++) {
+        const int my = oy+gy;
+        FFT_SCALAR wzy = wz*rho1d[1][oy];
+        for (int ox = nlower; ox <= nupper; ox++) {
+          const int mx = ox+gx;
+          FFT_SCALAR wgt = wzy*rho1d[0][ox];
+          ex -= wgt*vdx_brick[mz][my][mx];
+          ey -= wgt*vdy_brick[mz][my][mx];
+          ez -= wgt*vdz_brick[mz][my][mx];
+        }
+      }
+    }
+
+    // convert E-field to force
+
+    const double qfactor = force->qqrd2e * scale * charge[ip];
+    frc[ip][0] += qfactor*ex;
+    frc[ip][1] += qfactor*ey;
+    if (slabflag != 2) frc[ip][2] += qfactor*ez;
+  }
+}
+/* ----------------------------------------------------------------------
+ interpolate from grid (u_brick, using rho1d and drho1d weights) to get
+ electric field & force on my particles, minus a self force, for ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_ad()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz;
+ FFT_SCALAR ekx,eky,ekz;
+ double s1,s2,s3;
+ double sf = 0.0;
+
+ double *prd;
+
+ if (triclinic == 0) prd = domain->prd;
+ else prd = domain->prd_lamda;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+
+ double hx_inv = nx_pppm/xprd;
+ double hy_inv = ny_pppm/yprd;
+ double hz_inv = nz_pppm/zprd_slab;
+
+ // loop over my charges, interpolate electric field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge (from part2grid)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d/compute_drho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek = 3 components of E-field on particle, scaled by hx_inv,hy_inv,hz_inv below
+
+ double *q = atom->q;
+ double **x = atom->x;
+ double **f = atom->f;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+ compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
+
+ ekx = eky = ekz = ZEROF;
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+ eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+ ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+ }
+ }
+ }
+ ekx *= hx_inv;
+ eky *= hy_inv;
+ ekz *= hz_inv;
+ // convert E-field to force and subtract self forces (sf, built from sf_coeff and 2*q*q)
+ const double qfactor = force->qqrd2e * scale;
+
+ s1 = x[i][0]*hx_inv;
+ s2 = x[i][1]*hy_inv;
+ s3 = x[i][2]*hz_inv;
+ sf = sf_coeff[0]*sin(2*MY_PI*s1);
+ sf += sf_coeff[1]*sin(4*MY_PI*s1);
+ sf *= 2*q[i]*q[i];
+ f[i][0] += qfactor*(ekx*q[i] - sf);
+
+ sf = sf_coeff[2]*sin(2*MY_PI*s2);
+ sf += sf_coeff[3]*sin(4*MY_PI*s2);
+ sf *= 2*q[i]*q[i];
+ f[i][1] += qfactor*(eky*q[i] - sf);
+
+
+ sf = sf_coeff[4]*sin(2*MY_PI*s3);
+ sf += sf_coeff[5]*sin(4*MY_PI*s3);
+ sf *= 2*q[i]*q[i];
+ if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get per-atom energy (eatom) and virial (vatom) on my particles
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_peratom()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
+
+ // loop over my charges, interpolate per-atom quantities from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of charge (from part2grid)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // u_pa, v0-v5 = interpolated values of u_brick and v0_brick-v5_brick on particle
+
+ double *q = atom->q;
+ double **x = atom->x;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid[i][0];
+ ny = part2grid[i][1];
+ nz = part2grid[i][2];
+ dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+ dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+ dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+ compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+
+ u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+ for (n = nlower; n <= nupper; n++) {
+ mz = n+nz;
+ z0 = rho1d[2][n];
+ for (m = nlower; m <= nupper; m++) {
+ my = m+ny;
+ y0 = z0*rho1d[1][m];
+ for (l = nlower; l <= nupper; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d[0][l];
+ if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
+ if (vflag_atom) {
+ v0 += x0*v0_brick[mz][my][mx];
+ v1 += x0*v1_brick[mz][my][mx];
+ v2 += x0*v2_brick[mz][my][mx];
+ v3 += x0*v3_brick[mz][my][mx];
+ v4 += x0*v4_brick[mz][my][mx];
+ v5 += x0*v5_brick[mz][my][mx];
+ }
+ }
+ }
+ }
+
+ // scale by 0.5*qqrd2e*scale*q and accumulate into eatom/vatom (no forces are changed here)
+
+ const double qfactor = 0.5*force->qqrd2e * scale * q[i];
+
+ if (eflag_atom) eatom[i] += u_pa*qfactor;
+ if (vflag_atom) {
+ vatom[i][0] += v0*qfactor;
+ vatom[i][1] += v1*qfactor;
+ vatom[i][2] += v2*qfactor;
+ vatom[i][3] += v3*qfactor;
+ vatom[i][4] += v4*qfactor;
+ vatom[i][5] += v5*qfactor;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid (vdx/vdy/vdz_brick_g) to get dispersion field & force
+ on my particles for geometric mixing rule and ik scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_ik()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR ekx,eky,ekz;
+
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek = 3 components of dispersion field on particle
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+ ekx = eky = ekz = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ ekx -= x0*vdx_brick_g[mz][my][mx];
+ eky -= x0*vdy_brick_g[mz][my][mx];
+ ekz -= x0*vdz_brick_g[mz][my][mx];
+ }
+ }
+ }
+
+ // convert dispersion field to force using the per-type coefficient B[type]
+ type = atom->type[i];
+ lj = B[type];
+ f[i][0] += lj*ekx;
+ f[i][1] += lj*eky;
+ if (slabflag != 2) f[i][2] += lj*ekz;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid (u_brick_g, rho1d_6 and drho1d_6 weights) to get dispersion
+ field & force on my particles, minus a self force, for geometric mixing rule, ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_ad()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz;
+ FFT_SCALAR ekx,eky,ekz;
+ double s1,s2,s3;
+ double sf = 0.0;
+ double *prd;
+
+ if (triclinic == 0) prd = domain->prd;
+ else prd = domain->prd_lamda;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+
+ double hx_inv = nx_pppm_6/xprd;
+ double hy_inv = ny_pppm_6/yprd;
+ double hz_inv = nz_pppm_6/zprd_slab;
+
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d/compute_drho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek = 3 components of dispersion field on particle, scaled by hx_inv,hy_inv,hz_inv below
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+ compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
+
+
+ ekx = eky = ekz = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
+ eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
+ ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
+ }
+ }
+ }
+ ekx *= hx_inv;
+ eky *= hy_inv;
+ ekz *= hz_inv;
+
+ // convert dispersion field to force and subtract self force (sf, from sf_coeff_6 and 2*lj*lj)
+ type = atom->type[i];
+ lj = B[type];
+
+ s1 = x[i][0]*hx_inv;
+ s2 = x[i][1]*hy_inv;
+ s3 = x[i][2]*hz_inv;
+
+ sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
+ sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
+ sf *= 2*lj*lj;
+ f[i][0] += ekx*lj - sf;
+
+ sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
+ sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
+ sf *= 2*lj*lj;
+ f[i][1] += eky*lj - sf;
+
+
+ sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
+ sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
+ sf *= 2*lj*lj;
+ if (slabflag != 2) f[i][2] += ekz*lj - sf;
+
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get per-atom dispersion energy (eatom) and virial (vatom)
+ on my particles for geometric mixing rule
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_peratom()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
+
+ // loop over my atoms, interpolate per-atom quantities from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // u_pa, v0-v5 = interpolated values of u_brick_g and v0_brick_g-v5_brick_g on particle
+
+ double **x = atom->x;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+ u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];
+ if (vflag_atom) {
+ v0 += x0*v0_brick_g[mz][my][mx];
+ v1 += x0*v1_brick_g[mz][my][mx];
+ v2 += x0*v2_brick_g[mz][my][mx];
+ v3 += x0*v3_brick_g[mz][my][mx];
+ v4 += x0*v4_brick_g[mz][my][mx];
+ v5 += x0*v5_brick_g[mz][my][mx];
+ }
+ }
+ }
+ }
+
+ // scale by 0.5*B[type] and accumulate into eatom/vatom (no forces are changed here)
+ type = atom->type[i];
+ lj = B[type]*0.5;
+
+ if (eflag_atom) eatom[i] += u_pa*lj;
+ if (vflag_atom) {
+ vatom[i][0] += v0*lj;
+ vatom[i][1] += v1*lj;
+ vatom[i][2] += v2*lj;
+ vatom[i][3] += v3*lj;
+ vatom[i][4] += v4*lj;
+ vatom[i][5] += v5*lj;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the 7 grids vd{x,y,z}_brick_a0..a6 to get dispersion field & force
+ on my particles for arithmetic mixing rule and ik scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_ik()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
+ FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
+ FFT_SCALAR ekx6, eky6, ekz6;
+
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek{x,y,z}0-6 = 3 components of dispersion field on particle, one set per grid a0-a6
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+ ekx0 = eky0 = ekz0 = ZEROF;
+ ekx1 = eky1 = ekz1 = ZEROF;
+ ekx2 = eky2 = ekz2 = ZEROF;
+ ekx3 = eky3 = ekz3 = ZEROF;
+ ekx4 = eky4 = ekz4 = ZEROF;
+ ekx5 = eky5 = ekz5 = ZEROF;
+ ekx6 = eky6 = ekz6 = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ ekx0 -= x0*vdx_brick_a0[mz][my][mx];
+ eky0 -= x0*vdy_brick_a0[mz][my][mx];
+ ekz0 -= x0*vdz_brick_a0[mz][my][mx];
+ ekx1 -= x0*vdx_brick_a1[mz][my][mx];
+ eky1 -= x0*vdy_brick_a1[mz][my][mx];
+ ekz1 -= x0*vdz_brick_a1[mz][my][mx];
+ ekx2 -= x0*vdx_brick_a2[mz][my][mx];
+ eky2 -= x0*vdy_brick_a2[mz][my][mx];
+ ekz2 -= x0*vdz_brick_a2[mz][my][mx];
+ ekx3 -= x0*vdx_brick_a3[mz][my][mx];
+ eky3 -= x0*vdy_brick_a3[mz][my][mx];
+ ekz3 -= x0*vdz_brick_a3[mz][my][mx];
+ ekx4 -= x0*vdx_brick_a4[mz][my][mx];
+ eky4 -= x0*vdy_brick_a4[mz][my][mx];
+ ekz4 -= x0*vdz_brick_a4[mz][my][mx];
+ ekx5 -= x0*vdx_brick_a5[mz][my][mx];
+ eky5 -= x0*vdy_brick_a5[mz][my][mx];
+ ekz5 -= x0*vdz_brick_a5[mz][my][mx];
+ ekx6 -= x0*vdx_brick_a6[mz][my][mx];
+ eky6 -= x0*vdy_brick_a6[mz][my][mx];
+ ekz6 -= x0*vdz_brick_a6[mz][my][mx];
+ }
+ }
+ }
+ // convert D-field to force; coefficient lj<k> = B[7*type+6-k] multiplies the field of grid a<k>
+ type = atom->type[i];
+ lj0 = B[7*type+6];
+ lj1 = B[7*type+5];
+ lj2 = B[7*type+4];
+ lj3 = B[7*type+3];
+ lj4 = B[7*type+2];
+ lj5 = B[7*type+1];
+ lj6 = B[7*type];
+ f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
+ f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
+ if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the 7 grids u_brick_a0..a6 (rho1d_6 and drho1d_6 weights) to get dispersion
+ field & force on my particles, minus a self force, for arithmetic mixing rule, ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_ad()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
+ FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
+ FFT_SCALAR ekx6, eky6, ekz6;
+
+ double s1,s2,s3;
+ double sf = 0.0;
+ double *prd;
+
+ if (triclinic == 0) prd = domain->prd;
+ else prd = domain->prd_lamda;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+
+ double hx_inv = nx_pppm_6/xprd;
+ double hy_inv = ny_pppm_6/yprd;
+ double hz_inv = nz_pppm_6/zprd_slab;
+
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d/compute_drho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek{x,y,z}0-6 = 3 components of dispersion field on particle, one set per grid a0-a6
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+ compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
+
+ ekx0 = eky0 = ekz0 = ZEROF;
+ ekx1 = eky1 = ekz1 = ZEROF;
+ ekx2 = eky2 = ekz2 = ZEROF;
+ ekx3 = eky3 = ekz3 = ZEROF;
+ ekx4 = eky4 = ekz4 = ZEROF;
+ ekx5 = eky5 = ekz5 = ZEROF;
+ ekx6 = eky6 = ekz6 = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
+ y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
+ z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
+
+ ekx0 += x0*u_brick_a0[mz][my][mx];
+ eky0 += y0*u_brick_a0[mz][my][mx];
+ ekz0 += z0*u_brick_a0[mz][my][mx];
+
+ ekx1 += x0*u_brick_a1[mz][my][mx];
+ eky1 += y0*u_brick_a1[mz][my][mx];
+ ekz1 += z0*u_brick_a1[mz][my][mx];
+
+ ekx2 += x0*u_brick_a2[mz][my][mx];
+ eky2 += y0*u_brick_a2[mz][my][mx];
+ ekz2 += z0*u_brick_a2[mz][my][mx];
+
+ ekx3 += x0*u_brick_a3[mz][my][mx];
+ eky3 += y0*u_brick_a3[mz][my][mx];
+ ekz3 += z0*u_brick_a3[mz][my][mx];
+
+ ekx4 += x0*u_brick_a4[mz][my][mx];
+ eky4 += y0*u_brick_a4[mz][my][mx];
+ ekz4 += z0*u_brick_a4[mz][my][mx];
+
+ ekx5 += x0*u_brick_a5[mz][my][mx];
+ eky5 += y0*u_brick_a5[mz][my][mx];
+ ekz5 += z0*u_brick_a5[mz][my][mx];
+
+ ekx6 += x0*u_brick_a6[mz][my][mx];
+ eky6 += y0*u_brick_a6[mz][my][mx];
+ ekz6 += z0*u_brick_a6[mz][my][mx];
+ }
+ }
+ }
+
+ ekx0 *= hx_inv;
+ eky0 *= hy_inv;
+ ekz0 *= hz_inv;
+
+ ekx1 *= hx_inv;
+ eky1 *= hy_inv;
+ ekz1 *= hz_inv;
+
+ ekx2 *= hx_inv;
+ eky2 *= hy_inv;
+ ekz2 *= hz_inv;
+
+ ekx3 *= hx_inv;
+ eky3 *= hy_inv;
+ ekz3 *= hz_inv;
+
+ ekx4 *= hx_inv;
+ eky4 *= hy_inv;
+ ekz4 *= hz_inv;
+
+ ekx5 *= hx_inv;
+ eky5 *= hy_inv;
+ ekz5 *= hz_inv;
+
+ ekx6 *= hx_inv;
+ eky6 *= hy_inv;
+ ekz6 *= hz_inv;
+
+ // convert D-field to force and subtract self force; lj<k> = B[7*type+6-k] pairs with grid a<k>
+ type = atom->type[i];
+ lj0 = B[7*type+6];
+ lj1 = B[7*type+5];
+ lj2 = B[7*type+4];
+ lj3 = B[7*type+3];
+ lj4 = B[7*type+2];
+ lj5 = B[7*type+1];
+ lj6 = B[7*type];
+
+ s1 = x[i][0]*hx_inv;
+ s2 = x[i][1]*hy_inv;
+ s3 = x[i][2]*hz_inv;
+
+ sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
+ sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
+ sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
+ f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
+
+ sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
+ sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
+ sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
+ f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
+
+ sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
+ sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
+ sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
+ if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the 7 grid sets a0..a6 to get per-atom dispersion energy (eatom)
+ and virial (vatom) on my particles for arithmetic mixing rule
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_peratom()
+{
+ int i,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
+ FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
+ FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
+ FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
+ FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
+ FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
+ FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
+
+ // loop over my atoms, interpolate per-atom quantities from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // u_pa<k>, v<c><k> = interpolated u_brick_a<k> and v<c>_brick_a<k> (c = 0-5, k = 0-6)
+
+ double **x = atom->x;
+ int type;
+ double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+ u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
+ u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
+ u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
+ u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
+ u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
+ u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
+ u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ if (eflag_atom) {
+ u_pa0 += x0*u_brick_a0[mz][my][mx];
+ u_pa1 += x0*u_brick_a1[mz][my][mx];
+ u_pa2 += x0*u_brick_a2[mz][my][mx];
+ u_pa3 += x0*u_brick_a3[mz][my][mx];
+ u_pa4 += x0*u_brick_a4[mz][my][mx];
+ u_pa5 += x0*u_brick_a5[mz][my][mx];
+ u_pa6 += x0*u_brick_a6[mz][my][mx];
+ }
+ if (vflag_atom) {
+ v00 += x0*v0_brick_a0[mz][my][mx];
+ v10 += x0*v1_brick_a0[mz][my][mx];
+ v20 += x0*v2_brick_a0[mz][my][mx];
+ v30 += x0*v3_brick_a0[mz][my][mx];
+ v40 += x0*v4_brick_a0[mz][my][mx];
+ v50 += x0*v5_brick_a0[mz][my][mx];
+ v01 += x0*v0_brick_a1[mz][my][mx];
+ v11 += x0*v1_brick_a1[mz][my][mx];
+ v21 += x0*v2_brick_a1[mz][my][mx];
+ v31 += x0*v3_brick_a1[mz][my][mx];
+ v41 += x0*v4_brick_a1[mz][my][mx];
+ v51 += x0*v5_brick_a1[mz][my][mx];
+ v02 += x0*v0_brick_a2[mz][my][mx];
+ v12 += x0*v1_brick_a2[mz][my][mx];
+ v22 += x0*v2_brick_a2[mz][my][mx];
+ v32 += x0*v3_brick_a2[mz][my][mx];
+ v42 += x0*v4_brick_a2[mz][my][mx];
+ v52 += x0*v5_brick_a2[mz][my][mx];
+ v03 += x0*v0_brick_a3[mz][my][mx];
+ v13 += x0*v1_brick_a3[mz][my][mx];
+ v23 += x0*v2_brick_a3[mz][my][mx];
+ v33 += x0*v3_brick_a3[mz][my][mx];
+ v43 += x0*v4_brick_a3[mz][my][mx];
+ v53 += x0*v5_brick_a3[mz][my][mx];
+ v04 += x0*v0_brick_a4[mz][my][mx];
+ v14 += x0*v1_brick_a4[mz][my][mx];
+ v24 += x0*v2_brick_a4[mz][my][mx];
+ v34 += x0*v3_brick_a4[mz][my][mx];
+ v44 += x0*v4_brick_a4[mz][my][mx];
+ v54 += x0*v5_brick_a4[mz][my][mx];
+ v05 += x0*v0_brick_a5[mz][my][mx];
+ v15 += x0*v1_brick_a5[mz][my][mx];
+ v25 += x0*v2_brick_a5[mz][my][mx];
+ v35 += x0*v3_brick_a5[mz][my][mx];
+ v45 += x0*v4_brick_a5[mz][my][mx];
+ v55 += x0*v5_brick_a5[mz][my][mx];
+ v06 += x0*v0_brick_a6[mz][my][mx];
+ v16 += x0*v1_brick_a6[mz][my][mx];
+ v26 += x0*v2_brick_a6[mz][my][mx];
+ v36 += x0*v3_brick_a6[mz][my][mx];
+ v46 += x0*v4_brick_a6[mz][my][mx];
+ v56 += x0*v5_brick_a6[mz][my][mx];
+ }
+ }
+ }
+ }
+ // weight grid a<k> by lj<k> = 0.5*B[7*type+6-k] and accumulate into eatom/vatom (no forces here)
+ type = atom->type[i];
+ lj0 = B[7*type+6]*0.5;
+ lj1 = B[7*type+5]*0.5;
+ lj2 = B[7*type+4]*0.5;
+ lj3 = B[7*type+3]*0.5;
+ lj4 = B[7*type+2]*0.5;
+ lj5 = B[7*type+1]*0.5;
+ lj6 = B[7*type]*0.5;
+
+
+ if (eflag_atom)
+ eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 +
+ u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
+ if (vflag_atom) {
+ vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
+ v04*lj4 + v05*lj5 + v06*lj6;
+ vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
+ v14*lj4 + v15*lj5 + v16*lj6;
+ vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
+ v24*lj4 + v25*lj5 + v26*lj6;
+ vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
+ v34*lj4 + v35*lj5 + v36*lj6;
+ vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
+ v44*lj4 + v45*lj5 + v46*lj6;
+ vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
+ v54*lj4 + v55*lj5 + v56*lj6;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the nsplit grids vd{x,y,z}_brick_none[k] to get dispersion field & force
+ on my particles for the "none" variant and ik scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_ik()
+{
+ int i,k,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR *ekx, *eky, *ekz;
+
+ ekx = new FFT_SCALAR[nsplit];
+ eky = new FFT_SCALAR[nsplit];
+ ekz = new FFT_SCALAR[nsplit];
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek{x,y,z}[k] = 3 components of dispersion field on particle for grid k (scratch, freed at end)
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+ for (k = 0; k < nsplit; k++)
+ ekx[k] = eky[k] = ekz[k] = ZEROF;
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ for (k = 0; k < nsplit; k++) {
+ ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
+ eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
+ ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
+ }
+ }
+ }
+ }
+ // convert D-field to force, summing the nsplit terms weighted by B[nsplit*type + k]
+ type = atom->type[i];
+ for (k = 0; k < nsplit; k++) {
+ lj = B[nsplit*type + k];
+ f[i][0] += lj*ekx[k];
+ f[i][1] +=lj*eky[k];
+ if (slabflag != 2) f[i][2] +=lj*ekz[k];
+ }
+ }
+
+ delete [] ekx;
+ delete [] eky;
+ delete [] ekz;
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the nsplit grids u_brick_none[k] (rho1d_6 and drho1d_6 weights) to get dispersion
+ field & force on my particles, minus a self force, for the "none" variant and ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_ad()
+{
+ int i,k,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR *ekx, *eky, *ekz;
+
+ ekx = new FFT_SCALAR[nsplit];
+ eky = new FFT_SCALAR[nsplit];
+ ekz = new FFT_SCALAR[nsplit];
+
+
+ double s1,s2,s3;
+ double sf1,sf2,sf3;
+ double sf = 0.0;
+ double *prd;
+
+ if (triclinic == 0) prd = domain->prd;
+ else prd = domain->prd_lamda;
+
+ double xprd = prd[0];
+ double yprd = prd[1];
+ double zprd = prd[2];
+ double zprd_slab = zprd*slab_volfactor;
+
+ double hx_inv = nx_pppm_6/xprd;
+ double hy_inv = ny_pppm_6/yprd;
+ double hz_inv = nz_pppm_6/zprd_slab;
+
+ // loop over my atoms, interpolate dispersion field from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d/compute_drho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // ek{x,y,z}[k] = 3 components of dispersion field on particle for grid k (scratch, freed at end)
+
+ double **x = atom->x;
+ double **f = atom->f;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+ compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
+
+ for (k = 0; k < nsplit; k++)
+ ekx[k] = eky[k] = ekz[k] = ZEROF;
+
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
+ y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
+ z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
+
+ for (k = 0; k < nsplit; k++) {
+ ekx[k] += x0*u_brick_none[k][mz][my][mx];
+ eky[k] += y0*u_brick_none[k][mz][my][mx];
+ ekz[k] += z0*u_brick_none[k][mz][my][mx];
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < nsplit; k++) {
+ ekx[k] *= hx_inv;
+ eky[k] *= hy_inv;
+ ekz[k] *= hz_inv;
+ }
+
+ // convert D-field to force and subtract self force; NOTE(review): sf uses B[k] (no type offset) - confirm
+ type = atom->type[i];
+
+ s1 = x[i][0]*hx_inv;
+ s2 = x[i][1]*hy_inv;
+ s3 = x[i][2]*hz_inv;
+
+ sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
+ sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);
+
+ sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
+ sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);
+
+ sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
+ sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);
+
+ for (k = 0; k < nsplit; k++) {
+ lj = B[nsplit*type + k];
+
+ sf = sf1*B[k]*2*lj*lj;
+ f[i][0] += lj*ekx[k] - sf;
+
+
+ sf = sf2*B[k]*2*lj*lj;
+ f[i][1] += lj*eky[k] - sf;
+
+ sf = sf3*B[k]*2*lj*lj;
+ if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
+ }
+ }
+
+ delete [] ekx;
+ delete [] eky;
+ delete [] ekz;
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from the nsplit grid sets *_brick_none[k] to get per-atom dispersion energy (eatom)
+ and virial (vatom) on my particles for the "none" variant
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_peratom()
+{
+ int i,k,l,m,n,nx,ny,nz,mx,my,mz;
+ FFT_SCALAR dx,dy,dz,x0,y0,z0;
+ FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;
+
+ u_pa = new FFT_SCALAR[nsplit];
+ v0 = new FFT_SCALAR[nsplit];
+ v1 = new FFT_SCALAR[nsplit];
+ v2 = new FFT_SCALAR[nsplit];
+ v3 = new FFT_SCALAR[nsplit];
+ v4 = new FFT_SCALAR[nsplit];
+ v5 = new FFT_SCALAR[nsplit];
+
+ // loop over my atoms, interpolate per-atom quantities from nearby grid points
+ // (nx,ny,nz) = global coords of grid pt to "lower left" of atom (from part2grid_6)
+ // (dx,dy,dz) = distance to "lower left" grid pt, input to compute_rho1d
+ // (mx,my,mz) = global coords of moving stencil pt
+ // u_pa[k], v0[k]-v5[k] = interpolated u_brick_none[k] and v0-v5_brick_none[k] (scratch, freed at end)
+
+ double **x = atom->x;
+ int type;
+ double lj;
+
+ int nlocal = atom->nlocal;
+
+ for (i = 0; i < nlocal; i++) {
+
+ nx = part2grid_6[i][0];
+ ny = part2grid_6[i][1];
+ nz = part2grid_6[i][2];
+ dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+ dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+ dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+ compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+ for (k = 0; k < nsplit; k++)
+ u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;
+
+ for (n = nlower_6; n <= nupper_6; n++) {
+ mz = n+nz;
+ z0 = rho1d_6[2][n];
+ for (m = nlower_6; m <= nupper_6; m++) {
+ my = m+ny;
+ y0 = z0*rho1d_6[1][m];
+ for (l = nlower_6; l <= nupper_6; l++) {
+ mx = l+nx;
+ x0 = y0*rho1d_6[0][l];
+ if (eflag_atom) {
+ for (k = 0; k < nsplit; k++)
+ u_pa[k] += x0*u_brick_none[k][mz][my][mx];
+ }
+ if (vflag_atom) {
+ for (k = 0; k < nsplit; k++) {
+ v0[k] += x0*v0_brick_none[k][mz][my][mx];
+ v1[k] += x0*v1_brick_none[k][mz][my][mx];
+ v2[k] += x0*v2_brick_none[k][mz][my][mx];
+ v3[k] += x0*v3_brick_none[k][mz][my][mx];
+ v4[k] += x0*v4_brick_none[k][mz][my][mx];
+ v5[k] += x0*v5_brick_none[k][mz][my][mx];
+ }
+ }
+ }
+ }
+ }
+ // weight term k by 0.5*B[nsplit*type + k] and accumulate into eatom/vatom (no forces here)
+ type = atom->type[i];
+ for (k = 0; k < nsplit; k++) {
+ lj = B[nsplit*type + k]*0.5;
+
+ if (eflag_atom) {
+ eatom[i] += u_pa[k]*lj;
+ }
+ if (vflag_atom) {
+ vatom[i][0] += v0[k]*lj;
+ vatom[i][1] += v1[k]*lj;
+ vatom[i][2] += v2[k]*lj;
+ vatom[i][3] += v3[k]*lj;
+ vatom[i][4] += v4[k]*lj;
+ vatom[i][5] += v5[k]*lj;
+ }
+ }
+ }
+
+ delete [] u_pa;
+ delete [] v0;
+ delete [] v1;
+ delete [] v2;
+ delete [] v3;
+ delete [] v4;
+ delete [] v5;
+}
+
+/* ----------------------------------------------------------------------
+ pack the grid values selected by flag and list into buf to send to another proc
+------------------------------------------------------------------------- */
+
+void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0; // next free slot in buf
+ // list[i] is used as an offset from the address of each brick's [lo_out] corner element
+ switch (flag) { // flag selects which brick(s) are packed and thus the buf layout
+
+ // Coulomb interactions
+
+ case FORWARD_IK: { // per entry: vdx, vdy, vdz (3 values)
+ FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = xsrc[list[i]];
+ buf[n++] = ysrc[list[i]];
+ buf[n++] = zsrc[list[i]];
+ }
+ break;
+ }
+
+ case FORWARD_AD: { // per entry: u (1 value)
+ FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++)
+ buf[i] = src[list[i]]; // one value per entry, so i indexes buf directly (n is not advanced)
+ break;
+ }
+
+ case FORWARD_IK_PERATOM: { // per entry: [u if eflag_atom] then [v0..v5 if vflag_atom]
+ FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) buf[n++] = esrc[list[i]];
+ if (vflag_atom) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM: { // per entry: v0..v5 (6 values, not guarded by vflag_atom)
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ break;
+ }
+
+ // Dispersion interactions, geometric mixing
+
+ case FORWARD_IK_G: { // per entry: vdx, vdy, vdz of the _g bricks (3 values)
+ FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = xsrc[list[i]];
+ buf[n++] = ysrc[list[i]];
+ buf[n++] = zsrc[list[i]];
+ }
+ break;
+ }
+
+ case FORWARD_AD_G: { // per entry: u of the _g brick (1 value)
+ FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++)
+ buf[i] = src[list[i]]; // one value per entry, so i indexes buf directly (n is not advanced)
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_G: { // per entry: [u if eflag_atom] then [v0..v5 if vflag_atom]
+ FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) buf[n++] = esrc[list[i]];
+ if (vflag_atom) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_G: { // per entry: v0..v5 (6 values, not guarded by vflag_atom)
+ FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ break;
+ }
+
+ // Dispersion interactions, arithmetic mixing
+
+ case FORWARD_IK_A: { // per entry: vdx, vdy, vdz for each of the 7 bricks a0..a6 (21 values)
+ FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = xsrc0[list[i]];
+ buf[n++] = ysrc0[list[i]];
+ buf[n++] = zsrc0[list[i]];
+
+ buf[n++] = xsrc1[list[i]];
+ buf[n++] = ysrc1[list[i]];
+ buf[n++] = zsrc1[list[i]];
+
+ buf[n++] = xsrc2[list[i]];
+ buf[n++] = ysrc2[list[i]];
+ buf[n++] = zsrc2[list[i]];
+
+ buf[n++] = xsrc3[list[i]];
+ buf[n++] = ysrc3[list[i]];
+ buf[n++] = zsrc3[list[i]];
+
+ buf[n++] = xsrc4[list[i]];
+ buf[n++] = ysrc4[list[i]];
+ buf[n++] = zsrc4[list[i]];
+
+ buf[n++] = xsrc5[list[i]];
+ buf[n++] = ysrc5[list[i]];
+ buf[n++] = zsrc5[list[i]];
+
+ buf[n++] = xsrc6[list[i]];
+ buf[n++] = ysrc6[list[i]];
+ buf[n++] = zsrc6[list[i]];
+ }
+ break;
+ }
+
+ case FORWARD_AD_A: { // per entry: u of bricks a0..a6 in that order (7 values)
+ FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = src0[list[i]];
+ buf[n++] = src1[list[i]];
+ buf[n++] = src2[list[i]];
+ buf[n++] = src3[list[i]];
+ buf[n++] = src4[list[i]];
+ buf[n++] = src5[list[i]];
+ buf[n++] = src6[list[i]];
+ }
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_A: { // per entry: [u of a0..a6 if eflag_atom] then [v0..v5 of a0, then a1, ... a6 if vflag_atom]
+ FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) {
+ buf[n++] = esrc0[list[i]];
+ buf[n++] = esrc1[list[i]];
+ buf[n++] = esrc2[list[i]];
+ buf[n++] = esrc3[list[i]];
+ buf[n++] = esrc4[list[i]];
+ buf[n++] = esrc5[list[i]];
+ buf[n++] = esrc6[list[i]];
+ }
+ if (vflag_atom) {
+ buf[n++] = v0src0[list[i]];
+ buf[n++] = v1src0[list[i]];
+ buf[n++] = v2src0[list[i]];
+ buf[n++] = v3src0[list[i]];
+ buf[n++] = v4src0[list[i]];
+ buf[n++] = v5src0[list[i]];
+
+ buf[n++] = v0src1[list[i]];
+ buf[n++] = v1src1[list[i]];
+ buf[n++] = v2src1[list[i]];
+ buf[n++] = v3src1[list[i]];
+ buf[n++] = v4src1[list[i]];
+ buf[n++] = v5src1[list[i]];
+
+ buf[n++] = v0src2[list[i]];
+ buf[n++] = v1src2[list[i]];
+ buf[n++] = v2src2[list[i]];
+ buf[n++] = v3src2[list[i]];
+ buf[n++] = v4src2[list[i]];
+ buf[n++] = v5src2[list[i]];
+
+ buf[n++] = v0src3[list[i]];
+ buf[n++] = v1src3[list[i]];
+ buf[n++] = v2src3[list[i]];
+ buf[n++] = v3src3[list[i]];
+ buf[n++] = v4src3[list[i]];
+ buf[n++] = v5src3[list[i]];
+
+ buf[n++] = v0src4[list[i]];
+ buf[n++] = v1src4[list[i]];
+ buf[n++] = v2src4[list[i]];
+ buf[n++] = v3src4[list[i]];
+ buf[n++] = v4src4[list[i]];
+ buf[n++] = v5src4[list[i]];
+
+ buf[n++] = v0src5[list[i]];
+ buf[n++] = v1src5[list[i]];
+ buf[n++] = v2src5[list[i]];
+ buf[n++] = v3src5[list[i]];
+ buf[n++] = v4src5[list[i]];
+ buf[n++] = v5src5[list[i]];
+
+ buf[n++] = v0src6[list[i]];
+ buf[n++] = v1src6[list[i]];
+ buf[n++] = v2src6[list[i]];
+ buf[n++] = v3src6[list[i]];
+ buf[n++] = v4src6[list[i]];
+ buf[n++] = v5src6[list[i]];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_A: { // per entry: v0..v5 of a0, then a1, ... a6 (42 values, not guarded by vflag_atom)
+ FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = v0src0[list[i]];
+ buf[n++] = v1src0[list[i]];
+ buf[n++] = v2src0[list[i]];
+ buf[n++] = v3src0[list[i]];
+ buf[n++] = v4src0[list[i]];
+ buf[n++] = v5src0[list[i]];
+
+ buf[n++] = v0src1[list[i]];
+ buf[n++] = v1src1[list[i]];
+ buf[n++] = v2src1[list[i]];
+ buf[n++] = v3src1[list[i]];
+ buf[n++] = v4src1[list[i]];
+ buf[n++] = v5src1[list[i]];
+
+ buf[n++] = v0src2[list[i]];
+ buf[n++] = v1src2[list[i]];
+ buf[n++] = v2src2[list[i]];
+ buf[n++] = v3src2[list[i]];
+ buf[n++] = v4src2[list[i]];
+ buf[n++] = v5src2[list[i]];
+
+ buf[n++] = v0src3[list[i]];
+ buf[n++] = v1src3[list[i]];
+ buf[n++] = v2src3[list[i]];
+ buf[n++] = v3src3[list[i]];
+ buf[n++] = v4src3[list[i]];
+ buf[n++] = v5src3[list[i]];
+
+ buf[n++] = v0src4[list[i]];
+ buf[n++] = v1src4[list[i]];
+ buf[n++] = v2src4[list[i]];
+ buf[n++] = v3src4[list[i]];
+ buf[n++] = v4src4[list[i]];
+ buf[n++] = v5src4[list[i]];
+
+ buf[n++] = v0src5[list[i]];
+ buf[n++] = v1src5[list[i]];
+ buf[n++] = v2src5[list[i]];
+ buf[n++] = v3src5[list[i]];
+ buf[n++] = v4src5[list[i]];
+ buf[n++] = v5src5[list[i]];
+
+ buf[n++] = v0src6[list[i]];
+ buf[n++] = v1src6[list[i]];
+ buf[n++] = v2src6[list[i]];
+ buf[n++] = v3src6[list[i]];
+ buf[n++] = v4src6[list[i]];
+ buf[n++] = v5src6[list[i]];
+ }
+ break;
+ }
+
+ // Dispersion interactions, no mixing
+
+ case FORWARD_IK_NONE: { // split-major: all nlist entries (vdx,vdy,vdz each) of split k precede those of split k+1
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = xsrc[list[i]];
+ buf[n++] = ysrc[list[i]];
+ buf[n++] = zsrc[list[i]];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_NONE: { // split-major: nlist u values of split k, then those of split k+1
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++)
+ buf[n++] = src[list[i]];
+ }
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_NONE: { // split-major; per entry: [u if eflag_atom] then [v0..v5 if vflag_atom]
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) buf[n++] = esrc[list[i]];
+ if (vflag_atom) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_NONE: { // split-major; per entry: v0..v5 (6 values, not guarded by vflag_atom)
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = v0src[list[i]];
+ buf[n++] = v1src[list[i]];
+ buf[n++] = v2src[list[i]];
+ buf[n++] = v3src[list[i]];
+ buf[n++] = v4src[list[i]];
+ buf[n++] = v5src[list[i]];
+ }
+ }
+ break;
+ }
+
+ }
+}
+
+/* ----------------------------------------------------------------------
+ unpack another proc's own values from buf and set own ghost values
+------------------------------------------------------------------------- */
+
+void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0;
+
+ switch (flag) {
+
+ // Coulomb interactions
+
+ case FORWARD_IK: {
+ FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ xdest[list[i]] = buf[n++];
+ ydest[list[i]] = buf[n++];
+ zdest[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ case FORWARD_AD: {
+ FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] = buf[n++];
+ break;
+ }
+
+ case FORWARD_IK_PERATOM: {
+ FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) esrc[list[i]] = buf[n++];
+ if (vflag_atom) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM: {
+ FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+ FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+ for (int i = 0; i < nlist; i++) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ // Dispersion interactions, geometric mixing
+
+ case FORWARD_IK_G: {
+ FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ xdest[list[i]] = buf[n++];
+ ydest[list[i]] = buf[n++];
+ zdest[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ case FORWARD_AD_G: {
+ FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] = buf[n++];
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_G: {
+ FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) esrc[list[i]] = buf[n++];
+ if (vflag_atom) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_G: {
+ FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ // Dispersion interactions, arithmetic mixing
+
+ case FORWARD_IK_A: {
+ FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ xdest0[list[i]] = buf[n++];
+ ydest0[list[i]] = buf[n++];
+ zdest0[list[i]] = buf[n++];
+
+ xdest1[list[i]] = buf[n++];
+ ydest1[list[i]] = buf[n++];
+ zdest1[list[i]] = buf[n++];
+
+ xdest2[list[i]] = buf[n++];
+ ydest2[list[i]] = buf[n++];
+ zdest2[list[i]] = buf[n++];
+
+ xdest3[list[i]] = buf[n++];
+ ydest3[list[i]] = buf[n++];
+ zdest3[list[i]] = buf[n++];
+
+ xdest4[list[i]] = buf[n++];
+ ydest4[list[i]] = buf[n++];
+ zdest4[list[i]] = buf[n++];
+
+ xdest5[list[i]] = buf[n++];
+ ydest5[list[i]] = buf[n++];
+ zdest5[list[i]] = buf[n++];
+
+ xdest6[list[i]] = buf[n++];
+ ydest6[list[i]] = buf[n++];
+ zdest6[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ case FORWARD_AD_A: {
+ FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ dest0[list[i]] = buf[n++];
+ dest1[list[i]] = buf[n++];
+ dest2[list[i]] = buf[n++];
+ dest3[list[i]] = buf[n++];
+ dest4[list[i]] = buf[n++];
+ dest5[list[i]] = buf[n++];
+ dest6[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_A: {
+ FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) {
+ esrc0[list[i]] = buf[n++];
+ esrc1[list[i]] = buf[n++];
+ esrc2[list[i]] = buf[n++];
+ esrc3[list[i]] = buf[n++];
+ esrc4[list[i]] = buf[n++];
+ esrc5[list[i]] = buf[n++];
+ esrc6[list[i]] = buf[n++];
+ }
+ if (vflag_atom) {
+ v0src0[list[i]] = buf[n++];
+ v1src0[list[i]] = buf[n++];
+ v2src0[list[i]] = buf[n++];
+ v3src0[list[i]] = buf[n++];
+ v4src0[list[i]] = buf[n++];
+ v5src0[list[i]] = buf[n++];
+
+ v0src1[list[i]] = buf[n++];
+ v1src1[list[i]] = buf[n++];
+ v2src1[list[i]] = buf[n++];
+ v3src1[list[i]] = buf[n++];
+ v4src1[list[i]] = buf[n++];
+ v5src1[list[i]] = buf[n++];
+
+ v0src2[list[i]] = buf[n++];
+ v1src2[list[i]] = buf[n++];
+ v2src2[list[i]] = buf[n++];
+ v3src2[list[i]] = buf[n++];
+ v4src2[list[i]] = buf[n++];
+ v5src2[list[i]] = buf[n++];
+
+ v0src3[list[i]] = buf[n++];
+ v1src3[list[i]] = buf[n++];
+ v2src3[list[i]] = buf[n++];
+ v3src3[list[i]] = buf[n++];
+ v4src3[list[i]] = buf[n++];
+ v5src3[list[i]] = buf[n++];
+
+ v0src4[list[i]] = buf[n++];
+ v1src4[list[i]] = buf[n++];
+ v2src4[list[i]] = buf[n++];
+ v3src4[list[i]] = buf[n++];
+ v4src4[list[i]] = buf[n++];
+ v5src4[list[i]] = buf[n++];
+
+ v0src5[list[i]] = buf[n++];
+ v1src5[list[i]] = buf[n++];
+ v2src5[list[i]] = buf[n++];
+ v3src5[list[i]] = buf[n++];
+ v4src5[list[i]] = buf[n++];
+ v5src5[list[i]] = buf[n++];
+
+ v0src6[list[i]] = buf[n++];
+ v1src6[list[i]] = buf[n++];
+ v2src6[list[i]] = buf[n++];
+ v3src6[list[i]] = buf[n++];
+ v4src6[list[i]] = buf[n++];
+ v5src6[list[i]] = buf[n++];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_A: {
+ FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+ for (int i = 0; i < nlist; i++) {
+ v0src0[list[i]] = buf[n++];
+ v1src0[list[i]] = buf[n++];
+ v2src0[list[i]] = buf[n++];
+ v3src0[list[i]] = buf[n++];
+ v4src0[list[i]] = buf[n++];
+ v5src0[list[i]] = buf[n++];
+
+ v0src1[list[i]] = buf[n++];
+ v1src1[list[i]] = buf[n++];
+ v2src1[list[i]] = buf[n++];
+ v3src1[list[i]] = buf[n++];
+ v4src1[list[i]] = buf[n++];
+ v5src1[list[i]] = buf[n++];
+
+ v0src2[list[i]] = buf[n++];
+ v1src2[list[i]] = buf[n++];
+ v2src2[list[i]] = buf[n++];
+ v3src2[list[i]] = buf[n++];
+ v4src2[list[i]] = buf[n++];
+ v5src2[list[i]] = buf[n++];
+
+ v0src3[list[i]] = buf[n++];
+ v1src3[list[i]] = buf[n++];
+ v2src3[list[i]] = buf[n++];
+ v3src3[list[i]] = buf[n++];
+ v4src3[list[i]] = buf[n++];
+ v5src3[list[i]] = buf[n++];
+
+ v0src4[list[i]] = buf[n++];
+ v1src4[list[i]] = buf[n++];
+ v2src4[list[i]] = buf[n++];
+ v3src4[list[i]] = buf[n++];
+ v4src4[list[i]] = buf[n++];
+ v5src4[list[i]] = buf[n++];
+
+ v0src5[list[i]] = buf[n++];
+ v1src5[list[i]] = buf[n++];
+ v2src5[list[i]] = buf[n++];
+ v3src5[list[i]] = buf[n++];
+ v4src5[list[i]] = buf[n++];
+ v5src5[list[i]] = buf[n++];
+
+ v0src6[list[i]] = buf[n++];
+ v1src6[list[i]] = buf[n++];
+ v2src6[list[i]] = buf[n++];
+ v3src6[list[i]] = buf[n++];
+ v4src6[list[i]] = buf[n++];
+ v5src6[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ // Dispersion interactions, no mixing
+
+ case FORWARD_IK_NONE: {
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ xdest[list[i]] = buf[n++];
+ ydest[list[i]] = buf[n++];
+ zdest[list[i]] = buf[n++];
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_NONE: {
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] = buf[n++];
+ }
+ break;
+ }
+
+ case FORWARD_IK_PERATOM_NONE: {
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ if (eflag_atom) esrc[list[i]] = buf[n++];
+ if (vflag_atom) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+ }
+ break;
+ }
+
+ case FORWARD_AD_PERATOM_NONE: {
+ for (int k = 0; k < nsplit_alloc; k++) {
+ FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ v0src[list[i]] = buf[n++];
+ v1src[list[i]] = buf[n++];
+ v2src[list[i]] = buf[n++];
+ v3src[list[i]] = buf[n++];
+ v4src[list[i]] = buf[n++];
+ v5src[list[i]] = buf[n++];
+ }
+ }
+ break;
+ }
+
+ }
+}
+
+/* ----------------------------------------------------------------------
+ pack ghost values into buf to send to another proc: flag picks the density brick(s) read, list[] gives the grid offsets copied
+------------------------------------------------------------------------- */
+
+void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0; // write cursor into buf; only the multi-brick branches advance it
+
+ //Coulomb interactions
+
+ if (flag == REVERSE_RHO) {
+ FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; // list[] offsets are applied relative to this element
+ for (int i = 0; i < nlist; i++)
+ buf[i] = src[list[i]];
+
+ //Dispersion interactions, geometric mixing
+
+ } else if (flag == REVERSE_RHO_G) {
+ FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; // dispersion bricks use the *_out_6 bounds
+ for (int i = 0; i < nlist; i++)
+ buf[i] = src[list[i]];
+
+ //Dispersion interactions, arithmetic mixing
+
+ } else if (flag == REVERSE_RHO_A) {
+ FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) { // 7 values per list entry, interleaved in brick order a0..a6
+ buf[n++] = src0[list[i]];
+ buf[n++] = src1[list[i]];
+ buf[n++] = src2[list[i]];
+ buf[n++] = src3[list[i]];
+ buf[n++] = src4[list[i]];
+ buf[n++] = src5[list[i]];
+ buf[n++] = src6[list[i]];
+ }
+
+ //Dispersion interactions, no mixing
+
+ } else if (flag == REVERSE_RHO_NONE) {
+ for (int k = 0; k < nsplit_alloc; k++) { // brick-major layout: the nlist values of brick k are contiguous in buf
+ FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) {
+ buf[n++] = src[list[i]];
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ unpack another proc's ghost values from buf and add to own values (accumulates with +=, in the same buf order pack_reverse writes)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+ int n = 0; // read cursor into buf; only the multi-brick branches advance it
+
+ //Coulomb interactions
+
+ if (flag == REVERSE_RHO) {
+ FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; // list[] offsets are applied relative to this element
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] += buf[i];
+
+ //Dispersion interactions, geometric mixing
+
+ } else if (flag == REVERSE_RHO_G) {
+ FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; // dispersion bricks use the *_out_6 bounds
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] += buf[i];
+
+ //Dispersion interactions, arithmetic mixing
+
+ } else if (flag == REVERSE_RHO_A) {
+ FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++) { // 7 values per list entry, consumed in brick order a0..a6
+ dest0[list[i]] += buf[n++];
+ dest1[list[i]] += buf[n++];
+ dest2[list[i]] += buf[n++];
+ dest3[list[i]] += buf[n++];
+ dest4[list[i]] += buf[n++];
+ dest5[list[i]] += buf[n++];
+ dest6[list[i]] += buf[n++];
+ }
+
+ //Dispersion interactions, no mixing
+
+ } else if (flag == REVERSE_RHO_NONE) {
+ for (int k = 0; k < nsplit_alloc; k++) { // brick-major layout: the nlist values of brick k are contiguous in buf
+ FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+ for (int i = 0; i < nlist; i++)
+ dest[list[i]] += buf[n++];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ map nprocs to NX by NY grid as PX by PY procs - return optimal px,py (results are written through the px,py pointers)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+ // loop thru all possible factorizations of nprocs
+ // surf = surface area of largest proc sub-domain
+ // innermost if test minimizes surface area and surface/volume ratio
+
+ int bestsurf = 2 * (nx + ny); // starting bound that candidate surf values are compared against
+ int bestboxx = 0;
+ int bestboxy = 0;
+
+ int boxx,boxy,surf,ipx,ipy;
+
+ ipx = 1;
+ while (ipx <= nprocs) {
+ if (nprocs % ipx == 0) { // only exact factorizations ipx*ipy == nprocs are considered
+ ipy = nprocs/ipx;
+ boxx = nx/ipx;
+ if (nx % ipx) boxx++; // bump by one when nx is not a multiple of ipx (largest sub-domain width)
+ boxy = ny/ipy;
+ if (ny % ipy) boxy++; // bump by one when ny is not a multiple of ipy
+ surf = boxx + boxy;
+ if (surf < bestsurf ||
+ (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { // on equal surf, prefer the larger boxx*boxy
+ bestsurf = surf;
+ bestboxx = boxx;
+ bestboxy = boxy;
+ *px = ipx; // NOTE(review): px,py are written only when a candidate is accepted
+ *py = ipy;
+ }
+ }
+ ipx++;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ charge assignment into rho1d: r1d[0..2][k] = polynomial in dx,dy,dz with coefficients rho_c[l][k], l = 0..ord-1
+ dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+ const FFT_SCALAR &dz, int ord,
+ FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
+{
+ int k,l;
+ FFT_SCALAR r1,r2,r3;
+
+ for (k = (1-ord)/2; k <= ord/2; k++) { // k may be negative, so rho_c/r1d must accept these offset indices
+ r1 = r2 = r3 = ZEROF;
+
+ for (l = ord-1; l >= 0; l--) { // Horner evaluation, highest power first; same coefficients for x, y and z
+ r1 = rho_c[l][k] + r1*dx;
+ r2 = rho_c[l][k] + r2*dy;
+ r3 = rho_c[l][k] + r3*dz;
+ }
+ r1d[0][k] = r1; // row 0 = x, row 1 = y, row 2 = z
+ r1d[1][k] = r2;
+ r1d[2][k] = r3;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ charge assignment into drho1d: dr1d[0..2][k] = polynomial in dx,dy,dz with coefficients drho_c[l][k], l = 0..ord-2
+ dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+ const FFT_SCALAR &dz, int ord,
+ FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
+{
+ int k,l;
+ FFT_SCALAR r1,r2,r3;
+
+ for (k = (1-ord)/2; k <= ord/2; k++) { // k may be negative, so drho_c/dr1d must accept these offset indices
+ r1 = r2 = r3 = ZEROF;
+
+ for (l = ord-2; l >= 0; l--) { // Horner evaluation; one term fewer than compute_rho1d (starts at ord-2)
+ r1 = drho_c[l][k] + r1*dx;
+ r2 = drho_c[l][k] + r2*dy;
+ r3 = drho_c[l][k] + r3*dz;
+ }
+ dr1d[0][k] = r1; // row 0 = x, row 1 = y, row 2 = z
+ dr1d[1][k] = r2;
+ dr1d[2][k] = r3;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ generate coefficients for the weight function of order n
+
+ (n-1)
+ Wn(x) = Sum wn(k,x) , Sum is over every other integer
+ k=-(n-1)
+ For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+ k is odd integers if n is even and even integers if n is odd
+ ---
+ | n-1
+ | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
+ wn(k,x) = < l=0
+ |
+ | 0 otherwise
+ ---
+ a coefficients are packed into the array rho_coeff to eliminate zeros
+ rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff,
+ int ord)
+{
+ int j,k,l,m;
+ FFT_SCALAR s;
+
+ FFT_SCALAR **a;
+ memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); // scratch table, indexed below as a[l][k] with l in [0,ord) and k in [-ord,ord]
+
+ for (k = -ord; k <= ord; k++)
+ for (l = 0; l < ord; l++)
+ a[l][k] = 0.0;
+
+ a[0][0] = 1.0; // seed value for the recursion over j
+ for (j = 1; j < ord; j++) {
+ for (k = -j; k <= j; k += 2) { // pass j rewrites columns k = -j..j (step 2) from neighbouring columns k-1 and k+1
+ s = 0.0;
+ for (l = 0; l < j; l++) {
+ a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
+#ifdef FFT_SINGLE
+ s += powf(0.5,(float) l+1) *
+ (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+ s += pow(0.5,(double) l+1) *
+ (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+ }
+ a[0][k] = s; // constant term of column k is the sum accumulated above
+ }
+ }
+
+ m = (1-ord)/2; // first column index of the packed output arrays
+ for (k = -(ord-1); k < ord; k += 2) { // copy every other column of a; m advances by one per copied column
+ for (l = 0; l < ord; l++)
+ coeff[l][m] = a[l][k];
+ for (l = 1; l < ord; l++)
+ dcoeff[l-1][m] = l*a[l][k]; // term-by-term derivative: d/dx (a*x^l) = l*a*x^(l-1)
+ m++;
+ }
+
+ memory->destroy2d_offset(a,-ord); // release the scratch table, passing the same -ord offset used at creation
+}
+
+/* ----------------------------------------------------------------------
+ Slab-geometry correction term to dampen inter-slab interactions between
+ periodically repeating slabs. Yields good approximation to 2D Ewald if
+ adequate empty space is left between repeating slabs (J. Chem. Phys.
+ 111, 3155). Slabs defined here to be parallel to the xy plane. Also
+ extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPMDisp::slabcorr(int eflag) // NOTE(review): the eflag argument is not read below; eflag_global/eflag_atom are used instead
+{
+ // compute local contribution to global dipole moment
+
+ double *q = atom->q;
+ double **x = atom->x;
+ double zprd = domain->zprd;
+ int nlocal = atom->nlocal;
+
+ double dipole = 0.0;
+ for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; // sum of q*z over this proc's nlocal atoms
+
+ // sum local contributions to get global dipole moment
+
+ double dipole_all;
+ MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+ // need to make non-neutral systems and/or
+ // per-atom energy translationally invariant
+
+ double dipole_r2 = 0.0; // stays 0.0 unless the branch below runs
+ if (eflag_atom || fabs(qsum) > SMALL) {
+ for (int i = 0; i < nlocal; i++)
+ dipole_r2 += q[i]*x[i][2]*x[i][2]; // sum of q*z^2 over local atoms
+
+ // sum local contributions
+
+ double tmp;
+ MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+ dipole_r2 = tmp;
+ }
+
+ // compute corrections
+
+ const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+ qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; // the two qsum terms vanish when qsum is zero
+ const double qscale = force->qqrd2e * scale;
+
+ if (eflag_global) energy_1 += qscale * e_slabcorr; // accumulated into energy_1
+
+ // per-atom energy
+
+ if (eflag_atom) {
+ double efact = qscale * MY_2PI/volume;
+ for (int i = 0; i < nlocal; i++)
+ eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+ qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+ }
+
+ // add on force corrections
+
+ double ffact = qscale * (-4.0*MY_PI/volume);
+ double **f = atom->f;
+
+ for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); // only the z force component is modified
+}
+
+/* ----------------------------------------------------------------------
+ perform and time the 1d FFTs required for N timesteps; elapsed MPI_Wtime seconds are returned through time1d
+------------------------------------------------------------------------- */
+
+int PPPMDisp::timing_1d(int n, double &time1d)
+{
+ double time1,time2;
+ int mixing = 1; // multiplier applied to the *_6 grid time below
+ if (function[2]) mixing = 4;
+ if (function[3]) mixing = nsplit_alloc/2; // tested last, so this value wins if function[2] is also set
+
+ if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; // zero the work buffers before timing
+ if (function[1] + function[2] + function[3])
+ for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
+
+ MPI_Barrier(world); // barrier before each clock read
+ time1 = MPI_Wtime();
+
+ if (function[0]) {
+ for (int i = 0; i < n; i++) {
+ fft1->timing1d(work1,nfft_both,1);
+ fft2->timing1d(work1,nfft_both,-1);
+ if (differentiation_flag != 1){ // two further fft2 calls unless differentiation_flag == 1
+ fft2->timing1d(work1,nfft_both,-1);
+ fft2->timing1d(work1,nfft_both,-1);
+ }
+ }
+ }
+
+ MPI_Barrier(world);
+ time2 = MPI_Wtime();
+ time1d = time2 - time1; // time of the function[0] section (fft1/fft2); overwrites time1d
+
+ MPI_Barrier(world);
+ time1 = MPI_Wtime();
+
+ if (function[1] + function[2] + function[3]) {
+ for (int i = 0; i < n; i++) {
+ fft1_6->timing1d(work1_6,nfft_both_6,1);
+ fft2_6->timing1d(work1_6,nfft_both_6,-1);
+ if (differentiation_flag != 1){ // same call pattern as above, on the *_6 FFT objects
+ fft2_6->timing1d(work1_6,nfft_both_6,-1);
+ fft2_6->timing1d(work1_6,nfft_both_6,-1);
+ }
+ }
+ }
+
+ MPI_Barrier(world);
+ time2 = MPI_Wtime();
+ time1d += (time2 - time1)*mixing; // add the *_6 section time, scaled by mixing
+
+ if (differentiation_flag) return 2; // 2 for any nonzero differentiation_flag, otherwise 4
+ return 4;
+}
+
+/* ----------------------------------------------------------------------
+ perform and time the 3d FFTs required for N timesteps; elapsed MPI_Wtime seconds are returned through time3d
+------------------------------------------------------------------------- */
+
+int PPPMDisp::timing_3d(int n, double &time3d)
+{
+ double time1,time2;
+ int mixing = 1; // multiplier applied to the *_6 grid time below
+ if (function[2]) mixing = 4;
+ if (function[3]) mixing = nsplit_alloc/2; // tested last, so this value wins if function[2] is also set
+
+ if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; // zero the work buffers before timing
+ if (function[1] + function[2] + function[3])
+ for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
+
+
+
+ MPI_Barrier(world); // barrier before each clock read
+ time1 = MPI_Wtime();
+
+ if (function[0]) {
+ for (int i = 0; i < n; i++) {
+ fft1->compute(work1,work1,1); // in place: work1 is both input and output
+ fft2->compute(work1,work1,-1);
+ if (differentiation_flag != 1) { // two further fft2 calls unless differentiation_flag == 1
+ fft2->compute(work1,work1,-1);
+ fft2->compute(work1,work1,-1);
+ }
+ }
+ }
+
+ MPI_Barrier(world);
+ time2 = MPI_Wtime();
+ time3d = time2 - time1; // time of the function[0] section (fft1/fft2); overwrites time3d
+
+ MPI_Barrier(world);
+ time1 = MPI_Wtime();
+
+ if (function[1] + function[2] + function[3]) {
+ for (int i = 0; i < n; i++) {
+ fft1_6->compute(work1_6,work1_6,1);
+ fft2_6->compute(work1_6,work1_6,-1);
+ if (differentiation_flag != 1) { // same call pattern as above, on the *_6 FFT objects
+ fft2_6->compute(work1_6,work1_6,-1);
+ fft2_6->compute(work1_6,work1_6,-1);
+ }
+ }
+ }
+
+ MPI_Barrier(world);
+ time2 = MPI_Wtime();
+ time3d += (time2 - time1) * mixing; // add the *_6 section time, scaled by mixing
+
+ if (differentiation_flag) return 2; // 2 for any nonzero differentiation_flag, otherwise 4
+ return 4;
+}
+
+/* ----------------------------------------------------------------------
+ memory usage of local arrays, returned as a byte count summed from the element counts and sizeof() terms below
+------------------------------------------------------------------------- */
+
+double PPPMDisp::memory_usage()
+{
+ double bytes = nmax*3 * sizeof(double); // NOTE(review): presumably the nmax x 3 part2grid array - confirm
+ int mixing = 1; // number of dispersion brick sets counted below
+ int diff = 3; //depends on differentiation
+ int per = 7; //depends on per atom calculations
+ if (differentiation_flag) {
+ diff = 1;
+ per = 6;
+ }
+ if (!evflag_atom) per = 0; // no per-atom bricks counted when evflag_atom is off
+ if (function[2]) mixing = 7;
+ if (function[3]) mixing = nsplit_alloc; // tested last, so this value wins if function[2] is also set
+
+ if (function[0]) {
+ int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+ (nzhi_out-nzlo_out+1); // grid points in the local brick, using the *_out bounds
+ bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory
+ bytes += 6 * nfft_both * sizeof(double); // vg
+ bytes += nfft_both * sizeof(double); // greensfn
+ bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2
+ bytes += cg->memory_usage();
+ }
+
+ if (function[1] + function[2] + function[3]) {
+ int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) *
+ (nzhi_out_6-nzlo_out_6+1); // same count for the dispersion grid (*_out_6 bounds)
+ bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks
+ bytes += 6 * nfft_both_6 * sizeof(double); // vg
+ bytes += nfft_both_6 * sizeof(double); // greensfn
+ bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2
+ bytes += cg_6->memory_usage();
+ }
+ return bytes;
+}
diff --git a/src/KSPACE/pppm_old.cpp b/src/KSPACE/pppm_old.cpp
index 4d68c12586..a368b5d5b0 100644
--- a/src/KSPACE/pppm_old.cpp
+++ b/src/KSPACE/pppm_old.cpp
@@ -1,2863 +1,2863 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
- per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm_old.h"
-#include "math_const.h"
-#include "atom.h"
-#include "comm.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF 1.0f
-#else
-#define ZEROF 0.0
-#define ONEF 1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
- if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
-
- triclinic_support = 0;
- pppmflag = 1;
- group_group_enable = 0;
-
- accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
- nfactors = 3;
- factors = new int[nfactors];
- factors[0] = 2;
- factors[1] = 3;
- factors[2] = 5;
-
- MPI_Comm_rank(world,&me);
- MPI_Comm_size(world,&nprocs);
-
- density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
- density_fft = NULL;
- u_brick = NULL;
- v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
- greensfn = NULL;
- work1 = work2 = NULL;
- vg = NULL;
- fkx = fky = fkz = NULL;
- buf1 = buf2 = buf3 = buf4 = NULL;
-
- density_A_brick = density_B_brick = NULL;
- density_A_fft = density_B_fft = NULL;
-
- gf_b = NULL;
- rho1d = rho_coeff = NULL;
-
- fft1 = fft2 = NULL;
- remap = NULL;
-
- nmax = 0;
- part2grid = NULL;
-}
-
-/* ----------------------------------------------------------------------
- free all memory
-------------------------------------------------------------------------- */
-
-PPPMOld::~PPPMOld()
-{
- delete [] factors;
- deallocate();
- deallocate_peratom();
- deallocate_groups();
- memory->destroy(part2grid);
-}
-
-/* ----------------------------------------------------------------------
- called once before run
-------------------------------------------------------------------------- */
-
-void PPPMOld::init()
-{
- if (me == 0) {
- if (screen) fprintf(screen,"PPPM initialization ...\n");
- if (logfile) fprintf(logfile,"PPPM initialization ...\n");
- }
-
- // error check
-
- triclinic_check();
- if (domain->dimension == 2) error->all(FLERR,
- "Cannot use PPPM with 2d simulation");
-
- if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
- if (slabflag) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab PPPM");
- }
-
- if (order < 2 || order > MAXORDER) {
- char str[128];
- sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
- error->all(FLERR,str);
- }
-
- // free all arrays previously allocated
-
- deallocate();
- deallocate_peratom();
- peratom_allocate_flag = 0;
- deallocate_groups();
- group_allocate_flag = 0;
-
- // extract short-range Coulombic cutoff from pair style
-
- scale = 1.0;
-
- pair_check();
-
- int itmp=0;
- double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
- if (p_cutoff == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- cutoff = *p_cutoff;
-
- // if kspace is TIP4P, extract TIP4P params from pair style
- // bond/angle are not yet init(), so insure equilibrium request is valid
-
- qdist = 0.0;
-
- if (tip4pflag) {
- double *p_qdist = (double *) force->pair->extract("qdist",itmp);
- int *p_typeO = (int *) force->pair->extract("typeO",itmp);
- int *p_typeH = (int *) force->pair->extract("typeH",itmp);
- int *p_typeA = (int *) force->pair->extract("typeA",itmp);
- int *p_typeB = (int *) force->pair->extract("typeB",itmp);
- if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- qdist = *p_qdist;
- typeO = *p_typeO;
- typeH = *p_typeH;
- int typeA = *p_typeA;
- int typeB = *p_typeB;
-
- if (force->angle == NULL || force->bond == NULL)
- error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
- if (typeA < 1 || typeA > atom->nangletypes ||
- force->angle->setflag[typeA] == 0)
- error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
- if (typeB < 1 || typeB > atom->nbondtypes ||
- force->bond->setflag[typeB] == 0)
- error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
- double theta = force->angle->equilibrium_angle(typeA);
- double blen = force->bond->equilibrium_distance(typeB);
- alpha = qdist / (cos(0.5*theta) * blen);
- }
-
- // compute qsum & qsqsum and warn if not charge-neutral
-
- qsum = qsqsum = 0.0;
- for (int i = 0; i < atom->nlocal; i++) {
- qsum += atom->q[i];
- qsqsum += atom->q[i]*atom->q[i];
- }
-
- double tmp;
- MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum = tmp;
- MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsqsum = tmp;
-
- if (qsqsum == 0.0)
- error->all(FLERR,"Cannot use kspace solver on system with no charge");
- if (fabs(qsum) > SMALL && me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- // setup FFT grid resolution and g_ewald
- // normally one iteration thru while loop is all that is required
- // if grid stencil extends beyond neighbor proc, reduce order and try again
-
- int iteration = 0;
-
- while (order > 1) {
- if (iteration && me == 0)
- error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
- "beyond neighbor processor");
- iteration++;
-
- set_grid();
-
- if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
- error->all(FLERR,"PPPM grid is too large");
-
- // global indices of PPPM grid range from 0 to N-1
- // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that I own without ghost cells
- // for slab PPPM, assign z grid as if it were not extended
-
- nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm);
- nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
-
- nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm);
- nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
-
- nzlo_in = static_cast
- (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
- nzhi_in = static_cast
- (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
-
- // nlower,nupper = stencil size for mapping particles to PPPM grid
-
- nlower = -(order-1)/2;
- nupper = order/2;
-
- // shift values for particle <-> grid mapping
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- if (order % 2) shift = OFFSET + 0.5;
- else shift = OFFSET;
- if (order % 2) shiftone = 0.0;
- else shiftone = 0.5;
-
- // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
- // global PPPM grid that my particles can contribute charge to
- // effectively nlo_in,nhi_in + ghost cells
- // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
- // position a particle in my box can be at
- // dist[3] = particle position bound = subbox + skin/2.0 + qdist
- // qdist = offset due to TIP4P fictitious charge
- // convert to triclinic if necessary
- // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
- // for slab PPPM, assign z grid as if it were not extended
-
- triclinic = domain->triclinic;
- double *prd,*sublo,*subhi;
-
- if (triclinic == 0) {
- prd = domain->prd;
- boxlo = domain->boxlo;
- sublo = domain->sublo;
- subhi = domain->subhi;
- } else {
- prd = domain->prd_lamda;
- boxlo = domain->boxlo_lamda;
- sublo = domain->sublo_lamda;
- subhi = domain->subhi_lamda;
- }
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double dist[3];
- double cuthalf = 0.5*neighbor->skin + qdist;
- if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
- else {
- dist[0] = cuthalf/domain->prd[0];
- dist[1] = cuthalf/domain->prd[1];
- dist[2] = cuthalf/domain->prd[2];
- }
-
- int nlo,nhi;
-
- nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nxlo_out = nlo + nlower;
- nxhi_out = nhi + nupper;
-
- nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nylo_out = nlo + nlower;
- nyhi_out = nhi + nupper;
-
- nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nzlo_out = nlo + nlower;
- nzhi_out = nhi + nupper;
-
- // for slab PPPM, change the grid boundary for processors at +z end
- // to include the empty volume between periodically repeating slabs
- // for slab PPPM, want charge data communicated from -z proc to +z proc,
- // but not vice versa, also want field data communicated from +z proc to
- // -z proc, but not vice versa
- // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
-
- if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) {
- nzhi_in = nz_pppm - 1;
- nzhi_out = nz_pppm - 1;
- }
-
- // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
- // that overlay domain I own
- // proc in that direction tells me via sendrecv()
- // if no neighbor proc, value is from self since I have ghosts regardless
-
- int nplanes;
- MPI_Status status;
-
- nplanes = nxlo_in - nxlo_out;
- if (comm->procneigh[0][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
- &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0,
- world,&status);
- else nxhi_ghost = nplanes;
-
- nplanes = nxhi_out - nxhi_in;
- if (comm->procneigh[0][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
- &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0],
- 0,world,&status);
- else nxlo_ghost = nplanes;
-
- nplanes = nylo_in - nylo_out;
- if (comm->procneigh[1][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
- &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0,
- world,&status);
- else nyhi_ghost = nplanes;
-
- nplanes = nyhi_out - nyhi_in;
- if (comm->procneigh[1][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
- &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0,
- world,&status);
- else nylo_ghost = nplanes;
-
- nplanes = nzlo_in - nzlo_out;
- if (comm->procneigh[2][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
- &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0,
- world,&status);
- else nzhi_ghost = nplanes;
-
- nplanes = nzhi_out - nzhi_in;
- if (comm->procneigh[2][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
- &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0,
- world,&status);
- else nzlo_ghost = nplanes;
-
- // test that ghost overlap is not bigger than my sub-domain
-
- int flag = 0;
- if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1;
- if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1;
- if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1;
- if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1;
- if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1;
- if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1;
-
- int flag_all;
- MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
-
- if (flag_all == 0) break;
- order--;
- }
-
- if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0");
-
- // decomposition of FFT mesh
- // global indices range from 0 to N-1
- // proc owns entire x-dimension, clump of columns in y,z dimensions
- // npey_fft,npez_fft = # of procs in y,z dims
- // if nprocs is small enough, proc can own 1 or more entire xy planes,
- // else proc owns 2d sub-blocks of yz plane
- // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
- // nlo_fft,nhi_fft = lower/upper limit of the section
- // of the global FFT mesh that I own
-
- int npey_fft,npez_fft;
- if (nz_pppm >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft = 0;
- nxhi_fft = nx_pppm - 1;
- nylo_fft = me_y*ny_pppm/npey_fft;
- nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
- nzlo_fft = me_z*nz_pppm/npez_fft;
- nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
- // PPPM grid for this proc, including ghosts
-
- ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
-
- // FFT arrays on this proc, without ghosts
- // nfft = FFT points in FFT decomposition on this proc
- // nfft_brick = FFT points in 3d brick-decomposition on this proc
- // nfft_both = greater of 2 values
-
- nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
- (nzhi_fft-nzlo_fft+1);
- int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
- (nzhi_in-nzlo_in+1);
- nfft_both = MAX(nfft,nfft_brick);
-
- // buffer space for use in brick2fft and fillbrick
- // idel = max # of ghost planes to send or recv in +/- dir of each dim
- // nx,ny,nz = owned planes (including ghosts) in each dim
- // nxx,nyy,nzz = max # of grid cells to send in each dim
- // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick
-
- int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;
-
- idelx = MAX(nxlo_ghost,nxhi_ghost);
- idelx = MAX(idelx,nxhi_out-nxhi_in);
- idelx = MAX(idelx,nxlo_in-nxlo_out);
-
- idely = MAX(nylo_ghost,nyhi_ghost);
- idely = MAX(idely,nyhi_out-nyhi_in);
- idely = MAX(idely,nylo_in-nylo_out);
-
- idelz = MAX(nzlo_ghost,nzhi_ghost);
- idelz = MAX(idelz,nzhi_out-nzhi_in);
- idelz = MAX(idelz,nzlo_in-nzlo_out);
-
- nx = nxhi_out - nxlo_out + 1;
- ny = nyhi_out - nylo_out + 1;
- nz = nzhi_out - nzlo_out + 1;
-
- nxx = idelx * ny * nz;
- nyy = idely * nx * nz;
- nzz = idelz * nx * ny;
-
- nbuf = MAX(nxx,nyy);
- nbuf = MAX(nbuf,nzz);
-
- nbuf_peratom = 7*nbuf;
- nbuf *= 3;
-
- // print stats
-
- int ngrid_max,nfft_both_max,nbuf_max;
- MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world);
-
- if (me == 0) {
- if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n",
- ngrid_max,nfft_both_max,nbuf_max);
- if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n",
- ngrid_max,nfft_both_max,nbuf_max);
- }
-
- // allocate K-space dependent memory
- // don't invoke allocate_peratom() here, wait to see if needed
-
- allocate();
-
- // pre-compute Green's function denomiator expansion
- // pre-compute 1d charge distribution coefficients
-
- compute_gf_denom();
- compute_rho_coeff();
-}
-
-/* ----------------------------------------------------------------------
-   adjust PPPM coeffs, called initially and whenever volume has changed
-   recomputes from the current box lengths, grid size and g_ewald:
-     volume, delxinv/delyinv/delzinv, delvolinv
-     fkx,fky,fkz = wave-vector components for my FFT grid pts
-     vg = 6 virial coefficients per FFT grid pt (xx,yy,zz,xy,xz,yz)
-     greensfn = modified (Hockney-Eastwood) Coulomb Green's function
-   vg and greensfn are filled in z-outer, y-middle, x-inner order
-------------------------------------------------------------------------- */
-
-void PPPMOld::setup()
-{
-  int i,j,k,l,m,n;
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  // inverse grid spacings and inverse volume of one grid cell
-
-  delxinv = nx_pppm/xprd;
-  delyinv = ny_pppm/yprd;
-  delzinv = nz_pppm/zprd_slab;
-
-  delvolinv = delxinv*delyinv*delzinv;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  // fkx,fky,fkz for my FFT grid pts
-  // integer division 2*i/N maps index i to i-N once 2*i >= N,
-  // i.e. the upper half of the indices becomes negative frequencies
-
-  double per;
-
-  for (i = nxlo_fft; i <= nxhi_fft; i++) {
-    per = i - nx_pppm*(2*i/nx_pppm);
-    fkx[i] = unitkx*per;
-  }
-
-  for (i = nylo_fft; i <= nyhi_fft; i++) {
-    per = i - ny_pppm*(2*i/ny_pppm);
-    fky[i] = unitky*per;
-  }
-
-  for (i = nzlo_fft; i <= nzhi_fft; i++) {
-    per = i - nz_pppm*(2*i/nz_pppm);
-    fkz[i] = unitkz*per;
-  }
-
-  // virial coefficients
-  // all 6 components are zeroed at k = 0 to avoid dividing by sqk = 0
-
-  double sqk,vterm;
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++) {
-    for (j = nylo_fft; j <= nyhi_fft; j++) {
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
-        if (sqk == 0.0) {
-          vg[n][0] = 0.0;
-          vg[n][1] = 0.0;
-          vg[n][2] = 0.0;
-          vg[n][3] = 0.0;
-          vg[n][4] = 0.0;
-          vg[n][5] = 0.0;
-        } else {
-          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
-          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
-          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
-          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
-          vg[n][3] = vterm*fkx[i]*fky[j];
-          vg[n][4] = vterm*fkx[i]*fkz[k];
-          vg[n][5] = vterm*fky[j]*fkz[k];
-        }
-        n++;
-      }
-    }
-  }
-
-  // modified (Hockney-Eastwood) Coulomb Green's function
-
-  int nx,ny,nz,kper,lper,mper;
-  double snx,sny,snz,snx2,sny2,snz2;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,dot1,dot2;
-  double numerator,denominator;
-
-  // nbx,nby,nbz = # of periodic images summed on each side, per dimension
-  // the cast truncates the floating-point estimate toward zero
-
-  int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-  int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-  int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-
-  double form = 1.0;
-
-  // exponent of the sin(x)/x factor, invariant over the whole grid loop
-
-  const double dorder = static_cast<double>(order);
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-    snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm);
-    snz2 = snz*snz;
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-      sny = sin(0.5*unitky*lper*yprd/ny_pppm);
-      sny2 = sny*sny;
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-        snx = sin(0.5*unitkx*kper*xprd/nx_pppm);
-        snx2 = snx*snx;
-
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-          // 12.5663706 is 4*pi truncated to 9 significant digits
-          numerator = form*12.5663706/sqk;
-          denominator = gf_denom(snx2,sny2,snz2);
-          sum1 = 0.0;
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*pow(qx/g_ewald,2.0));
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*pow(qy/g_ewald,2.0));
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*pow(qz/g_ewald,2.0));
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
-
-                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
-                dot2 = qx*qx+qy*qy+qz*qz;
-                sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0);
-              }
-            }
-          }
-          greensfn[n++] = numerator*sum1/denominator;
-        } else greensfn[n++] = 0.0;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
- compute the PPPM long-range force, energy, virial
- eflag,vflag = energy/virial request flags, handed to ev_setup() if either is set
- steps, in order: particle_map, make_rho, brick2fft, poisson, fillbrick,
- fieldforce, then global and per-atom energy/virial post-processing
- for triclinic boxes atoms are in lamda coords between x2lamda() and lamda2x()
-------------------------------------------------------------------------- */
-
- void PPPMOld::compute(int eflag, int vflag)
-{
- int i,j;
-
- // set energy/virial flags
- // invoke allocate_peratom() if needed for first time
- // if neither flag is set, all ev flags are cleared to 0
-
- if (eflag || vflag) ev_setup(eflag,vflag);
- else evflag = evflag_atom = eflag_global = vflag_global =
- eflag_atom = vflag_atom = 0;
-
- if (evflag_atom && !peratom_allocate_flag) {
- allocate_peratom();
- peratom_allocate_flag = 1;
- }
-
- // convert atoms from box to lamda coords (triclinic only)
- // the conversion is undone at the end of this method
-
- if (triclinic == 0) boxlo = domain->boxlo;
- else {
- boxlo = domain->boxlo_lamda;
- domain->x2lamda(atom->nlocal);
- }
-
- // extend size of per-atom arrays if necessary
- // part2grid is re-created with atom->nmax rows, old contents are discarded
-
- if (atom->nlocal > nmax) {
- memory->destroy(part2grid);
- nmax = atom->nmax;
- memory->create(part2grid,nmax,3,"pppm:part2grid");
- }
-
- // find grid points for all my particles
- // map my particle charge onto my local 3d density grid
-
- particle_map();
- make_rho();
-
- // all procs communicate density values from their ghost cells
- // to fully sum contribution in their 3d bricks
- // remap from 3d decomposition to FFT decomposition
-
- brick2fft();
-
- // compute potential gradient on my FFT grid and
- // portion of e_long on this proc's FFT grid
- // return gradients (electric fields) in 3d brick decomposition
- // also performs per-atom calculations via poisson_peratom()
-
- poisson();
-
- // all procs communicate E-field values
- // to fill ghost cells surrounding their 3d bricks
-
- fillbrick();
-
- // extra per-atom energy/virial communication
-
- if (evflag_atom) fillbrick_peratom();
-
- // calculate the force on my particles
-
- fieldforce();
-
- // extra per-atom energy/virial communication
-
- if (evflag_atom) fieldforce_peratom();
-
- // sum global energy across procs and add in volume-dependent term
- // the two subtracted terms depend on qsqsum and on qsum*qsum/volume
-
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) {
- double energy_all;
- MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
- energy = energy_all;
-
- energy *= 0.5*volume;
- energy -= g_ewald*qsqsum/MY_PIS +
- MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
- energy *= qscale;
- }
-
- // sum global virial across procs
- // each of the 6 components is scaled by 0.5*qscale*volume
-
- if (vflag_global) {
- double virial_all[6];
- MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
- for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
- }
-
- // per-atom energy/virial
- // energy includes self-energy correction
- // only the nlocal owned atoms are updated
-
- if (evflag_atom) {
- double *q = atom->q;
- int nlocal = atom->nlocal;
-
- if (eflag_atom) {
- for (i = 0; i < nlocal; i++) {
- eatom[i] *= 0.5;
- eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
- (g_ewald*g_ewald*volume);
- eatom[i] *= qscale;
- }
- }
-
- if (vflag_atom) {
- for (i = 0; i < nlocal; i++)
- for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
- }
- }
-
- // 2d slab correction
-
- if (slabflag == 1) slabcorr();
-
- // convert atoms back from lamda to box coords
-
- if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors and order
- brick arrays span the nlo_out:nhi_out index range in z,y,x (ghosts included)
- FFT work arrays are sized by nfft_both, comm buffers by nbuf
- also creates the 2 FFT3d objects and the Remap object
-------------------------------------------------------------------------- */
-
- void PPPMOld::allocate()
-{
- memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_brick");
- memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdx_brick");
- memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdy_brick");
- memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdz_brick");
-
- memory->create(density_fft,nfft_both,"pppm:density_fft");
- memory->create(greensfn,nfft_both,"pppm:greensfn");
- memory->create(work1,2*nfft_both,"pppm:work1"); // work arrays are twice nfft_both long
- memory->create(work2,2*nfft_both,"pppm:work2");
- memory->create(vg,nfft_both,6,"pppm:vg"); // 6 virial coefficients per grid pt
-
- memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
- memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
- memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
-
- memory->create(buf1,nbuf,"pppm:buf1");
- memory->create(buf2,nbuf,"pppm:buf2");
-
- // summation coeffs
- // rho1d and rho_coeff use offset column indices that depend on order
-
- memory->create(gf_b,order,"pppm:gf_b");
- memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
- memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
-
- // create 2 FFTs and a Remap
- // 1st FFT keeps data in FFT decomposition
- // 2nd FFT returns data in 3d brick decomposition
- // remap takes data from 3d brick to FFT decomposition
-
- int tmp; // receives the last constructor argument of FFT3d, not used afterwards
-
- fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 0,0,&tmp);
-
- fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- 0,0,&tmp);
-
- remap = new Remap(lmp,world,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 1,0,0,FFT_PRECISION);
-}
-
-/* ----------------------------------------------------------------------
- allocate per-atom memory that depends on # of K-vectors and order
- creates u_brick and v0_brick..v5_brick over the nlo_out:nhi_out range
- plus the 2 comm buffers buf3,buf4 of length nbuf_peratom
-------------------------------------------------------------------------- */
-
- void PPPMOld::allocate_peratom()
-{
- memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:u_brick");
-
- memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v0_brick");
- memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v1_brick");
- memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v2_brick");
- memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v3_brick");
- memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v4_brick");
- memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:v5_brick");
-
- memory->create(buf3,nbuf_peratom,"pppm:buf3");
- memory->create(buf4,nbuf_peratom,"pppm:buf4");
-}
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors and order
- offset arrays are freed with the lower index bounds nlo_out / nlo_fft,
- and rho1d,rho_coeff with their order-dependent column offsets
- also deletes the 2 FFT3d objects and the Remap object
-------------------------------------------------------------------------- */
-
- void PPPMOld::deallocate()
-{
- memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-
- memory->destroy(density_fft);
- memory->destroy(greensfn);
- memory->destroy(work1);
- memory->destroy(work2);
- memory->destroy(vg);
-
- memory->destroy1d_offset(fkx,nxlo_fft);
- memory->destroy1d_offset(fky,nylo_fft);
- memory->destroy1d_offset(fkz,nzlo_fft);
-
- memory->destroy(buf1);
- memory->destroy(buf2);
-
- memory->destroy(gf_b);
- memory->destroy2d_offset(rho1d,-order/2); // offset is computed from the current value of order
- memory->destroy2d_offset(rho_coeff,(1-order)/2);
-
- delete fft1;
- delete fft2;
- delete remap;
-}
-
-/* ----------------------------------------------------------------------
- deallocate per-atom memory that depends on # of K-vectors and order
- frees u_brick, v0_brick..v5_brick and the comm buffers buf3,buf4
- brick arrays are freed with the lower index bounds nzlo_out,nylo_out,nxlo_out
-------------------------------------------------------------------------- */
-
- void PPPMOld::deallocate_peratom()
-{
- memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
-
- memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
-
- memory->destroy(buf3);
- memory->destroy(buf4);
-}
-
-/* ----------------------------------------------------------------------
-   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
-   g_ewald is estimated from accuracy and cutoff unless gewaldflag is set
-   grid size is grown until rms() meets accuracy unless gridflag is set
-   grid size is then boosted until factorable() accepts it, and
-   g_ewald is refined by bisection on diffpr() unless gewaldflag is set
-   proc 0 prints the resulting parameters to screen and logfile
-------------------------------------------------------------------------- */
-
-void PPPMOld::set_grid()
-{
-  // see JCP 109, pg 7698 for derivation of coefficients
-  // higher order coefficients may be computed if needed
-  // only entries acons[p][0..p-1] for p = 1..7 are assigned,
-  // rms() reads acons[order][0..order-1]
-
-  double **acons;
-  memory->create(acons,8,7,"pppm:acons");
-
-  acons[1][0] = 2.0 / 3.0;
-  acons[2][0] = 1.0 / 50.0;
-  acons[2][1] = 5.0 / 294.0;
-  acons[3][0] = 1.0 / 588.0;
-  acons[3][1] = 7.0 / 1440.0;
-  acons[3][2] = 21.0 / 3872.0;
-  acons[4][0] = 1.0 / 4320.0;
-  acons[4][1] = 3.0 / 1936.0;
-  acons[4][2] = 7601.0 / 2271360.0;
-  acons[4][3] = 143.0 / 28800.0;
-  acons[5][0] = 1.0 / 23232.0;
-  acons[5][1] = 7601.0 / 13628160.0;
-  acons[5][2] = 143.0 / 69120.0;
-  acons[5][3] = 517231.0 / 106536960.0;
-  acons[5][4] = 106640677.0 / 11737571328.0;
-  acons[6][0] = 691.0 / 68140800.0;
-  acons[6][1] = 13.0 / 57600.0;
-  acons[6][2] = 47021.0 / 35512320.0;
-  acons[6][3] = 9694607.0 / 2095994880.0;
-  acons[6][4] = 733191589.0 / 59609088000.0;
-  acons[6][5] = 326190917.0 / 11700633600.0;
-  acons[7][0] = 1.0 / 345600.0;
-  acons[7][1] = 3617.0 / 35512320.0;
-  acons[7][2] = 745739.0 / 838397952.0;
-  acons[7][3] = 56399353.0 / 12773376000.0;
-  acons[7][4] = 25091609.0 / 1560084480.0;
-  acons[7][5] = 1755948832039.0 / 36229939200000.0;
-  acons[7][6] = 4887769399.0 / 37838389248.0;
-
-  double q2 = qsqsum * force->qqrd2e / force->dielectric;
-
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab PPPM
-  // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  // make initial g_ewald estimate
-  // based on desired accuracy and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  double h_x,h_y,h_z;
-  bigint natoms = atom->natoms;
-
-  if (!gewaldflag) {
-    if (accuracy <= 0.0)
-      error->all(FLERR,"KSpace accuracy must be > 0");
-    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
-    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
-    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
-  }
-
-  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
-  // nz_pppm uses extended zprd_slab instead of zprd
-  // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1
-  // reduce it until accuracy target is met
-  // NOTE: inside each loop err is evaluated with the spacing from the
-  //   previous pass, then the grid is grown, so the grid is incremented
-  //   once more after the first spacing that meets the target
-
-  if (!gridflag) {
-    double err;
-    h_x = h_y = h_z = 1.0/g_ewald;
-
-    nx_pppm = static_cast<int> (xprd/h_x) + 1;
-    ny_pppm = static_cast<int> (yprd/h_y) + 1;
-    nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
-
-    err = rms(h_x,xprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_x,xprd,natoms,q2,acons);
-      nx_pppm++;
-      h_x = xprd/nx_pppm;
-    }
-
-    err = rms(h_y,yprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_y,yprd,natoms,q2,acons);
-      ny_pppm++;
-      h_y = yprd/ny_pppm;
-    }
-
-    err = rms(h_z,zprd_slab,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_z,zprd_slab,natoms,q2,acons);
-      nz_pppm++;
-      h_z = zprd_slab/nz_pppm;
-    }
-  }
-
-  // boost grid size until it is factorable
-
-  while (!factorable(nx_pppm)) nx_pppm++;
-  while (!factorable(ny_pppm)) ny_pppm++;
-  while (!factorable(nz_pppm)) nz_pppm++;
-
-  // adjust g_ewald for new grid size
-
-  h_x = xprd/static_cast<double>(nx_pppm);
-  h_y = yprd/static_cast<double>(ny_pppm);
-  h_z = zprd_slab/static_cast<double>(nz_pppm);
-
-  if (!gewaldflag) {
-    double gew1,gew2,dgew,f,fmid,hmin,rtb;
-    int ncount;
-
-    // diffpr() reads the member g_ewald, so each trial value
-    // is stored in g_ewald before diffpr() is called
-
-    gew1 = 0.0;
-    g_ewald = gew1;
-    f = diffpr(h_x,h_y,h_z,q2,acons);
-
-    hmin = MIN(h_x,MIN(h_y,h_z));
-    gew2 = 10.0/hmin;
-    g_ewald = gew2;
-    fmid = diffpr(h_x,h_y,h_z,q2,acons);
-
-    // bisection needs a sign change of diffpr() between gew1 and gew2
-    // rtb = bracket end where diffpr() is negative, dgew = signed width
-
-    if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G");
-    rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2);
-    ncount = 0;
-    while (fabs(dgew) > SMALL && fmid != 0.0) {
-      dgew *= 0.5;
-      g_ewald = rtb + dgew;
-      fmid = diffpr(h_x,h_y,h_z,q2,acons);
-      if (fmid <= 0.0) rtb = g_ewald;
-      ncount++;
-      if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G");
-    }
-  }
-
-  // final RMS accuracy
-  // stored in a local with its own name so the member accuracy,
-  // which holds the requested tolerance, is not shadowed
-
-  double lprx = rms(h_x,xprd,natoms,q2,acons);
-  double lpry = rms(h_y,yprd,natoms,q2,acons);
-  double lprz = rms(h_z,zprd_slab,natoms,q2,acons);
-  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
-  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
-  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
-  double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
-
-  // free local memory
-
-  memory->destroy(acons);
-
-  // print info
-  // screen and logfile receive identical text
-
-  if (me == 0) {
-#ifdef FFT_SINGLE
-    const char fft_prec[] = "single";
-#else
-    const char fft_prec[] = "double";
-#endif
-    if (screen) {
-      fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
-      fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(screen," stencil order = %d\n",order);
-      fprintf(screen," estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(screen," estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(screen," using %s precision FFTs\n",fft_prec);
-    }
-    if (logfile) {
-      fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
-      fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(logfile," stencil order = %d\n",order);
-      fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(logfile," estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(logfile," using %s precision FFTs\n",fft_prec);
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   test whether n can be reduced to 1 by repeatedly dividing out
-   entries of the factors list
-   return 1 if yes (also for any n <= 1), 0 if no
-------------------------------------------------------------------------- */
-
-int PPPMOld::factorable(int n)
-{
-  int remaining = n;
-
-  while (remaining > 1) {
-    // locate the first listed factor that divides what is left
-    int idx = 0;
-    while (idx < nfactors && remaining % factors[idx] != 0) idx++;
-
-    // no listed factor divides it, so n is not factorable
-    if (idx == nfactors) return 0;
-
-    remaining /= factors[idx];
-  }
-
-  return 1;
-}
-
-/* ----------------------------------------------------------------------
-   RMS accuracy estimate for one dimension
-   h = grid spacing, prd = box length in that dimension
-   natoms = atom count, q2 = charge prefactor supplied by caller
-   acons = coefficient table, row acons[order] is used
-------------------------------------------------------------------------- */
-
-double PPPMOld::rms(double h, double prd, bigint natoms,
-                    double q2, double **acons)
-{
-  // series in even powers of h*g_ewald, weighted by acons[order][term]
-  double series = 0.0;
-  int term = 0;
-  while (term < order) {
-    series += acons[order][term] * pow(h*g_ewald,2.0*term);
-    term++;
-  }
-
-  const double root = sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*series/natoms);
-  return q2 * pow(h*g_ewald,(double)order) * root / (prd*prd);
-}
-
-/* ----------------------------------------------------------------------
-   compute difference in real-space and KSpace RMS accuracy
-   h_x,h_y,h_z = grid spacings, q2 and acons are passed through to rms()
-   uses the current value of the member g_ewald
-   KSpace term = quadratic mean of the 3 per-dimension rms() values,
-     with the z box length scaled by slab_volfactor
-   real-space term uses the unscaled zprd
-   return KSpace estimate minus real-space estimate
-------------------------------------------------------------------------- */
-
-double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2,
-                       double **acons)
-{
-  double lprx,lpry,lprz,kspace_prec,real_prec;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  bigint natoms = atom->natoms;
-
-  lprx = rms(h_x,xprd,natoms,q2,acons);
-  lpry = rms(h_y,yprd,natoms,q2,acons);
-  lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons);
-  kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-
-  // natoms is converted to double before it enters the product
-  real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
-    sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
-  double value = kspace_prec - real_prec;
-  return value;
-}
-
-/* ----------------------------------------------------------------------
-   fill gf_b[0..order-1] with the Green's function denominator
-   expansion coeffs, Gamma(2n)
-   built by an in-place recurrence over m = 1..order-1,
-   then every entry is divided by (2*order-1)!
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_gf_denom()
-{
-  // start from the sequence 1,0,0,...
-  gf_b[0] = 1.0;
-  for (int idx = 1; idx < order; idx++) gf_b[idx] = 0.0;
-
-  // each pass updates entries m..1 from high to low index, then entry 0
-  for (int m = 1; m < order; m++) {
-    int l = m;
-    while (l > 0) {
-      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
-      l--;
-    }
-    gf_b[0] = 4.0 * (gf_b[0]*(-m)*(-m-0.5));
-  }
-
-  // factorial of 2*order-1, accumulated in a bigint
-  bigint factorial = 1;
-  int k = 1;
-  while (k < 2*order) {
-    factorial *= k;
-    k++;
-  }
-
-  const double gaminv = 1.0/factorial;
-  for (int idx = 0; idx < order; idx++) gf_b[idx] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
- ghost-swap to accumulate full density in brick decomposition
- remap density from 3d brick decomposition to FFT decomposition
- 6 exchanges are done in the order +x,-x,+y,-y,+z,-z
- each one packs ghost cells of density_brick into buf1, gets buf2 from
- the opposite neighbor (or copies buf1 if the neighbor is me),
- and adds buf2 into owned cells
- x swaps span the full y,z ghost range, y swaps only owned x,
- z swaps only owned x,y
-------------------------------------------------------------------------- */
-
- void PPPMOld::brick2fft()
-{
- int i,n,ix,iy,iz;
- MPI_Request request;
- MPI_Status status;
-
- // pack my ghosts for +x processor
- // pass data to self or +x processor
- // unpack and sum recv data into my real cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in+1; ix <= nxhi_out; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[0][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); // recv is posted before the blocking send
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // pack my ghosts for -x processor
- // pass data to self or -x processor
- // unpack and sum recv data into my real cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_out; ix < nxlo_in; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[0][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // pack my ghosts for +y processor
- // pass data to self or +y processor
- // unpack and sum recv data into my real cells
- // only owned x cells are packed from here on
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[1][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // pack my ghosts for -y processor
- // pass data to self or -y processor
- // unpack and sum recv data into my real cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy < nylo_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[1][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // pack my ghosts for +z processor
- // pass data to self or +z processor
- // unpack and sum recv data into my real cells
- // only owned x and y cells are packed from here on
-
- n = 0;
- for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[2][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // pack my ghosts for -z processor
- // pass data to self or -z processor
- // unpack and sum recv data into my real cells
-
- n = 0;
- for (iz = nzlo_out; iz < nzlo_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- buf1[n++] = density_brick[iz][iy][ix];
-
- if (comm->procneigh[2][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_brick[iz][iy][ix] += buf2[n++];
-
- // remap from 3d brick decomposition to FFT decomposition
- // copy grabs inner portion of density from 3d brick
- // remap could be done as pre-stage of FFT,
- // but this works optimally on only double values, not complex values
- // density_fft is both input and output of the remap, work1 is also passed
-
- n = 0;
- for (iz = nzlo_in; iz <= nzhi_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++)
- density_fft[n++] = density_brick[iz][iy][ix];
-
- remap->perform(density_fft,density_fft,work1);
-}
-
-/* ----------------------------------------------------------------------
- ghost-swap to fill ghost cells of my brick with field values
-------------------------------------------------------------------------- */
-
-void PPPMOld::fillbrick()
-{
- int i,n,ix,iy,iz;
- MPI_Request request;
- MPI_Status status;
-
- // pack my real cells for +z processor
- // pass data to self or +z processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[2][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz < nzlo_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-
- // pack my real cells for -z processor
- // pass data to self or -z processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[2][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-
- // pack my real cells for +y processor
- // pass data to self or +y processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[1][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy < nylo_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-
- // pack my real cells for -y processor
- // pass data to self or -y processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[1][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-
- // pack my real cells for +x processor
- // pass data to self or +x processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[0][1] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_out; ix < nxlo_in; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-
- // pack my real cells for -x processor
- // pass data to self or -x processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
- buf1[n++] = vdx_brick[iz][iy][ix];
- buf1[n++] = vdy_brick[iz][iy][ix];
- buf1[n++] = vdz_brick[iz][iy][ix];
- }
-
- if (comm->procneigh[0][0] == me)
- for (i = 0; i < n; i++) buf2[i] = buf1[i];
- else {
- MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
- MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
- vdx_brick[iz][iy][ix] = buf2[n++];
- vdy_brick[iz][iy][ix] = buf2[n++];
- vdz_brick[iz][iy][ix] = buf2[n++];
- }
-}
-
-/* ----------------------------------------------------------------------
- ghost-swap to fill ghost cells of my brick with per-atom field values
-------------------------------------------------------------------------- */
-
-void PPPMOld::fillbrick_peratom()
-{
- int i,n,ix,iy,iz;
- MPI_Request request;
- MPI_Status status;
-
- // pack my real cells for +z processor
- // pass data to self or +z processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[2][1] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[2][0],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz < nzlo_in; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-
- // pack my real cells for -z processor
- // pass data to self or -z processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[2][0] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[2][1],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-
- // pack my real cells for +y processor
- // pass data to self or +y processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[1][1] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[1][0],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy < nylo_in; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-
- // pack my real cells for -y processor
- // pass data to self or -y processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[1][0] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[1][1],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix <= nxhi_in; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-
- // pack my real cells for +x processor
- // pass data to self or +x processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[0][1] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[0][0],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_out; ix < nxlo_in; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-
- // pack my real cells for -x processor
- // pass data to self or -x processor
- // unpack and sum recv data into my ghost cells
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
- if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
- if (vflag_atom) {
- buf3[n++] = v0_brick[iz][iy][ix];
- buf3[n++] = v1_brick[iz][iy][ix];
- buf3[n++] = v2_brick[iz][iy][ix];
- buf3[n++] = v3_brick[iz][iy][ix];
- buf3[n++] = v4_brick[iz][iy][ix];
- buf3[n++] = v5_brick[iz][iy][ix];
- }
- }
-
- if (comm->procneigh[0][0] == me)
- for (i = 0; i < n; i++) buf4[i] = buf3[i];
- else {
- MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
- comm->procneigh[0][1],0,world,&request);
- MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
- MPI_Wait(&request,&status);
- }
-
- n = 0;
- for (iz = nzlo_out; iz <= nzhi_out; iz++)
- for (iy = nylo_out; iy <= nyhi_out; iy++)
- for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
- if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
- if (vflag_atom) {
- v0_brick[iz][iy][ix] = buf4[n++];
- v1_brick[iz][iy][ix] = buf4[n++];
- v2_brick[iz][iy][ix] = buf4[n++];
- v3_brick[iz][iy][ix] = buf4[n++];
- v4_brick[iz][iy][ix] = buf4[n++];
- v5_brick[iz][iy][ix] = buf4[n++];
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- find center grid pt for each of my particles
- check that full stencil for the particle will fit in my 3d brick
- store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPMOld::particle_map()
-{
- int nx,ny,nz;
-
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- int flag = 0;
- for (int i = 0; i < nlocal; i++) {
-
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // current particle coord can be outside global and local box
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
- ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
- nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
- part2grid[i][0] = nx;
- part2grid[i][1] = ny;
- part2grid[i][2] = nz;
-
- // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
- if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
- ny+nlower < nylo_out || ny+nupper > nyhi_out ||
- nz+nlower < nzlo_out || nz+nupper > nzhi_out)
- flag = 1;
- }
-
- if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid
-------------------------------------------------------------------------- */
-
-void PPPMOld::make_rho()
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density array
-
- memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- int nlocal = atom->nlocal;
-
- for (int i = 0; i < nlocal; i++) {
-
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- z0 = delvolinv * q[i];
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- y0 = z0*rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- x0 = y0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- density_brick[mz][my][mx] += x0*rho1d[0][l];
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver
-------------------------------------------------------------------------- */
-
-void PPPMOld::poisson()
-{
- int i,j,k,n;
- double eng;
-
- // transform charge density (r -> k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] = density_fft[i];
- work1[n++] = ZEROF;
- }
-
- fft1->compute(work1,work1,1);
-
- // global energy and virial contribution
-
- double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
- double s2 = scaleinv*scaleinv;
-
- if (eflag_global || vflag_global) {
- if (vflag_global) {
- n = 0;
- for (i = 0; i < nfft; i++) {
- eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
- if (eflag_global) energy += eng;
- n += 2;
- }
- } else {
- n = 0;
- for (i = 0; i < nfft; i++) {
- energy +=
- s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
- n += 2;
- }
- }
- }
-
- // scale by 1/total-grid-pts to get rho(k)
- // multiply by Green's function to get V(k)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work1[n++] *= scaleinv * greensfn[i];
- work1[n++] *= scaleinv * greensfn[i];
- }
-
- // extra FFTs for per-atom energy/virial
-
- if (evflag_atom) poisson_peratom();
-
- // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
- // FFT leaves data in 3d brick decomposition
- // copy it into inner portion of vdx,vdy,vdz arrays
-
- // x direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fkx[i]*work1[n+1];
- work2[n+1] = -fkx[i]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdx_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // y direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fky[j]*work1[n+1];
- work2[n+1] = -fky[j]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdy_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- // z direction gradient
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- work2[n] = fkz[k]*work1[n+1];
- work2[n+1] = -fkz[k]*work1[n];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- vdz_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPMOld::poisson_peratom()
-{
- int i,j,k,n;
-
- // energy
-
- if (eflag_atom) {
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n];
- work2[n+1] = work1[n+1];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- u_brick[k][j][i] = work2[n];
- n += 2;
- }
- }
-
- // 6 components of virial in v0 thru v5
-
- if (!vflag_atom) return;
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][0];
- work2[n+1] = work1[n+1]*vg[i][0];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v0_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][1];
- work2[n+1] = work1[n+1]*vg[i][1];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v1_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][2];
- work2[n+1] = work1[n+1]*vg[i][2];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v2_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][3];
- work2[n+1] = work1[n+1]*vg[i][3];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v3_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][4];
- work2[n+1] = work1[n+1]*vg[i][4];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v4_brick[k][j][i] = work2[n];
- n += 2;
- }
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work2[n] = work1[n]*vg[i][5];
- work2[n+1] = work1[n+1]*vg[i][5];
- n += 2;
- }
-
- fft2->compute(work2,work2,-1);
-
- n = 0;
- for (k = nzlo_in; k <= nzhi_in; k++)
- for (j = nylo_in; j <= nyhi_in; j++)
- for (i = nxlo_in; i <= nxhi_in; i++) {
- v5_brick[k][j][i] = work2[n];
- n += 2;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPMOld::fieldforce()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR ekx,eky,ekz;
-
- // loop over my charges, interpolate electric field from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
- // ek = 3 components of E-field on particle
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- ekx = eky = ekz = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- ekx -= x0*vdx_brick[mz][my][mx];
- eky -= x0*vdy_brick[mz][my][mx];
- ekz -= x0*vdz_brick[mz][my][mx];
- }
- }
- }
-
- // convert E-field to force
-
- const double qfactor = force->qqrd2e * scale * q[i];
- f[i][0] += qfactor*ekx;
- f[i][1] += qfactor*eky;
- if (slabflag != 2) f[i][2] += qfactor*ekz;
- }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPMOld::fieldforce_peratom()
-{
- int i,l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
- FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
-
- // loop over my charges, interpolate from nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- double **f = atom->f;
-
- int nlocal = atom->nlocal;
-
- for (i = 0; i < nlocal; i++) {
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- z0 = rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- y0 = z0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
- x0 = y0*rho1d[0][l];
- if (eflag_atom) u += x0*u_brick[mz][my][mx];
- if (vflag_atom) {
- v0 += x0*v0_brick[mz][my][mx];
- v1 += x0*v1_brick[mz][my][mx];
- v2 += x0*v2_brick[mz][my][mx];
- v3 += x0*v3_brick[mz][my][mx];
- v4 += x0*v4_brick[mz][my][mx];
- v5 += x0*v5_brick[mz][my][mx];
- }
- }
- }
- }
-
- if (eflag_atom) eatom[i] += q[i]*u;
- if (vflag_atom) {
- vatom[i][0] += v0;
- vatom[i][1] += v1;
- vatom[i][2] += v2;
- vatom[i][3] += v3;
- vatom[i][4] += v4;
- vatom[i][5] += v5;
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
-------------------------------------------------------------------------- */
-
-void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
- // loop thru all possible factorizations of nprocs
- // surf = surface area of largest proc sub-domain
- // innermost if test minimizes surface area and surface/volume ratio
-
- int bestsurf = 2 * (nx + ny);
- int bestboxx = 0;
- int bestboxy = 0;
-
- int boxx,boxy,surf,ipx,ipy;
-
- ipx = 1;
- while (ipx <= nprocs) {
- if (nprocs % ipx == 0) {
- ipy = nprocs/ipx;
- boxx = nx/ipx;
- if (nx % ipx) boxx++;
- boxy = ny/ipy;
- if (ny % ipy) boxy++;
- surf = boxx + boxy;
- if (surf < bestsurf ||
- (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
- bestsurf = surf;
- bestboxx = boxx;
- bestboxy = boxy;
- *px = ipx;
- *py = ipy;
- }
- }
- ipx++;
- }
-}
-
-/* ----------------------------------------------------------------------
- charge assignment into rho1d
- dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
- const FFT_SCALAR &dz)
-{
- int k,l;
- FFT_SCALAR r1,r2,r3;
-
- for (k = (1-order)/2; k <= order/2; k++) {
- r1 = r2 = r3 = ZEROF;
-
- for (l = order-1; l >= 0; l--) {
- r1 = rho_coeff[l][k] + r1*dx;
- r2 = rho_coeff[l][k] + r2*dy;
- r3 = rho_coeff[l][k] + r3*dz;
- }
- rho1d[0][k] = r1;
- rho1d[1][k] = r2;
- rho1d[2][k] = r3;
- }
-}
-
-/* ----------------------------------------------------------------------
- generate coeffients for the weight function of order n
-
- (n-1)
- Wn(x) = Sum wn(k,x) , Sum is over every other integer
- k=-(n-1)
- For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
- k is odd integers if n is even and even integers if n is odd
- ---
- | n-1
- | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
- wn(k,x) = < l=0
- |
- | 0 otherwise
- ---
- a coeffients are packed into the array rho_coeff to eliminate zeros
- rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_rho_coeff()
-{
- int j,k,l,m;
- FFT_SCALAR s;
-
- FFT_SCALAR **a;
- memory->create2d_offset(a,order,-order,order,"pppm:a");
-
- for (k = -order; k <= order; k++)
- for (l = 0; l < order; l++)
- a[l][k] = 0.0;
-
- a[0][0] = 1.0;
- for (j = 1; j < order; j++) {
- for (k = -j; k <= j; k += 2) {
- s = 0.0;
- for (l = 0; l < j; l++) {
- a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
- s += powf(0.5,(float) l+1) *
- (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
- s += pow(0.5,(double) l+1) *
- (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
- }
- a[0][k] = s;
- }
- }
-
- m = (1-order)/2;
- for (k = -(order-1); k < order; k += 2) {
- for (l = 0; l < order; l++)
- rho_coeff[l][m] = a[l][k];
- m++;
- }
-
- memory->destroy2d_offset(a,-order);
-}
-
-/* ----------------------------------------------------------------------
- Slab-geometry correction term to dampen inter-slab interactions between
- periodically repeating slabs. Yields good approximation to 2D Ewald if
- adequate empty space is left between repeating slabs (J. Chem. Phys.
- 111, 3155). Slabs defined here to be parallel to the xy plane. Also
- extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPMOld::slabcorr()
-{
- // compute local contribution to global dipole moment
-
- double *q = atom->q;
- double **x = atom->x;
- double zprd = domain->zprd;
- int nlocal = atom->nlocal;
-
- double dipole = 0.0;
- for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
- // sum local contributions to get global dipole moment
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
- // need to make non-neutral systems and/or
- // per-atom energy translationally invariant
-
- double dipole_r2 = 0.0;
- if (eflag_atom || fabs(qsum) > SMALL) {
- for (int i = 0; i < nlocal; i++)
- dipole_r2 += q[i]*x[i][2]*x[i][2];
-
- // sum local contributions
-
- double tmp;
- MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- dipole_r2 = tmp;
- }
-
- // compute corrections
-
- const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
- qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
- const double qscale = force->qqrd2e * scale;
-
- if (eflag_global) energy += qscale * e_slabcorr;
-
- // per-atom energy
-
- if (eflag_atom) {
- double efact = qscale * MY_2PI/volume;
- for (int i = 0; i < nlocal; i++)
- eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
- qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
- }
-
- // add on force corrections
-
- double ffact = qscale * (-4.0*MY_PI/volume);
- double **f = atom->f;
-
- for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-
-/* ----------------------------------------------------------------------
- perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMOld::timing_1d(int n, double &time1d)
-{
- double time1,time2;
-
- for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- for (int i = 0; i < n; i++) {
- fft1->timing1d(work1,nfft_both,1);
- fft2->timing1d(work1,nfft_both,-1);
- fft2->timing1d(work1,nfft_both,-1);
- fft2->timing1d(work1,nfft_both,-1);
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time1d = time2 - time1;
-
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMOld::timing_3d(int n, double &time3d)
-{
- double time1,time2;
-
- for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
- MPI_Barrier(world);
- time1 = MPI_Wtime();
-
- for (int i = 0; i < n; i++) {
- fft1->compute(work1,work1,1);
- fft2->compute(work1,work1,-1);
- fft2->compute(work1,work1,-1);
- fft2->compute(work1,work1,-1);
- }
-
- MPI_Barrier(world);
- time2 = MPI_Wtime();
- time3d = time2 - time1;
-
- return 4;
-}
-
-/* ----------------------------------------------------------------------
- memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPMOld::memory_usage()
-{
- double bytes = nmax*3 * sizeof(double);
- int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
- bytes += 4 * nbrick * sizeof(FFT_SCALAR);
- bytes += 6 * nfft_both * sizeof(double);
- bytes += nfft_both * sizeof(double);
- bytes += nfft_both*5 * sizeof(FFT_SCALAR);
- bytes += 2 * nbuf * sizeof(FFT_SCALAR);
-
- if (peratom_allocate_flag) {
- bytes += 7 * nbrick * sizeof(FFT_SCALAR);
- bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR);
- }
-
- if (group_allocate_flag) {
- bytes += 2 * nbrick * sizeof(FFT_SCALAR);
- bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;
- }
-
- return bytes;
-}
-
-/* ----------------------------------------------------------------------
- group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- compute the PPPM total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag)
-{
- if (slabflag)
- error->all(FLERR,"Cannot (yet) use K-space slab "
- "correction with compute group/group");
-
- int i,j;
-
- if (!group_allocate_flag) {
- allocate_groups();
- group_allocate_flag = 1;
- }
-
- e2group = 0; //energy
- f2group[0] = 0; //force in x-direction
- f2group[1] = 0; //force in y-direction
- f2group[2] = 0; //force in z-direction
-
- double *q = atom->q;
- int nlocal = atom->nlocal;
- int *mask = atom->mask;
-
-
- // map my particle charge onto my local 3d density grid
-
- make_rho_groups(groupbit_A,groupbit_B,BA_flag);
-
- // all procs communicate density values from their ghost cells
- // to fully sum contribution in their 3d bricks
- // remap from 3d decomposition to FFT decomposition
-
- // temporarily store and switch pointers so we can
- // use brick2fft() for groups A and B (without
- // writing an additional function)
-
- FFT_SCALAR ***density_brick_real = density_brick;
- FFT_SCALAR *density_fft_real = density_fft;
-
- // group A
-
- density_brick = density_A_brick;
- density_fft = density_A_fft;
-
- brick2fft();
-
- // group B
-
- density_brick = density_B_brick;
- density_fft = density_B_fft;
-
- brick2fft();
-
- // switch back pointers
-
- density_brick = density_brick_real;
- density_fft = density_fft_real;
-
- // compute potential gradient on my FFT grid and
- // portion of group-group energy/force on this proc's FFT grid
-
- poisson_groups(BA_flag);
-
- const double qscale = force->qqrd2e * scale;
-
- // total group A <--> group B energy
- // self and boundary correction terms are in compute_group_group.cpp
-
- double e2group_all;
- MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
- e2group = e2group_all;
-
- e2group *= qscale*0.5*volume;
-
- // total group A <--> group B force
-
- double f2group_all[3];
- MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
-
- for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i];
-}
-
-/* ----------------------------------------------------------------------
- allocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPMOld::allocate_groups()
-{
- memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_A_brick");
- memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_B_brick");
- memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
- memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
-}
-
-/* ----------------------------------------------------------------------
- deallocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPMOld::deallocate_groups()
-{
- memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
- memory->destroy(density_A_fft);
- memory->destroy(density_B_fft);
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
-{
- int l,m,n,nx,ny,nz,mx,my,mz;
- FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
- // clear 3d density arrays
-
- memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
- ngrid*sizeof(FFT_SCALAR));
-
- // loop over my charges, add their contribution to nearby grid points
- // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
- // (dx,dy,dz) = distance to "lower left" grid pt
- // (mx,my,mz) = global coords of moving stencil pt
-
- double *q = atom->q;
- double **x = atom->x;
- int nlocal = atom->nlocal;
- int *mask = atom->mask;
-
- for (int i = 0; i < nlocal; i++) {
-
- if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
- if (BA_flag) continue;
-
- if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
- nx = part2grid[i][0];
- ny = part2grid[i][1];
- nz = part2grid[i][2];
- dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
- dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
- dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
- compute_rho1d(dx,dy,dz);
-
- z0 = delvolinv * q[i];
- for (n = nlower; n <= nupper; n++) {
- mz = n+nz;
- y0 = z0*rho1d[2][n];
- for (m = nlower; m <= nupper; m++) {
- my = m+ny;
- x0 = y0*rho1d[1][m];
- for (l = nlower; l <= nupper; l++) {
- mx = l+nx;
-
- // group A
-
- if (mask[i] & groupbit_A)
- density_A_brick[mz][my][mx] += x0*rho1d[0][l];
-
- // group B
-
- if (mask[i] & groupbit_B)
- density_B_brick[mz][my][mx] += x0*rho1d[0][l];
- }
- }
- }
- }
- }
-}
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPMOld::poisson_groups(int BA_flag)
-{
- int i,j,k,n;
- double eng;
-
- // reuse memory (already declared)
-
- FFT_SCALAR *work_A = work1;
- FFT_SCALAR *work_B = work2;
-
- // transform charge density (r -> k)
-
- // group A
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_A[n++] = density_A_fft[i];
- work_A[n++] = ZEROF;
- }
-
- fft1->compute(work_A,work_A,1);
-
- // group B
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_B[n++] = density_B_fft[i];
- work_B[n++] = ZEROF;
- }
-
- fft1->compute(work_B,work_B,1);
-
- // group-group energy and force contribution,
- // keep everything in reciprocal space so
- // no inverse FFTs needed
-
- double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
- double s2 = scaleinv*scaleinv;
-
- // energy
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- e2group += s2 * greensfn[i] *
- (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
- n += 2;
- }
-
- if (BA_flag) return;
-
-
- // multiply by Green's function and s2
- // (only for work_A so it is not squared below)
-
- n = 0;
- for (i = 0; i < nfft; i++) {
- work_A[n++] *= s2 * greensfn[i];
- work_A[n++] *= s2 * greensfn[i];
- }
-
- double partial_group;
-
- // force, x direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[0] += fkx[i] * partial_group;
- n += 2;
- }
-
- // force, y direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[1] += fky[j] * partial_group;
- n += 2;
- }
-
- // force, z direction
-
- n = 0;
- for (k = nzlo_fft; k <= nzhi_fft; k++)
- for (j = nylo_fft; j <= nyhi_fft; j++)
- for (i = nxlo_fft; i <= nxhi_fft; i++) {
- partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
- f2group[2] += fkz[k] * partial_group;
- n += 2;
- }
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+ per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm_old.h"
+#include "math_const.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXORDER 7
+#define OFFSET 16384
+#define SMALL 0.00001
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF 1.0f
+#else
+#define ZEROF 0.0
+#define ONEF 1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+ if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); // arg[0] is read below, so one argument is mandatory
+
+ triclinic_support = 0;
+ pppmflag = 1;
+ group_group_enable = 0;
+
+ accuracy_relative = fabs(force->numeric(FLERR,arg[0])); // sign of the accuracy argument is discarded
+
+ nfactors = 3; // length of factors[] below
+ factors = new int[nfactors]; // released with delete [] in the destructor
+ factors[0] = 2; // NOTE(review): presumably the prime factors allowed in FFT grid sizes - confirm in set_grid()
+ factors[1] = 3;
+ factors[2] = 5;
+
+ MPI_Comm_rank(world,&me);
+ MPI_Comm_size(world,&nprocs);
+
+ density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; // created by allocate()
+ density_fft = NULL;
+ u_brick = NULL; // created by allocate_peratom()
+ v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+ greensfn = NULL;
+ work1 = work2 = NULL;
+ vg = NULL;
+ fkx = fky = fkz = NULL;
+ buf1 = buf2 = buf3 = buf4 = NULL;
+
+ density_A_brick = density_B_brick = NULL; // group/group arrays, created by allocate_groups()
+ density_A_fft = density_B_fft = NULL;
+
+ gf_b = NULL;
+ rho1d = rho_coeff = NULL;
+
+ fft1 = fft2 = NULL; // FFT3d and Remap objects are built with new in allocate()
+ remap = NULL;
+
+ nmax = 0; // compute() sizes part2grid the first time nlocal exceeds this
+ part2grid = NULL;
+}
+
+/* ----------------------------------------------------------------------
+ free all memory
+------------------------------------------------------------------------- */
+
+PPPMOld::~PPPMOld()
+{
+ delete [] factors; // allocated with new[] in the constructor
+ deallocate();
+ deallocate_peratom();
+ deallocate_groups();
+ memory->destroy(part2grid); // still NULL if compute() never ran - assumes destroy() tolerates NULL (TODO confirm)
+}
+
+/* ----------------------------------------------------------------------
+   called once before run: check settings, size grids and buffers, allocate
+------------------------------------------------------------------------- */
+
+void PPPMOld::init()
+{
+  if (me == 0) {
+    if (screen) fprintf(screen,"PPPM initialization ...\n");
+    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
+  }
+
+  // error check
+
+  triclinic_check();
+  if (domain->dimension == 2) error->all(FLERR,
+                                         "Cannot use PPPM with 2d simulation");
+
+  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
+  if (slabflag) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPM");
+  }
+
+  if (order < 2 || order > MAXORDER) {
+    char str[128];
+    sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
+    error->all(FLERR,str);
+  }
+
+  // free all arrays previously allocated
+
+  deallocate();
+  deallocate_peratom();
+  peratom_allocate_flag = 0;
+  deallocate_groups();
+  group_allocate_flag = 0;
+
+  // extract short-range Coulombic cutoff from pair style
+
+  scale = 1.0;
+
+  pair_check();
+
+  int itmp=0;
+  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+  if (p_cutoff == NULL)
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  cutoff = *p_cutoff;
+
+  // if kspace is TIP4P, extract TIP4P params from pair style
+  // bond/angle are not yet init(), so ensure equilibrium request is valid
+
+  qdist = 0.0;
+
+  if (tip4pflag) {
+    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+      error->all(FLERR,"KSpace style is incompatible with Pair style");
+    qdist = *p_qdist;
+    typeO = *p_typeO;
+    typeH = *p_typeH;
+    int typeA = *p_typeA;
+    int typeB = *p_typeB;
+
+    if (force->angle == NULL || force->bond == NULL)
+      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+    if (typeA < 1 || typeA > atom->nangletypes ||
+        force->angle->setflag[typeA] == 0)
+      error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
+    if (typeB < 1 || typeB > atom->nbondtypes ||
+        force->bond->setflag[typeB] == 0)
+      error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
+    double theta = force->angle->equilibrium_angle(typeA);
+    double blen = force->bond->equilibrium_distance(typeB);
+    alpha = qdist / (cos(0.5*theta) * blen);
+  }
+
+  // compute qsum & qsqsum and warn if not charge-neutral
+
+  qsum = qsqsum = 0.0;
+  for (int i = 0; i < atom->nlocal; i++) {
+    qsum += atom->q[i];
+    qsqsum += atom->q[i]*atom->q[i];
+  }
+
+  double tmp;
+  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum = tmp;
+  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsqsum = tmp;
+
+  if (qsqsum == 0.0)
+    error->all(FLERR,"Cannot use kspace solver on system with no charge");
+  if (fabs(qsum) > SMALL && me == 0) {
+    char str[128];
+    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+    error->warning(FLERR,str);
+  }
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+  else accuracy = accuracy_relative * two_charge_force;
+
+  // setup FFT grid resolution and g_ewald
+  // normally one iteration thru while loop is all that is required
+  // if grid stencil extends beyond neighbor proc, reduce order and try again
+
+  int iteration = 0;
+
+  while (order > 1) {
+    if (iteration && me == 0)
+      error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
+                     "beyond neighbor processor");
+    iteration++;
+
+    set_grid();
+
+    if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+      error->all(FLERR,"PPPM grid is too large");
+
+    // global indices of PPPM grid range from 0 to N-1
+    // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+    //   global PPPM grid that I own without ghost cells
+    // for slab PPPM, assign z grid as if it were not extended
+
+    nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
+    nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
+
+    nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
+    nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
+
+    nzlo_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
+    nzhi_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
+
+    // nlower,nupper = stencil size for mapping particles to PPPM grid
+
+    nlower = -(order-1)/2;
+    nupper = order/2;
+
+    // shift values for particle <-> grid mapping
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    if (order % 2) shift = OFFSET + 0.5;
+    else shift = OFFSET;
+    if (order % 2) shiftone = 0.0;
+    else shiftone = 0.5;
+
+    // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+    //   global PPPM grid that my particles can contribute charge to
+    // effectively nlo_in,nhi_in + ghost cells
+    // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+    //           position a particle in my box can be at
+    // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+    //   qdist = offset due to TIP4P fictitious charge
+    //   convert to triclinic if necessary
+    // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+    // for slab PPPM, assign z grid as if it were not extended
+
+    triclinic = domain->triclinic;
+    double *prd,*sublo,*subhi;
+
+    if (triclinic == 0) {
+      prd = domain->prd;
+      boxlo = domain->boxlo;
+      sublo = domain->sublo;
+      subhi = domain->subhi;
+    } else {
+      prd = domain->prd_lamda;
+      boxlo = domain->boxlo_lamda;
+      sublo = domain->sublo_lamda;
+      subhi = domain->subhi_lamda;
+    }
+
+    double xprd = prd[0];
+    double yprd = prd[1];
+    double zprd = prd[2];
+    double zprd_slab = zprd*slab_volfactor;
+
+    double dist[3];
+    double cuthalf = 0.5*neighbor->skin + qdist;
+    if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+    else {
+      dist[0] = cuthalf/domain->prd[0];
+      dist[1] = cuthalf/domain->prd[1];
+      dist[2] = cuthalf/domain->prd[2];
+    }
+
+    int nlo,nhi;
+
+    nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+    nxlo_out = nlo + nlower;
+    nxhi_out = nhi + nupper;
+
+    nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+    nylo_out = nlo + nlower;
+    nyhi_out = nhi + nupper;
+
+    nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+    nzlo_out = nlo + nlower;
+    nzhi_out = nhi + nupper;
+
+    // for slab PPPM, change the grid boundary for processors at +z end
+    //   to include the empty volume between periodically repeating slabs
+    // for slab PPPM, want charge data communicated from -z proc to +z proc,
+    //   but not vice versa, also want field data communicated from +z proc to
+    //   -z proc, but not vice versa
+    // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
+
+    if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) {
+      nzhi_in = nz_pppm - 1;
+      nzhi_out = nz_pppm - 1;
+    }
+
+    // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
+    //   that overlay domain I own
+    // proc in that direction tells me via sendrecv()
+    // if no neighbor proc, value is from self since I have ghosts regardless
+
+    int nplanes;
+    MPI_Status status;
+
+    nplanes = nxlo_in - nxlo_out;
+    if (comm->procneigh[0][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
+                   &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0,
+                   world,&status);
+    else nxhi_ghost = nplanes;
+
+    nplanes = nxhi_out - nxhi_in;
+    if (comm->procneigh[0][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
+                   &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0],
+                   0,world,&status);
+    else nxlo_ghost = nplanes;
+
+    nplanes = nylo_in - nylo_out;
+    if (comm->procneigh[1][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
+                   &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0,
+                   world,&status);
+    else nyhi_ghost = nplanes;
+
+    nplanes = nyhi_out - nyhi_in;
+    if (comm->procneigh[1][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
+                   &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0,
+                   world,&status);
+    else nylo_ghost = nplanes;
+
+    nplanes = nzlo_in - nzlo_out;
+    if (comm->procneigh[2][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
+                   &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0,
+                   world,&status);
+    else nzhi_ghost = nplanes;
+
+    nplanes = nzhi_out - nzhi_in;
+    if (comm->procneigh[2][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
+                   &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0,
+                   world,&status);
+    else nzlo_ghost = nplanes;
+
+    // test that ghost overlap is not bigger than my sub-domain
+
+    int flag = 0;
+    if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1;
+    if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1;
+    if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1;
+    if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1;
+    if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1;
+    if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1;
+
+    int flag_all;
+    MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
+
+    if (flag_all == 0) break;
+    order--;
+  }
+  // loop exits via order-- with order == 1: below the minimum of 2, grid data is stale
+  if (order < 2) error->all(FLERR,"PPPM order < minimum allowed order");
+
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clump of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_pppm >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_fft = 0;
+  nxhi_fft = nx_pppm - 1;
+  nylo_fft = me_y*ny_pppm/npey_fft;
+  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+  nzlo_fft = me_z*nz_pppm/npez_fft;
+  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+  // PPPM grid for this proc, including ghosts
+
+  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);
+
+  // FFT arrays on this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
+    (nzhi_fft-nzlo_fft+1);
+  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
+    (nzhi_in-nzlo_in+1);
+  nfft_both = MAX(nfft,nfft_brick);
+
+  // buffer space for use in brick2fft and fillbrick
+  // idel = max # of ghost planes to send or recv in +/- dir of each dim
+  // nx,ny,nz = owned planes (including ghosts) in each dim
+  // nxx,nyy,nzz = max # of grid cells to send in each dim
+  // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick
+
+  int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;
+
+  idelx = MAX(nxlo_ghost,nxhi_ghost);
+  idelx = MAX(idelx,nxhi_out-nxhi_in);
+  idelx = MAX(idelx,nxlo_in-nxlo_out);
+
+  idely = MAX(nylo_ghost,nyhi_ghost);
+  idely = MAX(idely,nyhi_out-nyhi_in);
+  idely = MAX(idely,nylo_in-nylo_out);
+
+  idelz = MAX(nzlo_ghost,nzhi_ghost);
+  idelz = MAX(idelz,nzhi_out-nzhi_in);
+  idelz = MAX(idelz,nzlo_in-nzlo_out);
+
+  nx = nxhi_out - nxlo_out + 1;
+  ny = nyhi_out - nylo_out + 1;
+  nz = nzhi_out - nzlo_out + 1;
+
+  nxx = idelx * ny * nz;
+  nyy = idely * nx * nz;
+  nzz = idelz * nx * ny;
+
+  nbuf = MAX(nxx,nyy);
+  nbuf = MAX(nbuf,nzz);
+
+  nbuf_peratom = 7*nbuf;
+  nbuf *= 3;
+
+  // print stats
+
+  int ngrid_max,nfft_both_max,nbuf_max;
+  MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+  MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+  MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world);
+
+  if (me == 0) {
+    if (screen) fprintf(screen,"  brick FFT buffer size/proc = %d %d %d\n",
+                        ngrid_max,nfft_both_max,nbuf_max);
+    if (logfile) fprintf(logfile,"  brick FFT buffer size/proc = %d %d %d\n",
+                         ngrid_max,nfft_both_max,nbuf_max);
+  }
+
+  // allocate K-space dependent memory
+  // don't invoke allocate_peratom() here, wait to see if needed
+
+  allocate();
+
+  // pre-compute Green's function denominator expansion
+  // pre-compute 1d charge distribution coefficients
+
+  compute_gf_denom();
+  compute_rho_coeff();
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPM coeffs (fkx/fky/fkz, vg, greensfn), called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void PPPMOld::setup()
+{
+  int i,j,k,l,m,n;
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  delxinv = nx_pppm/xprd;
+  delyinv = ny_pppm/yprd;
+  delzinv = nz_pppm/zprd_slab;
+
+  delvolinv = delxinv*delyinv*delzinv;
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // fkx,fky,fkz for my FFT grid pts
+
+  double per;
+
+  for (i = nxlo_fft; i <= nxhi_fft; i++) {
+    per = i - nx_pppm*(2*i/nx_pppm);
+    fkx[i] = unitkx*per;
+  }
+
+  for (i = nylo_fft; i <= nyhi_fft; i++) {
+    per = i - ny_pppm*(2*i/ny_pppm);
+    fky[i] = unitky*per;
+  }
+
+  for (i = nzlo_fft; i <= nzhi_fft; i++) {
+    per = i - nz_pppm*(2*i/nz_pppm);
+    fkz[i] = unitkz*per;
+  }
+
+  // virial coefficients
+
+  double sqk,vterm;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++) {
+    for (j = nylo_fft; j <= nyhi_fft; j++) {
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+        if (sqk == 0.0) {
+          vg[n][0] = 0.0;
+          vg[n][1] = 0.0;
+          vg[n][2] = 0.0;
+          vg[n][3] = 0.0;
+          vg[n][4] = 0.0;
+          vg[n][5] = 0.0;
+        } else {
+          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+          vg[n][3] = vterm*fkx[i]*fky[j];
+          vg[n][4] = vterm*fkx[i]*fkz[k];
+          vg[n][5] = vterm*fky[j]*fkz[k];
+        }
+        n++;
+      }
+    }
+  }
+
+  // modified (Hockney-Eastwood) Coulomb Green's function
+
+  int nx,ny,nz,kper,lper,mper;
+  double snx,sny,snz,snx2,sny2,snz2;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+
+  int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
+                              pow(-log(EPS_HOC),0.25));
+  int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
+                              pow(-log(EPS_HOC),0.25));
+  int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
+                              pow(-log(EPS_HOC),0.25));
+
+  double form = 1.0;
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm);
+    snz2 = snz*snz;
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = sin(0.5*unitky*lper*yprd/ny_pppm);
+      sny2 = sny*sny;
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = sin(0.5*unitkx*kper*xprd/nx_pppm);
+        snx2 = snx*snx;
+
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
+          pow(unitkz*mper,2.0);
+
+        if (sqk != 0.0) {
+          numerator = form*12.5663706/sqk;
+          denominator = gf_denom(snx2,sny2,snz2);
+          sum1 = 0.0;
+          const double dorder = static_cast<double>(order);
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*pow(qx/g_ewald,2.0));
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*pow(qy/g_ewald,2.0));
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*pow(qz/g_ewald,2.0));
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0);
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ compute the PPPM long-range force, energy, virial; eflag/vflag select which energy/virial tallies are kept
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute(int eflag, int vflag)
+{
+ int i,j;
+
+ // set energy/virial flags
+ // invoke allocate_peratom() if needed for first time
+
+ if (eflag || vflag) ev_setup(eflag,vflag);
+ else evflag = evflag_atom = eflag_global = vflag_global =
+ eflag_atom = vflag_atom = 0; // nothing requested: clear every tally flag
+
+ if (evflag_atom && !peratom_allocate_flag) {
+ allocate_peratom();
+ peratom_allocate_flag = 1; // reset to 0 by init(), which also frees the per-atom arrays
+ }
+
+ // convert atoms from box to lamda coords
+
+ if (triclinic == 0) boxlo = domain->boxlo;
+ else {
+ boxlo = domain->boxlo_lamda;
+ domain->x2lamda(atom->nlocal); // undone by lamda2x() at the end of this function
+ }
+
+ // extend size of per-atom arrays if necessary
+
+ if (atom->nlocal > nmax) {
+ memory->destroy(part2grid); // old contents are dropped, not copied
+ nmax = atom->nmax;
+ memory->create(part2grid,nmax,3,"pppm:part2grid");
+ }
+
+ // find grid points for all my particles
+ // map my particle charge onto my local 3d density grid
+
+ particle_map();
+ make_rho();
+
+ // all procs communicate density values from their ghost cells
+ // to fully sum contribution in their 3d bricks
+ // remap from 3d decomposition to FFT decomposition
+
+ brick2fft();
+
+ // compute potential gradient on my FFT grid and
+ // portion of e_long on this proc's FFT grid
+ // return gradients (electric fields) in 3d brick decomposition
+ // also performs per-atom calculations via poisson_peratom()
+
+ poisson();
+
+ // all procs communicate E-field values
+ // to fill ghost cells surrounding their 3d bricks
+
+ fillbrick();
+
+ // extra per-atom energy/virial communication
+
+ if (evflag_atom) fillbrick_peratom();
+
+ // calculate the force on my particles
+
+ fieldforce();
+
+ // extra per-atom energy/virial work on my particles (fieldforce_peratom() body not visible here)
+
+ if (evflag_atom) fieldforce_peratom();
+
+ // sum global energy across procs and add in volume-dependent term
+
+ const double qscale = force->qqrd2e * scale; // common prefactor for energy, virial and per-atom tallies below
+
+ if (eflag_global) {
+ double energy_all;
+ MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+ energy = energy_all;
+
+ energy *= 0.5*volume;
+ energy -= g_ewald*qsqsum/MY_PIS +
+ MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); // second term is zero when qsum == 0
+ energy *= qscale;
+ }
+
+ // sum global virial across procs
+
+ if (vflag_global) {
+ double virial_all[6];
+ MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+ for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+ }
+
+ // per-atom energy/virial
+ // energy includes self-energy correction
+
+ if (evflag_atom) {
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+
+ if (eflag_atom) {
+ for (i = 0; i < nlocal; i++) {
+ eatom[i] *= 0.5;
+ eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+ (g_ewald*g_ewald*volume); // per-atom form of the two terms subtracted from the global energy
+ eatom[i] *= qscale;
+ }
+ }
+
+ if (vflag_atom) {
+ for (i = 0; i < nlocal; i++)
+ for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
+ }
+ }
+
+ // 2d slab correction
+
+ if (slabflag == 1) slabcorr();
+
+ // convert atoms back from lamda to box coords
+
+ if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+ allocate memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPMOld::allocate()
+{
+ memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:density_brick"); // bricks span the *_out index range computed in init()
+ memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdx_brick");
+ memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdy_brick");
+ memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+ nxlo_out,nxhi_out,"pppm:vdz_brick");
+
+ memory->create(density_fft,nfft_both,"pppm:density_fft");
+ memory->create(greensfn,nfft_both,"pppm:greensfn");
+ memory->create(work1,2*nfft_both,"pppm:work1"); // two values per grid point - presumably interleaved real/imaginary (confirm in poisson())
+ memory->create(work2,2*nfft_both,"pppm:work2");
+ memory->create(vg,nfft_both,6,"pppm:vg"); // 6 virial coefficients per point, filled in setup()
+
+ memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); // indexed by global FFT grid index, filled in setup()
+ memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
+ memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
+
+ memory->create(buf1,nbuf,"pppm:buf1"); // nbuf is sized in init()
+ memory->create(buf2,nbuf,"pppm:buf2");
+
+ // summation coeffs
+
+ memory->create(gf_b,order,"pppm:gf_b");
+ memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
+ memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
+
+ // create 2 FFTs and a Remap
+ // 1st FFT keeps data in FFT decomposition
+ // 2nd FFT returns data in 3d brick decomposition
+ // remap takes data from 3d brick to FFT decomposition
+
+ int tmp; // receives an output of the FFT3d constructor that is not used here
+
+ fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ 0,0,&tmp);
+
+ fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ 0,0,&tmp);
+
+ remap = new Remap(lmp,world,
+ nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+ nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+ 1,0,0,FFT_PRECISION);
+}
+
+/* ----------------------------------------------------------------------
+   set up the per-atom energy/virial bricks and their swap buffers
+   sizes depend on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPMOld::allocate_peratom()
+{
+  // one brick for per-atom energy
+
+  memory->create3d_offset(u_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:u_brick");
+
+  // six bricks for per-atom virial, one per component
+
+  memory->create3d_offset(v0_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v0_brick");
+  memory->create3d_offset(v1_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v1_brick");
+  memory->create3d_offset(v2_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v2_brick");
+  memory->create3d_offset(v3_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v3_brick");
+  memory->create3d_offset(v4_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v4_brick");
+  memory->create3d_offset(v5_brick,
+                          nzlo_out,nzhi_out,nylo_out,nyhi_out,nxlo_out,nxhi_out,
+                          "pppm:v5_brick");
+
+  // pack/recv buffers for the per-atom ghost swaps
+
+  memory->create(buf3,nbuf_peratom,"pppm:buf3");
+  memory->create(buf4,nbuf_peratom,"pppm:buf4");
+}
+
+/* ----------------------------------------------------------------------
+   release everything set up by allocate():
+   grid bricks, FFT-decomposition arrays, swap buffers,
+   stencil coefficients, the two FFTs and the remap
+------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate()
+{
+  // density and field bricks; offsets must match those used at creation
+
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+
+  // flat arrays in FFT decomposition
+
+  memory->destroy(density_fft);
+  memory->destroy(greensfn);
+  memory->destroy(work1);
+  memory->destroy(work2);
+  memory->destroy(vg);
+
+  // 1d tables, one per dimension
+
+  memory->destroy1d_offset(fkx,nxlo_fft);
+  memory->destroy1d_offset(fky,nylo_fft);
+  memory->destroy1d_offset(fkz,nzlo_fft);
+
+  // ghost-swap buffers
+
+  memory->destroy(buf1);
+  memory->destroy(buf2);
+
+  // summation coeffs
+
+  memory->destroy(gf_b);
+  memory->destroy2d_offset(rho1d,-order/2);
+  memory->destroy2d_offset(rho_coeff,(1-order)/2);
+
+  // FFT and remap objects
+
+  delete fft1;
+  delete fft2;
+  delete remap;
+}
+
+/* ----------------------------------------------------------------------
+   release the per-atom energy/virial bricks and their swap buffers
+   counterpart of allocate_peratom()
+------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate_peratom()
+{
+  // per-atom energy brick
+
+  memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+
+  // per-atom virial bricks, one per component
+
+  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
+
+  // per-atom ghost-swap buffers
+
+  memory->destroy(buf3);
+  memory->destroy(buf4);
+}
+
+/* ----------------------------------------------------------------------
+   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
+   g_ewald is only computed when gewaldflag is unset
+   grid size is only computed when gridflag is unset,
+     but it is always boosted until factorable
+   prints the chosen parameters and estimated accuracy on proc 0
+------------------------------------------------------------------------- */
+
+void PPPMOld::set_grid()
+{
+  // see JCP 109, pg 7698 for derivation of coefficients
+  // higher order coefficients may be computed if needed
+  // rms() reads only acons[order][m] for 0 <= m < order
+
+  double **acons;
+  memory->create(acons,8,7,"pppm:acons");
+
+  acons[1][0] = 2.0 / 3.0;
+  acons[2][0] = 1.0 / 50.0;
+  acons[2][1] = 5.0 / 294.0;
+  acons[3][0] = 1.0 / 588.0;
+  acons[3][1] = 7.0 / 1440.0;
+  acons[3][2] = 21.0 / 3872.0;
+  acons[4][0] = 1.0 / 4320.0;
+  acons[4][1] = 3.0 / 1936.0;
+  acons[4][2] = 7601.0 / 2271360.0;
+  acons[4][3] = 143.0 / 28800.0;
+  acons[5][0] = 1.0 / 23232.0;
+  acons[5][1] = 7601.0 / 13628160.0;
+  acons[5][2] = 143.0 / 69120.0;
+  acons[5][3] = 517231.0 / 106536960.0;
+  acons[5][4] = 106640677.0 / 11737571328.0;
+  acons[6][0] = 691.0 / 68140800.0;
+  acons[6][1] = 13.0 / 57600.0;
+  acons[6][2] = 47021.0 / 35512320.0;
+  acons[6][3] = 9694607.0 / 2095994880.0;
+  acons[6][4] = 733191589.0 / 59609088000.0;
+  acons[6][5] = 326190917.0 / 11700633600.0;
+  acons[7][0] = 1.0 / 345600.0;
+  acons[7][1] = 3617.0 / 35512320.0;
+  acons[7][2] = 745739.0 / 838397952.0;
+  acons[7][3] = 56399353.0 / 12773376000.0;
+  acons[7][4] = 25091609.0 / 1560084480.0;
+  acons[7][5] = 1755948832039.0 / 36229939200000.0;
+  acons[7][6] = 4887769399.0 / 37838389248.0;
+
+  double q2 = qsqsum * force->qqrd2e;
+
+  // use xprd,yprd,zprd even if triclinic so grid size is the same
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  double h_x,h_y,h_z;
+  bigint natoms = atom->natoms;
+
+  if (!gewaldflag) {
+    if (accuracy <= 0.0)
+      error->all(FLERR,"KSpace accuracy must be > 0");
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
+    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1
+  // reduce it until accuracy target is met
+  // NOTE: in each loop err is evaluated before h is refined, so the test
+  //   lags one step behind h; left unchanged so grid sizes stay the same
+
+  if (!gridflag) {
+    double err;
+    h_x = h_y = h_z = 1.0/g_ewald;
+
+    nx_pppm = static_cast<int> (xprd/h_x) + 1;
+    ny_pppm = static_cast<int> (yprd/h_y) + 1;
+    nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
+
+    err = rms(h_x,xprd,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_x,xprd,natoms,q2,acons);
+      nx_pppm++;
+      h_x = xprd/nx_pppm;
+    }
+
+    err = rms(h_y,yprd,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_y,yprd,natoms,q2,acons);
+      ny_pppm++;
+      h_y = yprd/ny_pppm;
+    }
+
+    err = rms(h_z,zprd_slab,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_z,zprd_slab,natoms,q2,acons);
+      nz_pppm++;
+      h_z = zprd_slab/nz_pppm;
+    }
+  }
+
+  // boost grid size until it is factorable
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+
+  // adjust g_ewald for new grid size
+
+  h_x = xprd/static_cast<double>(nx_pppm);
+  h_y = yprd/static_cast<double>(ny_pppm);
+  h_z = zprd_slab/static_cast<double>(nz_pppm);
+
+  if (!gewaldflag) {
+    double gew1,gew2,dgew,f,fmid,hmin,rtb;
+    int ncount;
+
+    // diffpr() reads the member g_ewald, so it is set before each call
+    // evaluate diffpr at both ends of the bracket [0,10/hmin]
+
+    gew1 = 0.0;
+    g_ewald = gew1;
+    f = diffpr(h_x,h_y,h_z,q2,acons);
+
+    hmin = MIN(h_x,MIN(h_y,h_z));
+    gew2 = 10.0/hmin;
+    g_ewald = gew2;
+    fmid = diffpr(h_x,h_y,h_z,q2,acons);
+
+    // bisection requires a sign change across the bracket
+
+    if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G");
+
+    // rtb = bracket end where diffpr is negative, dgew = signed step
+
+    if (f < 0.0) {
+      dgew = gew2-gew1;
+      rtb = gew1;
+    } else {
+      dgew = gew1-gew2;
+      rtb = gew2;
+    }
+
+    ncount = 0;
+    while (fabs(dgew) > SMALL && fmid != 0.0) {
+      dgew *= 0.5;
+      g_ewald = rtb + dgew;
+      fmid = diffpr(h_x,h_y,h_z,q2,acons);
+      if (fmid <= 0.0) rtb = g_ewald;
+      ncount++;
+      if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G");
+    }
+  }
+
+  // final RMS accuracy
+  // kept in a local with its own name so it does not shadow
+  //   the accuracy member (the requested target) used above
+
+  double lprx = rms(h_x,xprd,natoms,q2,acons);
+  double lpry = rms(h_y,yprd,natoms,q2,acons);
+  double lprz = rms(h_z,zprd_slab,natoms,q2,acons);
+  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
+  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
+  double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
+
+  // free local memory
+
+  memory->destroy(acons);
+
+  // print info
+
+  if (me == 0) {
+#ifdef FFT_SINGLE
+    const char fft_prec[] = "single";
+#else
+    const char fft_prec[] = "double";
+#endif
+    if (screen) {
+      fprintf(screen," G vector (1/distance)= %g\n",g_ewald);
+      fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(screen," stencil order = %d\n",order);
+      fprintf(screen," estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(screen," estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(screen," using %s precision FFTs\n",fft_prec);
+    }
+    if (logfile) {
+      fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
+      fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(logfile," stencil order = %d\n",order);
+      fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(logfile," estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(logfile," using %s precision FFTs\n",fft_prec);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   test whether n can be reduced to 1 by repeated division
+   by entries of the factors list
+   return 1 if yes, 0 if no (values of n <= 1 return 1)
+------------------------------------------------------------------------- */
+
+int PPPMOld::factorable(int n)
+{
+  while (n > 1) {
+
+    // strip the first listed factor that divides n
+
+    int divided = 0;
+    for (int ifac = 0; ifac < nfactors; ifac++) {
+      if (n % factors[ifac] != 0) continue;
+      n /= factors[ifac];
+      divided = 1;
+      break;
+    }
+
+    // no listed factor divides what remains
+
+    if (!divided) return 0;
+  }
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   compute RMS accuracy for a dimension
+   h = grid spacing, prd = box length in that dimension
+   acons = coefficient table, row [order] is used
+   depends on the current value of g_ewald
+------------------------------------------------------------------------- */
+
+double PPPMOld::rms(double h, double prd, bigint natoms,
+                    double q2, double **acons)
+{
+  const double hg = h*g_ewald;
+
+  // polynomial in (h*g_ewald)^2 weighted by the order-specific coefficients
+
+  double series = 0.0;
+  for (int m = 0; m < order; m++) {
+    series += acons[order][m] * pow(hg,2.0*m);
+  }
+
+  return q2 * pow(hg,(double)order) *
+    sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*series/natoms) / (prd*prd);
+}
+
+/* ----------------------------------------------------------------------
+   compute difference in real-space and KSpace RMS accuracy
+   h_x,h_y,h_z = grid spacing in each dimension
+   return KSpace estimate minus real-space estimate for current g_ewald
+   KSpace z term uses zprd*slab_volfactor, real-space term uses zprd
+------------------------------------------------------------------------- */
+
+double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2,
+                       double **acons)
+{
+  double lprx,lpry,lprz,kspace_prec,real_prec;
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  bigint natoms = atom->natoms;
+
+  // KSpace part: per-dimension estimates combined as an RMS over 3 dims
+
+  lprx = rms(h_x,xprd,natoms,q2,acons);
+  lpry = rms(h_y,yprd,natoms,q2,acons);
+  lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons);
+  kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+
+  // real-space part: natoms is converted to double before the product
+
+  real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
+
+  double value = kspace_prec - real_prec;
+  return value;
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
+   fills gf_b[0..order-1] by an in-place recurrence,
+   then scales every entry by 1/(2*order-1)!
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_gf_denom()
+{
+  // starting vector: 1 in slot 0, zeros elsewhere
+
+  gf_b[0] = 1.0;
+  for (int idx = 1; idx < order; idx++) gf_b[idx] = 0.0;
+
+  // each pass updates slots m down to 1 using the not-yet-updated
+  // lower neighbor, then updates slot 0 (which has no lower neighbor)
+
+  for (int m = 1; m < order; m++) {
+    for (int idx = m; idx >= 1; idx--) {
+      const int d = idx - m;
+      gf_b[idx] = 4.0 * (gf_b[idx]*d*(d-0.5)-gf_b[idx-1]*(d-1)*(d-1));
+    }
+    gf_b[0] = 4.0 * (gf_b[0]*(-m)*(-m-0.5));
+  }
+
+  // factorial held in bigint, product of 1..2*order-1
+
+  bigint factorial = 1;
+  for (int k = 1; k < 2*order; k++) factorial *= k;
+
+  const double gaminv = 1.0/factorial;
+  for (int idx = 0; idx < order; idx++) gf_b[idx] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+ ghost-swap to accumulate full density in brick decomposition
+ remap density from 3d brick decomposition to FFT decomposition
+ swaps are done one face at a time in the order +x,-x,+y,-y,+z,-z
+ each swap packs ghost cells of density_brick into buf1,
+ receives into buf2 (or copies buf1 to buf2 when the neighbor is me),
+ and adds buf2 into the owned cells next to the opposite face
+ x swaps span the full out extent in y and z,
+ y swaps span the in extent in x and the out extent in z,
+ z swaps span the in extent in x and y
+ after the swaps the owned cells are copied to density_fft and remapped
+------------------------------------------------------------------------- */
+
+void PPPMOld::brick2fft()
+{
+ int i,n,ix,iy,iz;
+ MPI_Request request;
+ MPI_Status status;
+
+ // pack my ghosts for +x processor (ix above nxhi_in)
+ // pass data to self or +x processor, recv from procneigh[0][0]
+ // unpack and sum recv data into my real cells (lowest nxlo_ghost in x)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in+1; ix <= nxhi_out; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[0][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // pack my ghosts for -x processor (ix below nxlo_in)
+ // pass data to self or -x processor, recv from procneigh[0][1]
+ // unpack and sum recv data into my real cells (highest nxhi_ghost in x)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_out; ix < nxlo_in; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[0][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // pack my ghosts for +y processor (iy above nyhi_in, owned x only)
+ // pass data to self or +y processor, recv from procneigh[1][0]
+ // unpack and sum recv data into my real cells (lowest nylo_ghost in y)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[1][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // pack my ghosts for -y processor (iy below nylo_in, owned x only)
+ // pass data to self or -y processor, recv from procneigh[1][1]
+ // unpack and sum recv data into my real cells (highest nyhi_ghost in y)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy < nylo_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[1][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // pack my ghosts for +z processor (iz above nzhi_in, owned x,y only)
+ // pass data to self or +z processor, recv from procneigh[2][0]
+ // unpack and sum recv data into my real cells (lowest nzlo_ghost in z)
+
+ n = 0;
+ for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[2][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // pack my ghosts for -z processor (iz below nzlo_in, owned x,y only)
+ // pass data to self or -z processor, recv from procneigh[2][1]
+ // unpack and sum recv data into my real cells (highest nzhi_ghost in z)
+
+ n = 0;
+ for (iz = nzlo_out; iz < nzlo_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ buf1[n++] = density_brick[iz][iy][ix];
+
+ if (comm->procneigh[2][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_brick[iz][iy][ix] += buf2[n++];
+
+ // remap from 3d brick decomposition to FFT decomposition
+ // copy grabs inner portion of density from 3d brick
+ // remap could be done as pre-stage of FFT,
+ // but this works optimally on only double values, not complex values
+
+ n = 0;
+ for (iz = nzlo_in; iz <= nzhi_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++)
+ density_fft[n++] = density_brick[iz][iy][ix];
+
+ remap->perform(density_fft,density_fft,work1);
+}
+
+/* ----------------------------------------------------------------------
+ ghost-swap to fill ghost cells of my brick with field values
+ swaps are done one face at a time in the order +z,-z,+y,-y,+x,-x
+ each swap packs owned cells of vdx,vdy,vdz_brick into buf1
+ (3 values per grid pt), receives into buf2 (or copies buf1 to buf2
+ when the neighbor is me), and overwrites the ghost cells beyond the
+ opposite face with the received values
+ z swaps span the in extent in x and y,
+ y swaps span the in extent in x and the out extent in z,
+ x swaps span the full out extent in y and z
+------------------------------------------------------------------------- */
+
+void PPPMOld::fillbrick()
+{
+ int i,n,ix,iy,iz;
+ MPI_Request request;
+ MPI_Status status;
+
+ // pack my real cells for +z processor (highest nzhi_ghost in z)
+ // pass data to self or +z processor, recv from procneigh[2][0]
+ // unpack recv data into my ghost cells below nzlo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[2][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz < nzlo_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+
+ // pack my real cells for -z processor (lowest nzlo_ghost in z)
+ // pass data to self or -z processor, recv from procneigh[2][1]
+ // unpack recv data into my ghost cells above nzhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[2][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+
+ // pack my real cells for +y processor (highest nyhi_ghost in y, all z)
+ // pass data to self or +y processor, recv from procneigh[1][0]
+ // unpack recv data into my ghost cells below nylo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[1][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy < nylo_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+
+ // pack my real cells for -y processor (lowest nylo_ghost in y, all z)
+ // pass data to self or -y processor, recv from procneigh[1][1]
+ // unpack recv data into my ghost cells above nyhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[1][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+
+ // pack my real cells for +x processor (highest nxhi_ghost in x, all y,z)
+ // pass data to self or +x processor, recv from procneigh[0][0]
+ // unpack recv data into my ghost cells below nxlo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[0][1] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_out; ix < nxlo_in; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+
+ // pack my real cells for -x processor (lowest nxlo_ghost in x, all y,z)
+ // pass data to self or -x processor, recv from procneigh[0][1]
+ // unpack recv data into my ghost cells above nxhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
+ buf1[n++] = vdx_brick[iz][iy][ix];
+ buf1[n++] = vdy_brick[iz][iy][ix];
+ buf1[n++] = vdz_brick[iz][iy][ix];
+ }
+
+ if (comm->procneigh[0][0] == me)
+ for (i = 0; i < n; i++) buf2[i] = buf1[i];
+ else {
+ MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
+ MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
+ vdx_brick[iz][iy][ix] = buf2[n++];
+ vdy_brick[iz][iy][ix] = buf2[n++];
+ vdz_brick[iz][iy][ix] = buf2[n++];
+ }
+}
+
+/* ----------------------------------------------------------------------
+ ghost-swap to fill ghost cells of my brick with per-atom field values
+ swaps are done one face at a time in the order +z,-z,+y,-y,+x,-x
+ each swap packs owned cells into buf3: u_brick if eflag_atom is set,
+ then v0..v5_brick if vflag_atom is set (so 1, 6, or 7 values per grid pt)
+ data is received into buf4 (or copied from buf3 when the neighbor is me)
+ and overwrites the ghost cells beyond the opposite face, in the same order
+ z swaps span the in extent in x and y,
+ y swaps span the in extent in x and the out extent in z,
+ x swaps span the full out extent in y and z
+------------------------------------------------------------------------- */
+
+void PPPMOld::fillbrick_peratom()
+{
+ int i,n,ix,iy,iz;
+ MPI_Request request;
+ MPI_Status status;
+
+ // pack my real cells for +z processor (highest nzhi_ghost in z)
+ // pass data to self or +z processor, recv from procneigh[2][0]
+ // unpack recv data into my ghost cells below nzlo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[2][1] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[2][0],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz < nzlo_in; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+
+ // pack my real cells for -z processor (lowest nzlo_ghost in z)
+ // pass data to self or -z processor, recv from procneigh[2][1]
+ // unpack recv data into my ghost cells above nzhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[2][0] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[2][1],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+
+ // pack my real cells for +y processor (highest nyhi_ghost in y, all z)
+ // pass data to self or +y processor, recv from procneigh[1][0]
+ // unpack recv data into my ghost cells below nylo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[1][1] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[1][0],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy < nylo_in; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+
+ // pack my real cells for -y processor (lowest nylo_ghost in y, all z)
+ // pass data to self or -y processor, recv from procneigh[1][1]
+ // unpack recv data into my ghost cells above nyhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[1][0] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[1][1],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+
+ // pack my real cells for +x processor (highest nxhi_ghost in x, all y,z)
+ // pass data to self or +x processor, recv from procneigh[0][0]
+ // unpack recv data into my ghost cells below nxlo_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[0][1] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[0][0],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_out; ix < nxlo_in; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+
+ // pack my real cells for -x processor (lowest nxlo_ghost in x, all y,z)
+ // pass data to self or -x processor, recv from procneigh[0][1]
+ // unpack recv data into my ghost cells above nxhi_in (overwrite, no sum)
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
+ if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+ if (vflag_atom) {
+ buf3[n++] = v0_brick[iz][iy][ix];
+ buf3[n++] = v1_brick[iz][iy][ix];
+ buf3[n++] = v2_brick[iz][iy][ix];
+ buf3[n++] = v3_brick[iz][iy][ix];
+ buf3[n++] = v4_brick[iz][iy][ix];
+ buf3[n++] = v5_brick[iz][iy][ix];
+ }
+ }
+
+ if (comm->procneigh[0][0] == me)
+ for (i = 0; i < n; i++) buf4[i] = buf3[i];
+ else {
+ MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+ comm->procneigh[0][1],0,world,&request);
+ MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+ MPI_Wait(&request,&status);
+ }
+
+ n = 0;
+ for (iz = nzlo_out; iz <= nzhi_out; iz++)
+ for (iy = nylo_out; iy <= nyhi_out; iy++)
+ for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
+ if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+ if (vflag_atom) {
+ v0_brick[iz][iy][ix] = buf4[n++];
+ v1_brick[iz][iy][ix] = buf4[n++];
+ v2_brick[iz][iy][ix] = buf4[n++];
+ v3_brick[iz][iy][ix] = buf4[n++];
+ v4_brick[iz][iy][ix] = buf4[n++];
+ v5_brick[iz][iy][ix] = buf4[n++];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ find center grid pt for each of my particles
+ check that full stencil for the particle will fit in my 3d brick
+ store central grid pt indices in part2grid array
+------------------------------------------------------------------------- */
+
+void PPPMOld::particle_map()
+{
+  // For every local atom, compute the grid point to its "lower left",
+  // store it in part2grid, and verify that the interpolation stencil
+  // [nlower,nupper] around that point lies inside this proc's brick
+  // (nxlo_out..nxhi_out etc). Aborts via error->one() if any atom fails.
+
+  int nx,ny,nz;
+
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  int flag = 0;
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+    // the cast must be spelled static_cast<int>: truncation to int is
+    // what turns the scaled coordinate into a grid index
+
+    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
+    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
+    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[i][0] = nx;
+    part2grid[i][1] = ny;
+    part2grid[i][2] = nz;
+
+    // check that entire stencil around nx,ny,nz will fit in my 3d brick
+    // only record the failure here so all atoms are still mapped
+
+    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
+        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
+        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
+      flag = 1;
+  }
+
+  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid
+------------------------------------------------------------------------- */
+
+void PPPMOld::make_rho()
+{
+  // start from an all-zero brick: ngrid entries beginning at the
+  // lowest-index corner (nzlo_out,nylo_out,nxlo_out)
+
+  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  double *charge = atom->q;
+  double **pos = atom->x;
+  const int natoms = atom->nlocal;
+
+  // spread each local charge over its stencil of grid points
+  // (gx,gy,gz) = grid pt to "lower left" of the charge (from part2grid)
+  // (fx,fy,fz) = offset of the charge from that grid pt
+
+  for (int ia = 0; ia < natoms; ia++) {
+    const int gx = part2grid[ia][0];
+    const int gy = part2grid[ia][1];
+    const int gz = part2grid[ia][2];
+
+    const FFT_SCALAR fx = gx+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR fy = gy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR fz = gz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    // fills rho1d[0..2][nlower..nupper] with the 1d weights
+
+    compute_rho1d(fx,fy,fz);
+
+    // weight is built up as q/vol * wz * wy * wx, outermost dim first
+
+    const FFT_SCALAR wq = delvolinv * charge[ia];
+    for (int kz = nlower; kz <= nupper; kz++) {
+      const int iz = kz+gz;
+      const FFT_SCALAR wqz = wq*rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ky++) {
+        const int iy = ky+gy;
+        const FFT_SCALAR wqzy = wqz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; kx++) {
+          const int ix = kx+gx;
+          density_brick[iz][iy][ix] += wqzy*rho1d[0][kx];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver
+------------------------------------------------------------------------- */
+
+void PPPMOld::poisson()
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge density (r -> k)
+  // density_fft holds one value per FFT grid point; pack it into work1
+  // as interleaved (real,imag) pairs with zero imaginary part
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] = density_fft[i];
+    work1[n++] = ZEROF;
+  }
+
+  fft1->compute(work1,work1,1);
+
+  // global energy and virial contribution
+  // the grid-point count is formed in double precision so the product
+  // cannot overflow an int on very large grids
+  // (assumes nx_pppm,ny_pppm,nz_pppm are plain ints - TODO confirm)
+
+  double scaleinv = 1.0/(static_cast<double>(nx_pppm)*ny_pppm*nz_pppm);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft; i++) {
+        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+        if (eflag_global) energy += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft; i++) {
+        energy +=
+          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+        n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] *= scaleinv * greensfn[i];
+    work1[n++] *= scaleinv * greensfn[i];
+  }
+
+  // extra FFTs for per-atom energy/virial
+
+  if (evflag_atom) poisson_peratom();
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+  // only the real part (even entries of work2) is kept after each FFT
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkx[i]*work1[n+1];
+        work2[n+1] = -fkx[i]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdx_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fky[j]*work1[n+1];
+        work2[n+1] = -fky[j]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdy_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkz[k]*work1[n+1];
+        work2[n+1] = -fkz[k]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdz_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPMOld::poisson_peratom()
+{
+  int i,j,k,n;
+
+  // energy: inverse-transform a copy of work1 and keep the real part
+
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      work2[n] = work1[n];
+      work2[n+1] = work1[n+1];
+      n += 2;
+    }
+
+    fft2->compute(work2,work2,-1);
+
+    n = 0;
+    for (k = nzlo_in; k <= nzhi_in; k++)
+      for (j = nylo_in; j <= nyhi_in; j++)
+        for (i = nxlo_in; i <= nxhi_in; i++) {
+          u_brick[k][j][i] = work2[n];
+          n += 2;
+        }
+  }
+
+  // 6 components of virial in v0 thru v5
+
+  if (!vflag_atom) return;
+
+  // destination brick for virial component c, in the order v0..v5
+
+  FFT_SCALAR ***vbrick[6] =
+    {v0_brick,v1_brick,v2_brick,v3_brick,v4_brick,v5_brick};
+
+  for (int c = 0; c < 6; c++) {
+    FFT_SCALAR ***dest = vbrick[c];
+
+    // weight both real and imaginary parts by vg[][c]
+
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      work2[n] = work1[n]*vg[i][c];
+      work2[n+1] = work1[n+1]*vg[i][c];
+      n += 2;
+    }
+
+    fft2->compute(work2,work2,-1);
+
+    // keep the real part on the owned (non-ghost) portion of the brick
+
+    n = 0;
+    for (k = nzlo_in; k <= nzhi_in; k++)
+      for (j = nylo_in; j <= nyhi_in; j++)
+        for (i = nxlo_in; i <= nxhi_in; i++) {
+          dest[k][j][i] = work2[n];
+          n += 2;
+        }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get electric field & force on my particles
+------------------------------------------------------------------------- */
+
+void PPPMOld::fieldforce()
+{
+  double *charge = atom->q;
+  double **pos = atom->x;
+  double **frc = atom->f;
+
+  const int natoms = atom->nlocal;
+
+  // for each local charge, gather the E-field from its stencil of
+  // grid points using the same 1d weights used for charge assignment
+  // (gx,gy,gz) = grid pt to "lower left" of the charge
+  // (fx,fy,fz) = offset of the charge from that grid pt
+
+  for (int ia = 0; ia < natoms; ia++) {
+    const int gx = part2grid[ia][0];
+    const int gy = part2grid[ia][1];
+    const int gz = part2grid[ia][2];
+
+    const FFT_SCALAR fx = gx+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR fy = gy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR fz = gz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(fx,fy,fz);
+
+    // E = -grad(V), hence the subtraction of the weighted gradients
+
+    FFT_SCALAR ex = ZEROF;
+    FFT_SCALAR ey = ZEROF;
+    FFT_SCALAR ez = ZEROF;
+
+    for (int kz = nlower; kz <= nupper; kz++) {
+      const int iz = kz+gz;
+      const FFT_SCALAR wz = rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ky++) {
+        const int iy = ky+gy;
+        const FFT_SCALAR wzy = wz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; kx++) {
+          const int ix = kx+gx;
+          const FFT_SCALAR w = wzy*rho1d[0][kx];
+          ex -= w*vdx_brick[iz][iy][ix];
+          ey -= w*vdy_brick[iz][iy][ix];
+          ez -= w*vdz_brick[iz][iy][ix];
+        }
+      }
+    }
+
+    // convert E-field to force
+    // the z component is skipped when slabflag == 2
+
+    const double qfactor = force->qqrd2e * scale * charge[ia];
+    frc[ia][0] += qfactor*ex;
+    frc[ia][1] += qfactor*ey;
+    if (slabflag != 2) frc[ia][2] += qfactor*ez;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPMOld::fieldforce_peratom()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
+
+  // loop over my charges, interpolate from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // forces are not touched here, so atom->f is not needed
+
+  double *q = atom->q;
+  double **x = atom->x;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    // u = interpolated energy term, v0..v5 = interpolated virial terms
+
+    u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];
+          if (eflag_atom) u += x0*u_brick[mz][my][mx];
+          if (vflag_atom) {
+            v0 += x0*v0_brick[mz][my][mx];
+            v1 += x0*v1_brick[mz][my][mx];
+            v2 += x0*v2_brick[mz][my][mx];
+            v3 += x0*v3_brick[mz][my][mx];
+            v4 += x0*v4_brick[mz][my][mx];
+            v5 += x0*v5_brick[mz][my][mx];
+          }
+        }
+      }
+    }
+
+    // accumulate into the per-atom tallies
+    // energy is weighted by the atom's charge here; the virial is not
+
+    if (eflag_atom) eatom[i] += q[i]*u;
+    if (vflag_atom) {
+      vatom[i][0] += v0;
+      vatom[i][1] += v1;
+      vatom[i][2] += v2;
+      vatom[i][3] += v3;
+      vatom[i][4] += v4;
+      vatom[i][5] += v5;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
+------------------------------------------------------------------------- */
+
+void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+  // try every ipx that divides nprocs, with ipy = nprocs/ipx
+  // boxx,boxy = size of the largest proc sub-domain (ceiling division)
+  // surf = boxx + boxy is minimized; ties go to the larger boxx*boxy
+
+  int bestsurf = 2 * (nx + ny);
+  int bestarea = 0;
+
+  for (int ipx = 1; ipx <= nprocs; ipx++) {
+    if (nprocs % ipx != 0) continue;
+
+    const int ipy = nprocs/ipx;
+    const int boxx = (nx % ipx) ? nx/ipx + 1 : nx/ipx;
+    const int boxy = (ny % ipy) ? ny/ipy + 1 : ny/ipy;
+    const int surf = boxx + boxy;
+    const int area = boxx*boxy;
+
+    const bool better =
+      surf < bestsurf || (surf == bestsurf && area > bestarea);
+    if (!better) continue;
+
+    bestsurf = surf;
+    bestarea = area;
+    *px = ipx;
+    *py = ipy;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ charge assignment into rho1d
+ dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+                            const FFT_SCALAR &dz)
+{
+  const int klo = (1-order)/2;
+  const int khi = order/2;
+
+  // for each stencil offset k, evaluate the polynomial with
+  // coefficients rho_coeff[0..order-1][k] at dx, dy and dz,
+  // highest power first
+
+  for (int k = klo; k <= khi; k++) {
+    FFT_SCALAR wx = ZEROF;
+    FFT_SCALAR wy = ZEROF;
+    FFT_SCALAR wz = ZEROF;
+
+    for (int l = order-1; l >= 0; l--) {
+      wx = rho_coeff[l][k] + wx*dx;
+      wy = rho_coeff[l][k] + wy*dy;
+      wz = rho_coeff[l][k] + wz*dz;
+    }
+
+    rho1d[0][k] = wx;
+    rho1d[1][k] = wy;
+    rho1d[2][k] = wz;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ generate coefficients for the weight function of order n
+
+ (n-1)
+ Wn(x) = Sum wn(k,x) , Sum is over every other integer
+ k=-(n-1)
+ For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+ k is odd integers if n is even and even integers if n is odd
+ ---
+ | n-1
+ | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2
+ wn(k,x) = < l=0
+ |
+ | 0 otherwise
+ ---
+ a coefficients are packed into the array rho_coeff to eliminate zeros
+ rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_rho_coeff()
+{
+  // scratch table a[0..order-1][-order..order], zeroed before use
+
+  FFT_SCALAR **a;
+  memory->create2d_offset(a,order,-order,order,"pppm:a");
+
+  for (int l = 0; l < order; l++)
+    for (int k = -order; k <= order; k++)
+      a[l][k] = 0.0;
+
+  // build the table one order at a time, starting from a[0][0] = 1
+  // pass j fills columns k = -j,-j+2,...,j from columns k-1 and k+1
+
+  a[0][0] = 1.0;
+  for (int j = 1; j < order; j++) {
+    for (int k = -j; k <= j; k += 2) {
+      FFT_SCALAR s = 0.0;
+      for (int l = 0; l < j; l++) {
+        a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
+#ifdef FFT_SINGLE
+        s += powf(0.5,(float) l+1) *
+          (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+        s += pow(0.5,(double) l+1) *
+          (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+      }
+      a[0][k] = s;
+    }
+  }
+
+  // copy every other column of a into consecutive columns of rho_coeff,
+  // starting at column index (1-order)/2
+
+  int m = (1-order)/2;
+  for (int k = -(order-1); k < order; k += 2, m++)
+    for (int l = 0; l < order; l++)
+      rho_coeff[l][m] = a[l][k];
+
+  memory->destroy2d_offset(a,-order);
+}
+
+/* ----------------------------------------------------------------------
+ Slab-geometry correction term to dampen inter-slab interactions between
+ periodically repeating slabs. Yields good approximation to 2D Ewald if
+ adequate empty space is left between repeating slabs (J. Chem. Phys.
+ 111, 3155). Slabs defined here to be parallel to the xy plane. Also
+ extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPMOld::slabcorr()
+{
+  double *charge = atom->q;
+  double **pos = atom->x;
+  double **frc = atom->f;
+  const double zprd = domain->zprd;
+  const int natoms = atom->nlocal;
+
+  // z dipole moment: local sum of q*z, then summed over all procs
+
+  double dipole = 0.0;
+  for (int i = 0; i < natoms; i++) dipole += charge[i]*pos[i][2];
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // sum of q*z^2 over all procs
+  // only needed for per-atom energy or a non-neutral system,
+  // where it keeps the correction translationally invariant
+
+  double dipole_r2 = 0.0;
+  if (eflag_atom || fabs(qsum) > SMALL) {
+    for (int i = 0; i < natoms; i++)
+      dipole_r2 += charge[i]*pos[i][2]*pos[i][2];
+
+    double dipole_r2_all;
+    MPI_Allreduce(&dipole_r2,&dipole_r2_all,1,MPI_DOUBLE,MPI_SUM,world);
+    dipole_r2 = dipole_r2_all;
+  }
+
+  // global energy correction
+
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom energy correction
+
+  if (eflag_atom) {
+    const double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < natoms; i++)
+      eatom[i] += efact * charge[i]*(pos[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*pos[i][2]*pos[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // force correction, z component only
+
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+  for (int i = 0; i < natoms; i++)
+    frc[i][2] += ffact * charge[i]*(dipole_all - qsum*pos[i][2]);
+}
+
+
+/* ----------------------------------------------------------------------
+ perform and time the 1d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPMOld::timing_1d(int n, double &time1d)
+{
+  // time on a zeroed work array of 2*nfft_both entries
+
+  const int nwork = 2*nfft_both;
+  for (int iw = 0; iw < nwork; iw++) work1[iw] = ZEROF;
+
+  // barriers bracket the timed region on all procs
+
+  MPI_Barrier(world);
+  const double tstart = MPI_Wtime();
+
+  // per step: one 1d pass with fft1 (flag 1), three with fft2 (flag -1)
+
+  for (int step = 0; step < n; step++) {
+    fft1->timing1d(work1,nfft_both,1);
+    for (int rep = 0; rep < 3; rep++)
+      fft2->timing1d(work1,nfft_both,-1);
+  }
+
+  MPI_Barrier(world);
+  const double tstop = MPI_Wtime();
+  time1d = tstop - tstart;
+
+  // number of FFTs performed per step
+
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+ perform and time the 3d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPMOld::timing_3d(int n, double &time3d)
+{
+  // time on a zeroed work array of 2*nfft_both entries
+
+  const int nwork = 2*nfft_both;
+  for (int iw = 0; iw < nwork; iw++) work1[iw] = ZEROF;
+
+  // barriers bracket the timed region on all procs
+
+  MPI_Barrier(world);
+  const double tstart = MPI_Wtime();
+
+  // per step: one in-place 3d FFT with fft1 (flag 1),
+  // three with fft2 (flag -1)
+
+  for (int step = 0; step < n; step++) {
+    fft1->compute(work1,work1,1);
+    for (int rep = 0; rep < 3; rep++)
+      fft2->compute(work1,work1,-1);
+  }
+
+  MPI_Barrier(world);
+  const double tstop = MPI_Wtime();
+  time3d = tstop - tstart;
+
+  // number of FFTs performed per step
+
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+ memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double PPPMOld::memory_usage()
+{
+  // returns an estimate, in bytes, of the memory held in local arrays
+
+  double bytes = nmax*3 * sizeof(double);
+
+  // number of cells in my 3d brick, ghost cells included
+  // accumulated in double so the product of the three extents
+  // cannot overflow an int for very large bricks
+
+  double nbrick = static_cast<double>(nxhi_out-nxlo_out+1) *
+    (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1);
+
+  // always-allocated brick, FFT and communication buffers
+
+  bytes += 4 * nbrick * sizeof(FFT_SCALAR);
+  bytes += 6 * nfft_both * sizeof(double);
+  bytes += nfft_both * sizeof(double);
+  bytes += nfft_both*5 * sizeof(FFT_SCALAR);
+  bytes += 2 * nbuf * sizeof(FFT_SCALAR);
+
+  // extra bricks and buffers for per-atom energy/virial
+
+  if (peratom_allocate_flag) {
+    bytes += 7 * nbrick * sizeof(FFT_SCALAR);
+    bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR);
+  }
+
+  // extra bricks and FFT arrays for group-group interactions
+
+  if (group_allocate_flag) {
+    bytes += 2 * nbrick * sizeof(FFT_SCALAR);
+    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
+  }
+
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+ group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ compute the PPPM total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag)
+{
+  // results are left in e2group (energy) and f2group[0..2] (force)
+  // groupbit_A,groupbit_B = mask bits selecting the two groups
+  // BA_flag is passed through to make_rho_groups() and poisson_groups()
+
+  if (slabflag)
+    error->all(FLERR,"Cannot (yet) use K-space slab "
+               "correction with compute group/group");
+
+  // group arrays are allocated on first use only
+
+  if (!group_allocate_flag) {
+    allocate_groups();
+    group_allocate_flag = 1;
+  }
+
+  e2group = 0; //energy
+  f2group[0] = 0; //force in x-direction
+  f2group[1] = 0; //force in y-direction
+  f2group[2] = 0; //force in z-direction
+
+  // map my particle charge onto my local 3d density grid
+
+  make_rho_groups(groupbit_A,groupbit_B,BA_flag);
+
+  // all procs communicate density values from their ghost cells
+  // to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  // temporarily store and switch pointers so we can
+  // use brick2fft() for groups A and B (without
+  // writing an additional function)
+
+  FFT_SCALAR ***density_brick_real = density_brick;
+  FFT_SCALAR *density_fft_real = density_fft;
+
+  // group A
+
+  density_brick = density_A_brick;
+  density_fft = density_A_fft;
+
+  brick2fft();
+
+  // group B
+
+  density_brick = density_B_brick;
+  density_fft = density_B_fft;
+
+  brick2fft();
+
+  // switch back pointers
+
+  density_brick = density_brick_real;
+  density_fft = density_fft_real;
+
+  // compute potential gradient on my FFT grid and
+  // portion of group-group energy/force on this proc's FFT grid
+
+  poisson_groups(BA_flag);
+
+  const double qscale = force->qqrd2e * scale;
+
+  // total group A <--> group B energy
+  // self and boundary correction terms are in compute_group_group.cpp
+
+  double e2group_all;
+  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
+  e2group = e2group_all;
+
+  e2group *= qscale*0.5*volume;
+
+  // total group A <--> group B force
+
+  double f2group_all[3];
+  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
+
+  for (int i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i];
+}
+
+/* ----------------------------------------------------------------------
+ allocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::allocate_groups()
+{
+  // each group gets a density brick spanning my full 3d brick
+  // (nzlo_out:nzhi_out, nylo_out:nyhi_out, nxlo_out:nxhi_out)
+  // and a 1d FFT array of length nfft_both
+
+  // group A
+
+  memory->create3d_offset(density_A_brick,
+                          nzlo_out,nzhi_out,
+                          nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,
+                          "pppm:density_A_brick");
+  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
+
+  // group B
+
+  memory->create3d_offset(density_B_brick,
+                          nzlo_out,nzhi_out,
+                          nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,
+                          "pppm:density_B_brick");
+  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
+}
+
+/* ----------------------------------------------------------------------
+ deallocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate_groups()
+{
+  // release the arrays created in allocate_groups()
+  // the bricks are freed with the same lower-bound offsets
+  // they were created with
+
+  // group A
+
+  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_A_fft);
+
+  // group B
+
+  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_B_fft);
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
+{
+  // zero both group density bricks over all ngrid entries
+
+  memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  double *charge = atom->q;
+  double **pos = atom->x;
+  const int natoms = atom->nlocal;
+  int *mask = atom->mask;
+
+  // spread each local charge that belongs to group A and/or group B
+  // (gx,gy,gz) = grid pt to "lower left" of the charge
+  // (fx,fy,fz) = offset of the charge from that grid pt
+
+  for (int ia = 0; ia < natoms; ia++) {
+    const int inA = mask[ia] & groupbit_A;
+    const int inB = mask[ia] & groupbit_B;
+
+    // atoms in neither group contribute nothing
+    // atoms in both groups are left out when BA_flag is set
+
+    if (!inA && !inB) continue;
+    if (inA && inB && BA_flag) continue;
+
+    const int gx = part2grid[ia][0];
+    const int gy = part2grid[ia][1];
+    const int gz = part2grid[ia][2];
+
+    const FFT_SCALAR fx = gx+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR fy = gy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR fz = gz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(fx,fy,fz);
+
+    const FFT_SCALAR wq = delvolinv * charge[ia];
+    for (int kz = nlower; kz <= nupper; kz++) {
+      const int iz = kz+gz;
+      const FFT_SCALAR wqz = wq*rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ky++) {
+        const int iy = ky+gy;
+        const FFT_SCALAR wqzy = wqz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; kx++) {
+          const int ix = kx+gx;
+
+          // an atom in both groups is added to both bricks
+
+          if (inA) density_A_brick[iz][iy][ix] += wqzy*rho1d[0][kx];
+          if (inB) density_B_brick[iz][iy][ix] += wqzy*rho1d[0][kx];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ FFT-based Poisson solver for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::poisson_groups(int BA_flag)
+{
+  // accumulates this proc's share of the group-group energy into e2group
+  // and, unless BA_flag is set, of the force into f2group[0..2]
+
+  int i,j,k,n;
+
+  // reuse memory (already declared)
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  // transform charge density (r -> k)
+  // each density is packed as (real,imag) pairs with zero imaginary part
+
+  // group A
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] = density_A_fft[i];
+    work_A[n++] = ZEROF;
+  }
+
+  fft1->compute(work_A,work_A,1);
+
+  // group B
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_B[n++] = density_B_fft[i];
+    work_B[n++] = ZEROF;
+  }
+
+  fft1->compute(work_B,work_B,1);
+
+  // group-group energy and force contribution,
+  // keep everything in reciprocal space so
+  // no inverse FFTs needed
+  // the grid-point count is formed in double precision so the product
+  // cannot overflow an int on very large grids
+  // (assumes nx_pppm,ny_pppm,nz_pppm are plain ints - TODO confirm)
+
+  double scaleinv = 1.0/(static_cast<double>(nx_pppm)*ny_pppm*nz_pppm);
+  double s2 = scaleinv*scaleinv;
+
+  // energy
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    e2group += s2 * greensfn[i] *
+      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
+    n += 2;
+  }
+
+  // only the energy is computed when BA_flag is set
+
+  if (BA_flag) return;
+
+
+  // multiply by Green's function and s2
+  // (only for work_A so it is not squared below)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] *= s2 * greensfn[i];
+    work_A[n++] *= s2 * greensfn[i];
+  }
+
+  double partial_group;
+
+  // force, x direction
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[0] += fkx[i] * partial_group;
+        n += 2;
+      }
+
+  // force, y direction
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[1] += fky[j] * partial_group;
+        n += 2;
+      }
+
+  // force, z direction
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[2] += fkz[k] * partial_group;
+        n += 2;
+      }
+}
diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp
index 6e09fde133..58574c4bd5 100644
--- a/src/USER-CUDA/pppm_cuda.cpp
+++ b/src/USER-CUDA/pppm_cuda.cpp
@@ -1,1436 +1,1436 @@
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-
- Original Version:
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- See the README file in the top-level LAMMPS directory.
-
- -----------------------------------------------------------------------
-
- USER-CUDA Package and associated modifications:
- https://sourceforge.net/projects/lammpscuda/
-
- Christian Trott, christian.trott@tu-ilmenau.de
- Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
- Theoretical Physics II, University of Technology Ilmenau, Germany
-
- See the README file in the USER-CUDA directory.
-
- This software is distributed under the GNU General Public License.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
- http://lammps.sandia.gov, Sandia National Laboratories
- Steve Plimpton, sjplimp@sandia.gov
-
- Copyright (2003) Sandia Corporation. Under the terms of Contract
- DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- certain rights in this software. This software is distributed under
- the GNU General Public License.
-
- See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
- Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
-------------------------------------------------------------------------- */
-
-
-#include "mpi.h"
-#include
-#include
-#include
-#include
-#include "pppm_cuda.h"
-#include "atom.h"
-#include "comm.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap_cuda.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-#include "update.h"
-#include //crmadd
-#include "cuda_wrapper_cu.h"
-#include "pppm_cuda_cu.h"
-#include "cuda.h"
-#include "math_const.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define MAXORDER 7
-#define OFFSET 4096
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-
-void printArray(double* data,int nx, int ny, int nz)
-{
- for(int i=0;icuda;
- if(cuda == NULL)
- error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
-
- if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command");
- #ifndef FFT_CUFFT
- error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. Aborting.");
- #endif
-
- triclinic_support = 0;
- accuracy_relative = atof(arg[0]);
-
- nfactors = 3;
- factors = new int[nfactors];
- factors[0] = 2;
- factors[1] = 3;
- factors[2] = 5;
-
- MPI_Comm_rank(world,&me);
- MPI_Comm_size(world,&nprocs);
-
- density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL;
- density_fft = NULL;
- greensfn = NULL;
- work1 = work2 = NULL;
- vg = NULL;
- fkx = fky = fkz = NULL;
- buf1 = buf2 = NULL;
-
- gf_b = NULL;
- rho1d = rho_coeff = NULL;
-
- fft1c = fft2c = NULL;
- remap = NULL;
-
- density_brick_int=NULL;
- density_intScale=1000000;
- cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL;
- cu_density_brick = NULL;
- cu_density_brick_int = NULL;
- cu_density_fft = NULL;
- cu_energy=NULL;
- cu_greensfn = NULL;
- cu_work1 = cu_work2 = cu_work3 = NULL;
- cu_vg = NULL;
- cu_fkx = cu_fky = cu_fkz = NULL;
-
- cu_flag = NULL;
- cu_debugdata = NULL;
- cu_rho_coeff = NULL;
- cu_virial = NULL;
-
- cu_gf_b = NULL;
-
- cu_slabbuf = NULL;
- slabbuf = NULL;
-
- nmax = 0;
- part2grid = NULL;
- cu_part2grid = NULL;
- adev_data_array=NULL;
- poissontime=0;
- old_nmax=0;
- cu_pppm_grid_n=NULL;
- cu_pppm_grid_ids=NULL;
-
- pppm_grid_nmax=0;
- pppm2partgrid=new int[3];
- pppm_grid=new int[3];
- firstpass=true;
- scale = 1.0;
-}
-
-
-/* ----------------------------------------------------------------------
- free all memory
-------------------------------------------------------------------------- */
-
-PPPMCuda::~PPPMCuda()
-{
- delete [] slabbuf;
- delete cu_slabbuf;
-
- delete [] factors;
- factors=NULL;
- deallocate();
- delete cu_part2grid;
- cu_part2grid=NULL;
- memory->destroy(part2grid);
- part2grid = NULL;
-}
-
-/* ----------------------------------------------------------------------
- called once before run
-------------------------------------------------------------------------- */
-
-void PPPMCuda::init()
-{
-
- cuda->shared_data.pppm.cudable_force=1;
-
- //if(cuda->finished_run) {PPPM::init(); return;}
-
- if (me == 0) {
- if (screen) fprintf(screen,"PPPMCuda initialization ...\n");
- if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n");
- }
-
- // error check
-
- triclinic_check();
- if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation");
-
- if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
- if (slabflag == 0 && domain->nonperiodic > 0)
- error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda");
- if (slabflag == 1) {
- if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
- domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
- error->all(FLERR,"Incorrect boundaries with slab PPPMCuda");
- }
-
- if (order < 2 || order > MAXORDER) {
- char str[128];
- sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER);
- error->all(FLERR,str);
- }
- // free all arrays previously allocated
-
- deallocate();
-
- // extract short-range Coulombic cutoff from pair style
-
- qqrd2e = force->qqrd2e;
-
- if (force->pair == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- int itmp=0;
- double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
- if (p_cutoff == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- cutoff = *p_cutoff;
-
- // if kspace is TIP4P, extract TIP4P params from pair style
-
- qdist = 0.0;
-
- if (strcmp(force->kspace_style,"pppm/tip4p") == 0) {
- if (force->pair == NULL)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- double *p_qdist = (double *) force->pair->extract("qdist",itmp);
- int *p_typeO = (int *) force->pair->extract("typeO",itmp);
- int *p_typeH = (int *) force->pair->extract("typeH",itmp);
- int *p_typeA = (int *) force->pair->extract("typeA",itmp);
- int *p_typeB = (int *) force->pair->extract("typeB",itmp);
- if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
- error->all(FLERR,"KSpace style is incompatible with Pair style");
- qdist = *p_qdist;
- typeO = *p_typeO;
- typeH = *p_typeH;
- int typeA = *p_typeA;
- int typeB = *p_typeB;
-
- if (force->angle == NULL || force->bond == NULL)
- error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
- double theta = force->angle->equilibrium_angle(typeA);
- double blen = force->bond->equilibrium_distance(typeB);
- alpha = qdist / (2.0 * cos(0.5*theta) * blen);
- }
-
- // compute qsum & qsqsum and warn if not charge-neutral
-
- qsum = qsqsum = 0.0;
- for (int i = 0; i < atom->nlocal; i++) {
- qsum += atom->q[i];
- qsqsum += atom->q[i]*atom->q[i];
- }
-
- double tmp;
- MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsum = tmp;
- MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
- qsqsum = tmp;
-
- if (qsqsum == 0.0)
- error->all(FLERR,"Cannot use kspace solver on system with no charge");
- if (fabs(qsum) > SMALL && me == 0) {
- char str[128];
- sprintf(str,"System is not charge neutral, net charge = %g",qsum);
- error->warning(FLERR,str);
- }
-
- // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
- if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
- else accuracy = accuracy_relative * two_charge_force;
-
- // setup FFT grid resolution and g_ewald
- // normally one iteration thru while loop is all that is required
- // if grid stencil extends beyond neighbor proc, reduce order and try again
-
- int iteration = 0;
-
- while (order > 1) {
- if (iteration && me == 0)
- error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends "
- "beyond neighbor processor");
- iteration++;
-
- set_grid();
-
- if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
- error->all(FLERR,"PPPMCuda grid is too large");
-
- // global indices of PPPMCuda grid range from 0 to N-1
- // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
- // global PPPMCuda grid that I own without ghost cells
- // for slab PPPMCuda, assign z grid as if it were not extended
-
- nxlo_in = comm->myloc[0]*nx_pppm / comm->procgrid[0];
- nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1;
- nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1];
- nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1;
- nzlo_in = comm->myloc[2] *
- (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2];
- nzhi_in = (comm->myloc[2]+1) *
- (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1;
-
- // nlower,nupper = stencil size for mapping particles to PPPMCuda grid
-
- nlower = -(order-1)/2;
- nupper = order/2;
-
- // shift values for particle <-> grid mapping
- // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
- if (order % 2) shift = OFFSET + 0.5;
- else shift = OFFSET;
- if (order % 2) shiftone = 0.0;
- else shiftone = 0.5;
-
- // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
- // global PPPMCuda grid that my particles can contribute charge to
- // effectively nlo_in,nhi_in + ghost cells
- // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
- // position a particle in my box can be at
- // dist[3] = particle position bound = subbox + skin/2.0 + qdist
- // qdist = offset due to TIP4P fictitious charge
- // convert to triclinic if necessary
- // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
- // for slab PPPMCuda, assign z grid as if it were not extended
-
-
- triclinic = domain->triclinic;
- double *prd,*sublo,*subhi;
-
- if (triclinic == 0) {
- prd = domain->prd;
- boxlo = domain->boxlo;
- sublo = domain->sublo;
- subhi = domain->subhi;
- } else {
- prd = domain->prd_lamda;
- boxlo = domain->boxlo_lamda;
- sublo = domain->sublo_lamda;
- subhi = domain->subhi_lamda;
- }
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
-
- double dist[3];
- double cuthalf = 0.5*neighbor->skin + qdist;
- if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
- else {
- dist[0] = cuthalf/domain->prd[0];
- dist[1] = cuthalf/domain->prd[1];
- dist[2] = cuthalf/domain->prd[2];
- }
-
- int nlo,nhi;
-
- nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) *
- nx_pppm/xprd + shift) - OFFSET;
- nxlo_out = nlo + nlower;
- nxhi_out = nhi + nupper;
-
- nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) *
- ny_pppm/yprd + shift) - OFFSET;
- nylo_out = nlo + nlower;
- nyhi_out = nhi + nupper;
-
- nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) *
- nz_pppm/zprd_slab + shift) - OFFSET;
- nzlo_out = nlo + nlower;
- nzhi_out = nhi + nupper;
-
- // for slab PPPMCuda, change the grid boundary for processors at +z end
- // to include the empty volume between periodically repeating slabs
- // for slab PPPMCuda, want charge data communicated from -z proc to +z proc,
- // but not vice versa, also want field data communicated from +z proc to
- // -z proc, but not vice versa
- // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
-
- if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) {
- nzhi_in = nz_pppm - 1;
- nzhi_out = nz_pppm - 1;
- }
-
- // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
- // that overlay domain I own
- // proc in that direction tells me via sendrecv()
- // if no neighbor proc, value is from self since I have ghosts regardless
-
- int nplanes;
- MPI_Status status;
-
- nplanes = nxlo_in - nxlo_out;
- if (comm->procneigh[0][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
- &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0,
- world,&status);
- else nxhi_ghost = nplanes;
-
- nplanes = nxhi_out - nxhi_in;
- if (comm->procneigh[0][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
- &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0],
- 0,world,&status);
- else nxlo_ghost = nplanes;
-
- nplanes = nylo_in - nylo_out;
- if (comm->procneigh[1][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
- &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0,
- world,&status);
- else nyhi_ghost = nplanes;
-
- nplanes = nyhi_out - nyhi_in;
- if (comm->procneigh[1][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
- &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0,
- world,&status);
- else nylo_ghost = nplanes;
-
- nplanes = nzlo_in - nzlo_out;
- if (comm->procneigh[2][0] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
- &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0,
- world,&status);
- else nzhi_ghost = nplanes;
-
- nplanes = nzhi_out - nzhi_in;
- if (comm->procneigh[2][1] != me)
- MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
- &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0,
- world,&status);
- else nzlo_ghost = nplanes;
-
- // test that ghost overlap is not bigger than my sub-domain
-
- int flag = 0;
- if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1;
- if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1;
- if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1;
- if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1;
- if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1;
- if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1;
-
- int flag_all;
- MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
-
- if (flag_all == 0) break;
- order--;
- }
-
- if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0");
-
-
-
- // decomposition of FFT mesh
- // global indices range from 0 to N-1
- // proc owns entire x-dimension, clump of columns in y,z dimensions
- // npey_fft,npez_fft = # of procs in y,z dims
- // if nprocs is small enough, proc can own 1 or more entire xy planes,
- // else proc owns 2d sub-blocks of yz plane
- // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
- // nlo_fft,nhi_fft = lower/upper limit of the section
- // of the global FFT mesh that I own
-
- int npey_fft,npez_fft;
- if (nz_pppm >= nprocs) {
- npey_fft = 1;
- npez_fft = nprocs;
- } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
- int me_y = me % npey_fft;
- int me_z = me / npey_fft;
-
- nxlo_fft = 0;
- nxhi_fft = nx_pppm - 1;
- nylo_fft = me_y*ny_pppm/npey_fft;
- nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
- nzlo_fft = me_z*nz_pppm/npez_fft;
- nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
- // PPPMCuda grid for this proc, including ghosts
-
- ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
- (nzhi_out-nzlo_out+1);
-
- // FFT arrays on this proc, without ghosts
- // nfft = FFT points in FFT decomposition on this proc
- // nfft_brick = FFT points in 3d brick-decomposition on this proc
- // nfft_both = greater of 2 values
-
- nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
- (nzhi_fft-nzlo_fft+1);
- int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
- (nzhi_in-nzlo_in+1);
- nfft_both = MAX(nfft,nfft_brick);
-
- // buffer space for use in brick2fft and fillbrick
- // idel = max # of ghost planes to send or recv in +/- dir of each dim
- // nx,ny,nz = owned planes (including ghosts) in each dim
- // nxx,nyy,nzz = max # of grid cells to send in each dim
- // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick
-
- int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;
-
- idelx = MAX(nxlo_ghost,nxhi_ghost);
- idelx = MAX(idelx,nxhi_out-nxhi_in);
- idelx = MAX(idelx,nxlo_in-nxlo_out);
-
- idely = MAX(nylo_ghost,nyhi_ghost);
- idely = MAX(idely,nyhi_out-nyhi_in);
- idely = MAX(idely,nylo_in-nylo_out);
-
- idelz = MAX(nzlo_ghost,nzhi_ghost);
- idelz = MAX(idelz,nzhi_out-nzhi_in);
- idelz = MAX(idelz,nzlo_in-nzlo_out);
-
- nx = nxhi_out - nxlo_out + 1;
- ny = nyhi_out - nylo_out + 1;
- nz = nzhi_out - nzlo_out + 1;
-
- nxx = idelx * ny * nz;
- nyy = idely * nx * nz;
- nzz = idelz * nx * ny;
-
- nbuf = MAX(nxx,nyy);
- nbuf = MAX(nbuf,nzz);
- nbuf *= 3;
-
- // print stats
-
- int ngrid_max,nfft_both_max,nbuf_max;
- MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
- MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world);
-
- if (me == 0) {
- if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n",
- ngrid_max,nfft_both_max,nbuf_max);
- if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n",
- ngrid_max,nfft_both_max,nbuf_max);
- }
- cuda_shared_pppm* ap=&(cuda->shared_data.pppm);
-
- ap->density_intScale=density_intScale;
- ap->nxlo_in=nxlo_in;
- ap->nxhi_in=nxhi_in;
- ap->nxlo_out=nxlo_out;
- ap->nxhi_out=nxhi_out;
- ap->nylo_in=nylo_in;
- ap->nyhi_in=nyhi_in;
- ap->nylo_out=nylo_out;
- ap->nyhi_out=nyhi_out;
- ap->nzlo_in=nzlo_in;
- ap->nzhi_in=nzhi_in;
- ap->nzlo_out=nzlo_out;
- ap->nzhi_out=nzhi_out;
- ap->nxlo_in=nxlo_fft;
- ap->nxhi_in=nxhi_fft;
- ap->nylo_in=nylo_fft;
- ap->nyhi_in=nyhi_fft;
- ap->nzlo_in=nzlo_fft;
- ap->nzhi_in=nzhi_fft;
- ap->nx_pppm=nx_pppm;
- ap->ny_pppm=ny_pppm;
- ap->nz_pppm=nz_pppm;
- ap->qqrd2e=qqrd2e;
- ap->order=order;
- ap->nmax=nmax;
- ap->nlocal=atom->nlocal;
- ap->delxinv=delxinv;
- ap->delyinv=delyinv;
- ap->delzinv=delzinv;
- ap->nlower=nlower;
- ap->nupper=nupper;
- ap->shiftone=shiftone;
-
- // allocate K-space dependent memory
-
-
- allocate();
-
- // pre-compute Green's function denomiator expansion
- // pre-compute 1d charge distribution coefficients
-
- compute_gf_denom();
- compute_rho_coeff();
-}
-
-/* ----------------------------------------------------------------------
- adjust PPPMCuda coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void PPPMCuda::setup()
-{
- double *prd;
- cu_gf_b->upload();
- // volume-dependent factors
- // adjust z dimension for 2d slab PPPMCuda
- // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0
-
- if (triclinic == 0) prd = domain->prd;
- else prd = domain->prd_lamda;
-
- double xprd = prd[0];
- double yprd = prd[1];
- double zprd = prd[2];
- double zprd_slab = zprd*slab_volfactor;
- volume = xprd * yprd * zprd_slab;
-
- delxinv = nx_pppm/xprd;
- delyinv = ny_pppm/yprd;
- delzinv = nz_pppm/zprd_slab;
-
- delvolinv = delxinv*delyinv*delzinv;
-
- double unitkx = (2.0*MY_PI/xprd);
- double unitky = (2.0*MY_PI/yprd);
- double unitkz = (2.0*MY_PI/zprd_slab);
-
- // fkx,fky,fkz for my FFT grid pts
- Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald);
-
-
-
- // modified (Hockney-Eastwood) Coulomb Green's function
-
- int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) *
- pow(-log(EPS_HOC),0.25));
- int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) *
- pow(-log(EPS_HOC),0.25));
- int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
- pow(-log(EPS_HOC),0.25));
- Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald,
-nbx,nby,nbz,xprd,yprd,zprd_slab);
-
-
-#ifdef FFT_CUFFT
- cu_vdx_brick->upload();
- cu_vdy_brick->upload();
- cu_vdz_brick->upload();
-#endif
- cu_rho_coeff->upload();
- cu_density_brick->memset_device(0);
- pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper);
-}
-
-/* ----------------------------------------------------------------------
- compute the PPPMCuda long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void PPPMCuda::compute(int eflag, int vflag)
-{
- cuda_shared_atom* cu_atom = & cuda->shared_data.atom;
-
- int i;
- my_times starttime;
- my_times endtime;
- my_times starttotal;
- my_times endtotal;
- // convert atoms from box to lamda coords
-
- if (triclinic == 0) boxlo = domain->boxlo;
- else {
- boxlo = domain->boxlo_lamda;
- domain->x2lamda(atom->nlocal);
- }
-
- // extend size of per-atom arrays if necessary
-
- if ((cu_atom->update_nmax)||(old_nmax==0)) {
- memory->destroy(part2grid);
- nmax = atom->nmax;
- memory->create(part2grid,nmax,3,"pppm:part2grid");
- delete cu_part2grid;
- delete [] adev_data_array;
- adev_data_array=new dev_array[1];
- cu_part2grid = new cCudaData ((int*)part2grid,adev_data_array, nmax,3);
-
- pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(),atom->nlocal,atom->nmax);
- old_nmax=nmax;
- }
- if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);}
-
- energy = 0.0;
- if (vflag)
- {
- for (i = 0; i < 6; i++) virial[i] = 0.0;
- cu_virial->memset_device(0);
- }
- if(eflag) cu_energy->memset_device(0);
- my_gettime(CLOCK_REALTIME,&starttotal);
-
- // find grid points for all my particles
- // map my particle charge onto my local 3d density grid
-
-
- my_gettime(CLOCK_REALTIME,&starttime);
-
- particle_map();
-
- my_gettime(CLOCK_REALTIME,&endtime);
- cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
-
- //cu_part2grid->download();
- my_gettime(CLOCK_REALTIME,&starttime);
- make_rho();
- my_gettime(CLOCK_REALTIME,&endtime);
- cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
-
- // all procs communicate density values from their ghost cells
- // to fully sum contribution in their 3d bricks
- // remap from 3d decomposition to FFT decomposition
-
- int nprocs=comm->nprocs;
-
- my_gettime(CLOCK_REALTIME,&starttime);
-
- if(nprocs>1)
- {
- cu_density_brick->download();
- brick2fft();
- }
- else
- {
- #ifdef FFT_CUFFT
- pppm_initfftdata(&cuda->shared_data,(PPPM_FLOAT*)cu_density_brick->dev_data(),(FFT_FLOAT*)cu_work2->dev_data());
- #endif
- }
-
- my_gettime(CLOCK_REALTIME,&endtime);
- cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
-
- // compute potential gradient on my FFT grid and
- // portion of e_long on this proc's FFT grid
- // return gradients (electric fields) in 3d brick decomposition
-
- my_gettime(CLOCK_REALTIME,&starttime);
- poisson(eflag,vflag);
- my_gettime(CLOCK_REALTIME,&endtime);
- cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
-
- // all procs communicate E-field values to fill ghost cells
- // surrounding their 3d bricks
-
- // not necessary since all the calculations are done on one proc
-
- // calculate the force on my particles
-
-
- my_gettime(CLOCK_REALTIME,&starttime);
- fieldforce();
- my_gettime(CLOCK_REALTIME,&endtime);
- cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
-
- // sum energy across procs and add in volume-dependent term
-
- my_gettime(CLOCK_REALTIME,&endtotal);
- cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000);
-
- if (eflag) {
- double energy_all;
- MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
- energy = energy_all;
-
- energy *= 0.5*volume;
- energy -= g_ewald*qsqsum/1.772453851 +
- MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
- energy *= qqrd2e;
- }
-
- // sum virial across procs
-
- if (vflag) {
- double virial_all[6];
- MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
- for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*volume*virial_all[i];
- }
-
- // 2d slab correction
-
- if (slabflag) slabcorr(eflag);
-
- // convert atoms back from lamda to box coords
-
- if (triclinic) domain->lamda2x(atom->nlocal);
-
- if(firstpass) firstpass=false;
-}
-
-
-/* ----------------------------------------------------------------------
- allocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-
-void PPPMCuda::allocate()
-{
-
- struct dev_array* dev_tmp=new struct dev_array[20];
- int n_cudata=0;
-
-
- memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_brick");
- memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:density_brick_int");
-
-
- cu_density_brick = new cCudaData ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]),
- (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1));
-
- cu_density_brick_int = new cCudaData ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]),
- (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1));
-
- memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdx_brick");
- memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdx_brick_tmp");
-
- cu_vdx_brick = new cCudaData ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]),
- (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1));
-
- memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdy_brick");
- cu_vdy_brick = new cCudaData ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]),
- (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1));
-
- memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
- nxlo_out,nxhi_out,"pppm:vdz_brick");
- cu_vdz_brick = new cCudaData ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]),
- (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1));
-
- memory->create(density_fft,nfft_both,"pppm:density_fft");
-
- cu_density_fft = new cCudaData (density_fft, & (dev_tmp[n_cudata++]),nfft_both);
-
- cu_energy = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm);
- cu_virial = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6);
-
- memory->create(greensfn,nfft_both,"pppm:greensfn");
- cu_greensfn = new cCudaData (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm);
-
- memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1");
- memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2");
- memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3");
-
- cu_work1 = new cCudaData (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm);
- cu_work2 = new cCudaData (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm);
- cu_work3 = new cCudaData (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm);
-
-
- memory->create(fkx,nx_pppm,"pppmcuda:fkx");
- cu_fkx = new cCudaData (fkx, & (dev_tmp[n_cudata++]) , nx_pppm);
- memory->create(fky,ny_pppm,"pppmcuda:fky");
- cu_fky = new cCudaData (fky, & (dev_tmp[n_cudata++]) , ny_pppm);
- memory->create(fkz,nz_pppm,"pppmcuda:fkz");
- cu_fkz = new cCudaData (fkz, & (dev_tmp[n_cudata++]) , nz_pppm);
-
- memory->create(vg,nfft_both,6,"pppm:vg");
-
- cu_vg = new cCudaData ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6);
-
- memory->create(buf1,nbuf,"pppm:buf1");
- memory->create(buf2,nbuf,"pppm:buf2");
-
-
- // summation coeffs
-
-
- gf_b = new double[order];
- cu_gf_b = new cCudaData (gf_b, &(dev_tmp[n_cudata++]) , order);
- memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
- memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
-
- cu_rho_coeff = new cCudaData ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1));
-
- debugdata=new PPPM_FLOAT[100];
- cu_debugdata = new cCudaData (debugdata,& (dev_tmp[n_cudata++]),100);
- cu_flag = new cCudaData (&global_flag,& (dev_tmp[n_cudata++]),3);
-
- // create 2 FFTs and a Remap
- // 1st FFT keeps data in FFT decompostion
- // 2nd FFT returns data in 3d brick decomposition
- // remap takes data from 3d brick to FFT decomposition
-
- int tmp;
-
-
-
-
- fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 0,0,&tmp,true);
-
- fft2c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- 0,0,&tmp,false);
-
-
-#ifdef FFT_CUFFT
- fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data());
- fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data());
-#endif
-
- remap = new Remap(lmp,world,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
- 1,0,0,2);
-
-
-pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data()
- , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), cu_fkz->dev_data(), cu_vg->dev_data()
- ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm
- ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data()
- ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag
- );
-}
-
-
-
-/* ----------------------------------------------------------------------
- deallocate memory that depends on # of K-vectors and order
- ---------------------------------------------------------------------- */
-
-void PPPMCuda::deallocate()
-{
-  // Releases the grid/FFT/stencil buffers referenced by this object and
-  // resets every released pointer to NULL.  Resources are released in
-  // the same order as before; each pointer is cleared right after its
-  // own release instead of in a separate pass.
-
-  // 3d bricks allocated with index offsets (lower bounds *_out)
-  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-  density_brick = NULL;
-  vdx_brick = NULL;
-  vdy_brick = NULL;
-  vdz_brick = NULL;
-
-  // FFT-sized work arrays
-  memory->destroy(density_fft);
-  memory->destroy(greensfn);
-  memory->destroy(work1);
-  memory->destroy(work2);
-  memory->destroy(vg);
-  density_fft = NULL;
-  greensfn = NULL;
-  work1 = NULL;
-  work2 = NULL;
-  vg = NULL;
-
-  // per-dimension k-vector tables
-  memory->destroy(fkx);
-  memory->destroy(fky);
-  memory->destroy(fkz);
-  fkx = NULL;
-  fky = NULL;
-  fkz = NULL;
-
-  // cu_* wrapper objects for the bricks, energy and virial
-  delete cu_density_brick;
-  delete cu_density_brick_int;
-  delete cu_vdx_brick;
-  delete cu_vdy_brick;
-  delete cu_vdz_brick;
-  delete cu_density_fft;
-  delete cu_energy;
-  delete cu_virial;
-  cu_density_brick = NULL;
-  cu_density_brick_int = NULL;
-  cu_vdx_brick = NULL;
-  cu_vdy_brick = NULL;
-  cu_vdz_brick = NULL;
-  cu_density_fft = NULL;
-  cu_energy = NULL;
-  cu_virial = NULL;
-
-#ifdef FFT_CUFFT
-  // wrappers that only exist in FFT_CUFFT builds
-  delete cu_greensfn;
-  delete cu_gf_b;
-  delete cu_vg;
-  delete cu_work1;
-  delete cu_work2;
-  delete cu_work3;
-  delete cu_fkx;
-  delete cu_fky;
-  delete cu_fkz;
-  cu_greensfn = NULL;
-  cu_gf_b = NULL;
-  cu_vg = NULL;
-  cu_work1 = NULL;
-  cu_work2 = NULL;
-  cu_work3 = NULL;
-  cu_fkx = NULL;
-  cu_fky = NULL;
-  cu_fkz = NULL;
-#endif
-
-  delete cu_flag;
-  delete cu_debugdata;
-  delete cu_rho_coeff;
-  cu_flag = NULL;
-  cu_debugdata = NULL;
-  cu_rho_coeff = NULL;
-
-  // cu_part2grid is only reset here; this function does not delete it
-  cu_part2grid = NULL;
-
-  // communication buffers
-  memory->destroy(buf1);
-  memory->destroy(buf2);
-  buf1 = NULL;
-  buf2 = NULL;
-
-  delete [] gf_b;
-  gf_b = NULL;
-
-  // stencil tables; the offsets passed here depend on the stencil order
-  memory->destroy2d_offset(rho1d,-order/2);
-  rho1d = NULL;
-  memory->destroy2d_offset(rho_coeff,(1-order)/2);
-  rho_coeff = NULL;
-
-  // FFT and remap helper objects
-  delete fft1c;
-  fft1c = NULL;
-  delete fft2c;
-  fft2c = NULL;
-  delete remap;
-  remap = NULL;
-}
-
-/* ----------------------------------------------------------------------
- set size of FFT grid (nx,ny,nz_pppm) and g_ewald
--------------------------------------------------------------------------*/
-
-void PPPMCuda::set_grid()
-{
-  // Chooses g_ewald (left untouched when gewaldflag is set) and the FFT
-  // grid nx_pppm,ny_pppm,nz_pppm (search skipped when gridflag is set),
-  // then prints the accuracy estimates on the rank with me == 0.
-
-  // see JCP 109, pg 7698 for derivation of coefficients
-  // higher order coefficients may be computed if needed
-
-  double **acons;
-  memory->create(acons,8,7,"pppm:acons");
-
-  acons[1][0] = 2.0 / 3.0;
-  acons[2][0] = 1.0 / 50.0;
-  acons[2][1] = 5.0 / 294.0;
-  acons[3][0] = 1.0 / 588.0;
-  acons[3][1] = 7.0 / 1440.0;
-  acons[3][2] = 21.0 / 3872.0;
-  acons[4][0] = 1.0 / 4320.0;
-  acons[4][1] = 3.0 / 1936.0;
-  acons[4][2] = 7601.0 / 2271360.0;
-  acons[4][3] = 143.0 / 28800.0;
-  acons[5][0] = 1.0 / 23232.0;
-  acons[5][1] = 7601.0 / 13628160.0;
-  acons[5][2] = 143.0 / 69120.0;
-  acons[5][3] = 517231.0 / 106536960.0;
-  acons[5][4] = 106640677.0 / 11737571328.0;
-  acons[6][0] = 691.0 / 68140800.0;
-  acons[6][1] = 13.0 / 57600.0;
-  acons[6][2] = 47021.0 / 35512320.0;
-  acons[6][3] = 9694607.0 / 2095994880.0;
-  acons[6][4] = 733191589.0 / 59609088000.0;
-  acons[6][5] = 326190917.0 / 11700633600.0;
-  acons[7][0] = 1.0 / 345600.0;
-  acons[7][1] = 3617.0 / 35512320.0;
-  acons[7][2] = 745739.0 / 838397952.0;
-  acons[7][3] = 56399353.0 / 12773376000.0;
-  acons[7][4] = 25091609.0 / 1560084480.0;
-  acons[7][5] = 1755948832039.0 / 36229939200000.0;
-  acons[7][6] = 4887769399.0 / 37838389248.0;
-
-  double q2 = qsqsum * force->qqrd2e/ force->dielectric;
-  bigint natoms = atom->natoms;
-
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab PPPMCuda
-  // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  // make initial g_ewald estimate
-  // based on desired error and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  double h_x,h_y,h_z;
-
-  if (!gewaldflag)
-    g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) /
-                        (2.0*q2))) / cutoff;
-
-  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision
-  // nz_pppm uses extended zprd_slab instead of zprd
-  // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1
-  // reduce it until precision target is met
-  // each pass evaluates err for the current spacing and then always
-  // advances to the next grid count, so a loop that runs leaves the
-  // count one past the last spacing that was tested
-
-  if (!gridflag) {
-    double err;
-    h_x = h_y = h_z = 1/g_ewald;
-
-    // truncate to an integer number of grid points per dimension
-    nx_pppm = static_cast<int> (xprd/h_x + 1);
-    ny_pppm = static_cast<int> (yprd/h_y + 1);
-    nz_pppm = static_cast<int> (zprd_slab/h_z + 1);
-
-    err = rms(h_x,xprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_x,xprd,natoms,q2,acons);
-      nx_pppm++;
-      h_x = xprd/nx_pppm;
-    }
-
-    err = rms(h_y,yprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_y,yprd,natoms,q2,acons);
-      ny_pppm++;
-      h_y = yprd/ny_pppm;
-    }
-
-    err = rms(h_z,zprd_slab,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_z,zprd_slab,natoms,q2,acons);
-      nz_pppm++;
-      h_z = zprd_slab/nz_pppm;
-    }
-  }
-
-  // boost grid size until it is factorable
-
-  while (!factorable(nx_pppm)) nx_pppm++;
-  while (!factorable(ny_pppm)) ny_pppm++;
-  while (!factorable(nz_pppm)) nz_pppm++;
-
-  // adjust g_ewald for new grid size
-
-  h_x = xprd/nx_pppm;
-  h_y = yprd/ny_pppm;
-  h_z = zprd_slab/nz_pppm;
-
-  if (!gewaldflag) {
-    double gew1,gew2,dgew,f,fmid,hmin,rtb;
-    int ncount;
-
-    // diffpr() is evaluated with g_ewald set to each end of the bracket
-    // [0, 10/hmin]; the two values must differ in sign
-
-    gew1 = 0.0;
-    g_ewald = gew1;
-    f = diffpr(h_x,h_y,h_z,q2,acons);
-
-    hmin = MIN(h_x,MIN(h_y,h_z));
-    gew2 = 10/hmin;
-    g_ewald = gew2;
-    fmid = diffpr(h_x,h_y,h_z,q2,acons);
-
-    if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G");
-
-    // bisection: rtb is the end where diffpr() is negative, dgew the
-    // signed step toward the other end; stop when the step is below
-    // SMALL, diffpr() is exactly zero, or after LARGE iterations (error)
-
-    rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2);
-    ncount = 0;
-    while (fabs(dgew) > SMALL && fmid != 0.0) {
-      dgew *= 0.5;
-      g_ewald = rtb + dgew;
-      fmid = diffpr(h_x,h_y,h_z,q2,acons);
-      if (fmid <= 0.0) rtb = g_ewald;
-      ncount++;
-      if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G");
-    }
-  }
-
-  // final RMS precision
-
-  double lprx = rms(h_x,xprd,natoms,q2,acons);
-  double lpry = rms(h_y,yprd,natoms,q2,acons);
-  double lprz = rms(h_z,zprd_slab,natoms,q2,acons);
-  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-  double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
-    sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
-
-  // free local memory
-
-  memory->destroy(acons);
-
-  // print info: identical report to screen and logfile, whichever exist
-
-  if (me == 0) {
-    FILE *out[2] = {screen,logfile};
-    for (int i = 0; i < 2; i++) {
-      if (!out[i]) continue;
-      fprintf(out[i]," G vector = %g\n",g_ewald);
-      fprintf(out[i]," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(out[i]," stencil order = %d\n",order);
-      fprintf(out[i]," absolute RMS force accuracy = %g\n",MAX(lpr,spr));
-      fprintf(out[i]," relative force accuracy = %g\n",
-              MAX(lpr,spr)/two_charge_force);
-    }
-  }
-}
-
-
-/* ----------------------------------------------------------------------
- find center grid pt for each of my particles
- check that full stencil for the particle will fit in my 3d brick
- store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-
-void PPPMCuda::particle_map()
-{
-  // Runs cuda_particle_map() and aborts on all ranks when any rank
-  // reports a non-zero flag.  On a rank with a non-zero flag, diagnostic
-  // information is printed first: the entry described by debugdata and
-  // every local atom whose stencil falls outside the *_out brick bounds.
-
-  MYDBG(printf("# CUDA PPPMCuda::particle_map() ... start\n");)
-  int flag = 0;
-
-  cu_flag->memset_device(0);
-  flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data());
-  if(flag)
-  {
-    // fetch debugdata and print the atom it describes
-    cu_debugdata->download();
-    printf("Out of range atom: ");
-    printf("ID: %i ",atom->tag[int(debugdata[0])]);
-    printf("x: %e ",debugdata[7]);
-    printf("y: %e ",debugdata[8]);
-    printf("z: %e ",debugdata[9]);
-    printf("nx: %e ",debugdata[4]);
-    printf("ny: %e ",debugdata[5]);
-
-    printf("\n");
-
-    // presumably refreshes atom->x before it is read below -- confirm
-    cuda->cu_x->download();
-    int nx,ny,nz;
-
-    double **x = atom->x;
-    int nlocal = atom->nlocal;
-    for (int i = 0; i < nlocal; i++) {
-      // grid index of atom i in each dimension
-      nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
-      ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
-      nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
-      // report only atoms whose stencil [n+nlower, n+nupper] leaves the brick
-      if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
-          ny+nlower < nylo_out || ny+nupper > nyhi_out ||
-          nz+nlower < nzlo_out || nz+nupper > nzhi_out)
-        printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz);
-    }
-  }
-
-  // every rank must agree before aborting
-  int flag_all;
-  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
-  if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!");
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid
-------------------------------------------------------------------------- */
-
-
-void PPPMCuda::make_rho()
-{
-  // all work is delegated to cuda_make_rho(), which receives the shared
-  // data block, the flag buffer, the integer density scale, the brick
-  // bounds (hi before lo per dimension) and both density brick buffers
-  cuda_make_rho(&cuda->shared_data,
-                cu_flag->dev_data(),
-                &density_intScale,
-                nxhi_out,nxlo_out,
-                nyhi_out,nylo_out,
-                nzhi_out,nzlo_out,
-                cu_density_brick->dev_data(),
-                cu_density_brick_int->dev_data());
-}
-
-
-/* ----------------------------------------------------------------------
- FFT-based Poisson solver
-------------------------------------------------------------------------- */
-void PPPMCuda::poisson(int eflag, int vflag)
-{
-#ifdef FFT_CUFFT
-  // FFT_CUFFT build: run the solver steps below and add the wall-clock
-  // time spent inside each FFT call to poissontime
-
-  my_times tstart;
-  my_times tstop;
-
-  // first transform: density_fft -> work1 (flag 1)
-
-  my_gettime(CLOCK_REALTIME,&tstart);
-  fft1c->compute(density_fft,work1,1);
-  my_gettime(CLOCK_REALTIME,&tstop);
-  poissontime+=(tstop.tv_sec-tstart.tv_sec+1.0*(tstop.tv_nsec-tstart.tv_nsec)/1000000000);
-
-  // energy and, if requested, virial contributions
-
-  if (eflag || vflag) {
-    poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag);
-    ENERGY_FLOAT gpuvirial[6];
-    energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial);
-    if (vflag)
-      for (int k = 0; k < 6; k++) virial[k] += gpuvirial[k];
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  poisson_scale(nx_pppm,ny_pppm,nz_pppm);
-
-  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  poisson_xgrad(nx_pppm,ny_pppm,nz_pppm);
-
-  my_gettime(CLOCK_REALTIME,&tstart);
-  fft2c->compute(work2,work2,-1);
-  my_gettime(CLOCK_REALTIME,&tstop);
-  poissontime+=(tstop.tv_sec-tstart.tv_sec+1.0*(tstop.tv_nsec-tstart.tv_nsec)/1000000000);
-
-  poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
-
-  // y direction gradient
-
-  poisson_ygrad(nx_pppm,ny_pppm,nz_pppm);
-
-  my_gettime(CLOCK_REALTIME,&tstart);
-  fft2c->compute(work2,work2,-1);
-  my_gettime(CLOCK_REALTIME,&tstop);
-  poissontime+=(tstop.tv_sec-tstart.tv_sec+1.0*(tstop.tv_nsec-tstart.tv_nsec)/1000000000);
-
-  poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
-
-  // z direction gradient
-
-  poisson_zgrad(nx_pppm,ny_pppm,nz_pppm);
-
-  my_gettime(CLOCK_REALTIME,&tstart);
-  fft2c->compute(work2,work2,-1);
-  my_gettime(CLOCK_REALTIME,&tstop);
-  poissontime+=(tstop.tv_sec-tstart.tv_sec+1.0*(tstop.tv_nsec-tstart.tv_nsec)/1000000000);
-
-  poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
-#else
-  // build without FFT_CUFFT: defer entirely to the base-class solver
-  PPPM::poisson(eflag,vflag);
-#endif
-}
-
-/*----------------------------------------------------------------------
- interpolate from grid to get electric field & force on my particles
--------------------------------------------------------------------------*/
-
-void PPPMCuda::fieldforce()
-{
-  // the whole step is handed to cuda_fieldforce() together with the
-  // shared data block and the cu_flag object
-  cuda_fieldforce(&cuda->shared_data,cu_flag);
-}
-
-/* ----------------------------------------------------------------------
- report timing for the 4 FFTs required for N timesteps, derived from the stored pppm_poisson time (no FFTs are run here)
-------------------------------------------------------------------------- */
-
-int PPPMCuda::timing_1d(int n, double &time1d)
-{
-  // time1d = stored pppm_poisson timing per step (divided by
-  // update->nsteps), divided by the FFT count and scaled to n steps;
-  // the FFT count is also the return value
-  const int nfft = 4;
-  time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/nfft*n;
-  return nfft;
-}
-
-int PPPMCuda::timing_3d(int n, double &time3d)
-{
-  // time3d = stored pppm_poisson timing per step (divided by
-  // update->nsteps) scaled to n steps; always returns 4
-  const int nfft = 4;
-  time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n;
-  return nfft;
-}
-
-void PPPMCuda::slabcorr(int eflag)
-{ /* Slab correction: obtains a per-rank dipole value from
-   cuda_slabcorr_energy(), sums it over all ranks, and passes a force
-   factor computed from that sum to cuda_slabcorr_force().
-   Calls error->all() when eflag is set or |qsum| > SMALL. */
- // compute local contribution to global dipole moment
- if(slabbuf==NULL) // first call: create the reduction buffer and its cu_ wrapper
- {
- slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; // one entry per 32 atoms of nmax, rounded up
- cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); // NOTE(review): cCudaData template arguments appear to be missing here -- confirm against upstream
- }
- if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_FLOAT)>=unsigned(cu_slabbuf->dev_size())) // needed bytes (from nlocal) reach dev_size(): rebuild both buffers; dev_size() presumably in bytes -- confirm
- {
- delete [] slabbuf;
- delete cu_slabbuf;
- slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; // new size is again based on nmax, not nlocal
- cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); // NOTE(review): same missing template arguments as above
- }
-
-
- double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_FLOAT*) cu_slabbuf->dev_data());
-
- double dipole_all;
- MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); // sum the per-rank values over all ranks
-
- // compute corrections
-
- double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; // only referenced by the commented-out energy line below
-
- //if (eflag) energy += qqrd2e*scale * e_slabcorr;
- // need to add a correction to make non-neutral systems and per-atom energy translationally invariant
- if (eflag || fabs(qsum) > SMALL) // unsupported cases abort here, after the reduction above has run
- error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting.");
-
- double ffact = -4.0*MY_PI*dipole_all/volume; // force factor handed to cuda_slabcorr_force()
-
- cuda_slabcorr_force(&cuda->shared_data,ffact);
-}
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+
+ Original Version:
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ See the README file in the top-level LAMMPS directory.
+
+ -----------------------------------------------------------------------
+
+ USER-CUDA Package and associated modifications:
+ https://sourceforge.net/projects/lammpscuda/
+
+ Christian Trott, christian.trott@tu-ilmenau.de
+ Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
+ Theoretical Physics II, University of Technology Ilmenau, Germany
+
+ See the README file in the USER-CUDA directory.
+
+ This software is distributed under the GNU General Public License.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+
+#include "mpi.h"
+#include