From d3ad8087022b013f6eadbfdd3b3cc6165ae50a22 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 4 Mar 2014 15:57:40 +0000 Subject: [PATCH 01/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11583 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/lattice.html | 6 ++++-- doc/lattice.txt | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/lattice.html b/doc/lattice.html index f0c45f21a2..e3dd79db8a 100644 --- a/doc/lattice.html +++ b/doc/lattice.html @@ -121,8 +121,10 @@ and a3 are 3 orthogonal unit vectors (edges of a unit cube). But you can specify them to be of any length and non-orthogonal to each other, so that they describe a tilted parallelepiped. Via the basis keyword you add atoms, one at a time, to the unit cell. Its arguments -are fractional coordinates (0.0 <= x,y,z < 1.0), so that a value of -0.5 means a position half-way across the unit cell in that dimension. +are fractional coordinates (0.0 <= x,y,z < 1.0). The position vector +x of a basis atom within the unit cell is thus a linear combination of +the unit cell's 3 edge vectors, i.e. x = bx a1 + by a2 + bz a3, +where bx,by,bz are the 3 values specified for the basis keyword.


diff --git a/doc/lattice.txt b/doc/lattice.txt index ec622d3434..38f38ae58b 100644 --- a/doc/lattice.txt +++ b/doc/lattice.txt @@ -113,8 +113,10 @@ and a3 are 3 orthogonal unit vectors (edges of a unit cube). But you can specify them to be of any length and non-orthogonal to each other, so that they describe a tilted parallelepiped. Via the {basis} keyword you add atoms, one at a time, to the unit cell. Its arguments -are fractional coordinates (0.0 <= x,y,z < 1.0), so that a value of -0.5 means a position half-way across the unit cell in that dimension. +are fractional coordinates (0.0 <= x,y,z < 1.0). The position vector +x of a basis atom within the unit cell is thus a linear combination of +the unit cell's 3 edge vectors, i.e. x = bx a1 + by a2 + bz a3, +where bx,by,bz are the 3 values specified for the {basis} keyword. :line From f1e2227ed1cf33c4ab6f097ae280559099324430 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 4 Mar 2014 16:45:44 +0000 Subject: [PATCH 02/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11584 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/ewald.cpp | 2984 +++--- src/KSPACE/ewald_disp.cpp | 2949 +++--- src/KSPACE/msm.cpp | 2 +- src/KSPACE/pppm.cpp | 7002 +++++++-------- src/KSPACE/pppm_disp.cpp | 16418 +++++++++++++++++----------------- src/KSPACE/pppm_old.cpp | 5726 ++++++------ src/USER-CUDA/pppm_cuda.cpp | 2872 +++--- 7 files changed, 18977 insertions(+), 18976 deletions(-) diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index ac98f224f2..f750c2cbf3 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -1,1492 +1,1492 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) - group/group energy/force added by Stan Moore (BYU) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "stdlib.h" -#include "stdio.h" -#include "string.h" -#include "math.h" -#include "ewald.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "math_const.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define SMALL 0.00001 - -/* ---------------------------------------------------------------------- */ - -Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); - - ewaldflag = 1; - group_group_enable = 1; - group_allocate_flag = 0; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - kmax = 0; - kxvecs = kyvecs = kzvecs = NULL; - ug = NULL; - eg = vg = NULL; - sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; - - nmax = 0; - ek = NULL; - cs = sn = NULL; - - kcount = 0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -Ewald::~Ewald() -{ - deallocate(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); -} - -/* 
---------------------------------------------------------------------- */ - -void Ewald::init() -{ - if (comm->me == 0) { - if (screen) fprintf(screen,"Ewald initialization ...\n"); - if (logfile) fprintf(logfile,"Ewald initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use Ewald with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab Ewald"); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " - "and slab correction"); - } - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double cutoff = *p_cutoff; - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * 
force->qqrd2e / force->dielectric; - bigint natoms = atom->natoms; - - triclinic = domain->triclinic; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab Ewald - // 3d Ewald just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // setup Ewald coefficients so can print stats - - setup(); - - // final RMS accuracy - - double lprx = rms(kxmax_orig,xprd,natoms,q2); - double lpry = rms(kymax_orig,yprd,natoms,q2); - double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // stats - - if (comm->me == 0) { - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(screen," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); 
- fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - } -} - -/* ---------------------------------------------------------------------- - adjust Ewald coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void Ewald::setup() -{ - // volume-dependent factors - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - - // adjustment of z dimension for 2d slab Ewald - // 3d Ewald just uses zprd since slab_volfactor = 1.0 - - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - unitk[0] = 2.0*MY_PI/xprd; - unitk[1] = 2.0*MY_PI/yprd; - unitk[2] = 2.0*MY_PI/zprd_slab; - - int kmax_old = kmax; - - if (kewaldflag == 0) { - - // determine kmax - // function of current box size, accuracy, G_ewald (short-range cutoff) - - bigint natoms = atom->natoms; - double err; - kxmax = 1; - kymax = 1; - kzmax = 1; - - err = rms(kxmax,xprd,natoms,q2); - while (err > accuracy) { - kxmax++; - err = rms(kxmax,xprd,natoms,q2); - } - - err = rms(kymax,yprd,natoms,q2); - while (err > accuracy) { - kymax++; - err = rms(kymax,yprd,natoms,q2); - } - - err = rms(kzmax,zprd_slab,natoms,q2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,zprd_slab,natoms,q2); - } - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - 
- // scale lattice vectors for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = kxmax/xprd; - tmp[1] = kymax/yprd; - tmp[2] = kzmax/zprd; - lamda2xT(&tmp[0],&tmp[0]); - kxmax = MAX(1,static_cast(tmp[0])); - kymax = MAX(1,static_cast(tmp[1])); - kzmax = MAX(1,static_cast(tmp[2])); - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - } - - } else { - - kxmax = kx_ewald; - kymax = ky_ewald; - kzmax = kz_ewald; - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - } - - gsqmx *= 1.00001; - - // if size has grown, reallocate k-dependent and nlocal-dependent arrays - - if (kmax > kmax_old) { - deallocate(); - allocate(); - group_allocate_flag = 0; - - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // pre-compute Ewald coefficients - - if (triclinic == 0) - coeffs(); - else - coeffs_triclinic(); -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double Ewald::rms(int km, double prd, bigint natoms, double q2) -{ - double value = 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); - - return value; -} - -/* ---------------------------------------------------------------------- - compute the Ewald long-range 
force, energy, virial -------------------------------------------------------------------------- */ - -void Ewald::compute(int eflag, int vflag) -{ - int i,j,k; - - // set energy/virial flags - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // partial structure factors on each processor - // total structure factor by summing over procs - - if (triclinic == 0) - eik_dot_r(); - else - eik_dot_r_triclinic(); - - MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - // K-space portion of electric field - // double loop over K-vectors and local atoms - // perform per-atom calculations if needed - - double **f = atom->f; - double *q = atom->q; - int nlocal = atom->nlocal; - - int kx,ky,kz; - double cypz,sypz,exprl,expim,partial,partial_peratom; - - for (i = 0; i < nlocal; i++) { - ek[i][0] = 0.0; - ek[i][1] = 0.0; - ek[i][2] = 0.0; - } - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; - ek[i][0] += partial*eg[k][0]; - ek[i][1] += partial*eg[k][1]; - ek[i][2] += partial*eg[k][2]; - - if (evflag_atom) { - partial_peratom = exprl*sfacrl_all[k] + 
expim*sfacim_all[k]; - if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; - if (vflag_atom) - for (j = 0; j < 6; j++) - vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; - } - } - } - - // convert E-field to force - - const double qscale = force->qqrd2e * scale; - - for (i = 0; i < nlocal; i++) { - f[i][0] += qscale * q[i]*ek[i][0]; - f[i][1] += qscale * q[i]*ek[i][1]; - if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; - } - - // global energy - - if (eflag_global) { - for (k = 0; k < kcount; k++) - energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + - sfacim_all[k]*sfacim_all[k]); - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // global virial - - if (vflag_global) { - double uk; - for (k = 0; k < kcount; k++) { - uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); - for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; - } - for (j = 0; j < 6; j++) virial[j] *= qscale; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - n = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - sqk = unitk[ic]*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); - sn[1][ic][i] = 
sin(unitk[ic]*x[i][ic]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - cstr1 += q[i]*cs[1][ic][i]; - sstr1 += q[i]*sn[1][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - - for (m = 2; m <= kmax; m++) { - for (ic = 0; ic < 3; ic++) { - sqk = m*unitk[ic] * m*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - cstr1 += q[i]*cs[m][ic][i]; - sstr1 += q[i]*sn[m][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - } - - // 1 = (k,l,0), 2 = (k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - 
sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + - (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - cstr3 = 0.0; - sstr3 = 0.0; - cstr4 = 0.0; - sstr4 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr4 += q[i]*(cs[k][0][i]*clpm - 
sn[k][0][i]*slpm); - sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - sfacrl[n] = cstr3; - sfacim[n++] = sstr3; - sfacrl[n] = cstr4; - sfacim[n++] = sstr4; - } - } - } - } -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r_triclinic() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - double unitk_lamda[3]; - - double max_kvecs[3]; - max_kvecs[0] = kxmax; - max_kvecs[1] = kymax; - max_kvecs[2] = kzmax; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - if (sqk <= gsqmx) { - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - } - } - } - - for (ic = 0; ic < 3; ic++) { - for (m = 2; m <= max_kvecs[ic]; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - } - } - } - - for (n = 0; n < kcount; n++) { - k = kxvecs[n]; - l = kyvecs[n]; - m = kzvecs[n]; - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - 
sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n] = sstr1; - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector -------------------------------------------------------------------------- */ - -void Ewald::coeffs() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - kcount = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (m = 1; m <= kmax; m++) { - sqk = (m*unitk[0]) * (m*unitk[0]); - if (sqk <= gsqmx) { - kxvecs[kcount] = m; - kyvecs[kcount] = 0; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[1]) * (m*unitk[1]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = m; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[2]) * (m*unitk[2]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 
0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } - - // 1 = (k,l,0), 2 = (k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++;; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + 
vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) 
{ - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + - (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = -m; - 
ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector for a triclinic - system -------------------------------------------------------------------------- */ - -void Ewald::coeffs_triclinic() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - double unitk_lamda[3]; - - kcount = 0; - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = -kymax; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - unitk_lamda[0] = 2.0*MY_PI*k; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + - unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; - vg[kcount][4] = 
vterm*unitk_lamda[0]*unitk_lamda[2]; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - - // (0,0,m) - - for (m = 1; m <= kmax; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate() -{ - kxvecs = new int[kmax3d]; - kyvecs = new int[kmax3d]; 
- kzvecs = new int[kmax3d]; - - ug = new double[kmax3d]; - memory->create(eg,kmax3d,3,"ewald:eg"); - memory->create(vg,kmax3d,6,"ewald:vg"); - - sfacrl = new double[kmax3d]; - sfacim = new double[kmax3d]; - sfacrl_all = new double[kmax3d]; - sfacim_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate() -{ - delete [] kxvecs; - delete [] kyvecs; - delete [] kzvecs; - - delete [] ug; - memory->destroy(eg); - memory->destroy(vg); - - delete [] sfacrl; - delete [] sfacim; - delete [] sfacrl_all; - delete [] sfacim_all; -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void Ewald::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double Ewald::memory_usage() -{ - double bytes = 3 * kmax3d * sizeof(int); - bytes += (1 + 3 + 6) * kmax3d * sizeof(double); - bytes += 4 * kmax3d * sizeof(double); - bytes += nmax*3 * sizeof(double); - bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); - return 
bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the Ewald total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - int i,k; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // partial and total structure factors for groups A and B - - for (k = 0; k < kcount; k++) { - - // group A - - sfacrl_A[k] = 0.0; - sfacim_A[k] = 0.0; - sfacrl_A_all[k] = 0.0; - sfacim_A_all[k] = 0; - - // group B - - sfacrl_B[k] = 0.0; - sfacim_B[k] = 0.0; - sfacrl_B_all[k] = 0.0; - sfacim_B_all[k] = 0.0; - } - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - int kx,ky,kz; - double cypz,sypz,exprl,expim; - - // partial structure factors for groups A and B on each processor - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - - // group A - - if (mask[i] & groupbit_A) { - sfacrl_A[k] += q[i]*exprl; - 
sfacim_A[k] += q[i]*expim; - } - - // group B - - if (mask[i] & groupbit_B) { - sfacrl_B[k] += q[i]*exprl; - sfacim_B[k] += q[i]*expim; - } - } - } - } - - // total structure factor by summing over procs - - MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - const double qscale = force->qqrd2e * scale; - double partial_group; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - for (k = 0; k < kcount; k++) { - partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + - sfacim_A_all[k]*sfacim_B_all[k]; - e2group += ug[k]*partial_group; - } - - e2group *= qscale; - - // total group A <--> group B force - - for (k = 0; k < kcount; k++) { - partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - - sfacrl_A_all[k]*sfacim_B_all[k]; - f2group[0] += eg[k][0]*partial_group; - f2group[1] += eg[k][1]*partial_group; - if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; - } - - f2group[0] *= qscale; - f2group[1] *= qscale; - f2group[2] *= qscale; - - // 2d slab correction - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of 
K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate_groups() -{ - // group A - - sfacrl_A = new double[kmax3d]; - sfacim_A = new double[kmax3d]; - sfacrl_A_all = new double[kmax3d]; - sfacim_A_all = new double[kmax3d]; - - // group B - - sfacrl_B = new double[kmax3d]; - sfacim_B = new double[kmax3d]; - sfacrl_B_all = new double[kmax3d]; - sfacim_B_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate_groups() -{ - // group A - - delete [] sfacrl_A; - delete [] sfacim_A; - delete [] sfacrl_A_all; - delete [] sfacim_A_all; - - // group B - - delete [] sfacrl_B; - delete [] sfacim_B; - delete [] sfacrl_B_all; - delete [] sfacim_B_all; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) + group/group energy/force added by Stan Moore (BYU) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "stdlib.h" +#include "stdio.h" +#include "string.h" +#include "math.h" +#include "ewald.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); + + ewaldflag = 1; + group_group_enable = 1; + group_allocate_flag = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + kmax = 0; + kxvecs = kyvecs = kzvecs = NULL; + ug = NULL; + eg = vg = NULL; + sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; + + nmax = 0; + ek = NULL; + cs = sn = NULL; + + kcount = 0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +Ewald::~Ewald() +{ + deallocate(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::init() +{ + if (comm->me == 0) { + if (screen) fprintf(screen,"Ewald initialization ...\n"); + if (logfile) fprintf(logfile,"Ewald 
initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use Ewald with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab Ewald"); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " + "and slab correction"); + } + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double cutoff = *p_cutoff; + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + bigint natoms = atom->natoms; + + triclinic = domain->triclinic; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab Ewald + // 3d Ewald just uses zprd 
since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // setup Ewald coefficients so can print stats + + setup(); + + // final RMS accuracy + + double lprx = rms(kxmax_orig,xprd,natoms,q2); + double lpry = rms(kymax_orig,yprd,natoms,q2); + double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // stats + + if (comm->me == 0) { + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(screen," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," KSpace vectors: actual max1d 
max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + } +} + +/* ---------------------------------------------------------------------- + adjust Ewald coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void Ewald::setup() +{ + // volume-dependent factors + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + + // adjustment of z dimension for 2d slab Ewald + // 3d Ewald just uses zprd since slab_volfactor = 1.0 + + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + unitk[0] = 2.0*MY_PI/xprd; + unitk[1] = 2.0*MY_PI/yprd; + unitk[2] = 2.0*MY_PI/zprd_slab; + + int kmax_old = kmax; + + if (kewaldflag == 0) { + + // determine kmax + // function of current box size, accuracy, G_ewald (short-range cutoff) + + bigint natoms = atom->natoms; + double err; + kxmax = 1; + kymax = 1; + kzmax = 1; + + err = rms(kxmax,xprd,natoms,q2); + while (err > accuracy) { + kxmax++; + err = rms(kxmax,xprd,natoms,q2); + } + + err = rms(kymax,yprd,natoms,q2); + while (err > accuracy) { + kymax++; + err = rms(kymax,yprd,natoms,q2); + } + + err = rms(kzmax,zprd_slab,natoms,q2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,zprd_slab,natoms,q2); + } + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + // scale lattice vectors for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = kxmax/xprd; + tmp[1] = kymax/yprd; + tmp[2] = kzmax/zprd; + lamda2xT(&tmp[0],&tmp[0]); + kxmax = MAX(1,static_cast(tmp[0])); + kymax = 
MAX(1,static_cast(tmp[1])); + kzmax = MAX(1,static_cast(tmp[2])); + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + } + + } else { + + kxmax = kx_ewald; + kymax = ky_ewald; + kzmax = kz_ewald; + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + } + + gsqmx *= 1.00001; + + // if size has grown, reallocate k-dependent and nlocal-dependent arrays + + if (kmax > kmax_old) { + deallocate(); + allocate(); + group_allocate_flag = 0; + + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // pre-compute Ewald coefficients + + if (triclinic == 0) + coeffs(); + else + coeffs_triclinic(); +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double Ewald::rms(int km, double prd, bigint natoms, double q2) +{ + double value = 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); + + return value; +} + +/* ---------------------------------------------------------------------- + compute the Ewald long-range force, energy, virial +------------------------------------------------------------------------- */ + +void Ewald::compute(int eflag, int vflag) +{ + int i,j,k; + + // set energy/virial flags + + if (eflag || vflag) 
ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + if (triclinic == 0) + eik_dot_r(); + else + eik_dot_r_triclinic(); + + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + // perform per-atom calculations if needed + + double **f = atom->f; + double *q = atom->q; + int nlocal = atom->nlocal; + + int kx,ky,kz; + double cypz,sypz,exprl,expim,partial,partial_peratom; + + for (i = 0; i < nlocal; i++) { + ek[i][0] = 0.0; + ek[i][1] = 0.0; + ek[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; + ek[i][0] += partial*eg[k][0]; + ek[i][1] += partial*eg[k][1]; + ek[i][2] += partial*eg[k][2]; + + if (evflag_atom) { + partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; + if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; + } + } + } + + // convert E-field to force + + const 
double qscale = force->qqrd2e * scale; + + for (i = 0; i < nlocal; i++) { + f[i][0] += qscale * q[i]*ek[i][0]; + f[i][1] += qscale * q[i]*ek[i][1]; + if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; + } + + // global energy + + if (eflag_global) { + for (k = 0; k < kcount; k++) + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // global virial + + if (vflag_global) { + double uk; + for (k = 0; k < kcount; k++) { + uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; + } + for (j = 0; j < 6; j++) virial[j] *= qscale; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + n = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + cstr1 += q[i]*cs[1][ic][i]; + sstr1 += q[i]*sn[1][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + + for (m = 2; m <= 
kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + cstr1 += q[i]*cs[m][ic][i]; + sstr1 += q[i]*sn[m][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= 
gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + sfacrl[n] = cstr3; + sfacim[n++] = sstr3; + sfacrl[n] = cstr4; + 
sfacim[n++] = sstr4; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r_triclinic() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + double unitk_lamda[3]; + + double max_kvecs[3]; + max_kvecs[0] = kxmax; + max_kvecs[1] = kymax; + max_kvecs[2] = kzmax; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + if (sqk <= gsqmx) { + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + } + } + } + + for (ic = 0; ic < 3; ic++) { + for (m = 2; m <= max_kvecs[ic]; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + } + } + } + + for (n = 0; n < kcount; n++) { + k = kxvecs[n]; + l = kyvecs[n]; + m = kzvecs[n]; + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n] = 
sstr1; + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector +------------------------------------------------------------------------- */ + +void Ewald::coeffs() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + kcount = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (m = 1; m <= kmax; m++) { + sqk = (m*unitk[0]) * (m*unitk[0]); + if (sqk <= gsqmx) { + kxvecs[kcount] = m; + kyvecs[kcount] = 0; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[1]) * (m*unitk[1]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = m; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[2]) * (m*unitk[2]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } + + // 1 = (k,l,0), 2 = 
(k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++;; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 
1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector for a triclinic + system +------------------------------------------------------------------------- */ + +void Ewald::coeffs_triclinic() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + double unitk_lamda[3]; + + kcount = 0; + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = -kymax; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + unitk_lamda[0] = 2.0*MY_PI*k; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; + vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2]; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + 
unitk_lamda[0] = 0.0; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + + // (0,0,m) + + for (m = 1; m <= kmax; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::allocate() +{ + kxvecs = new int[kmax3d]; + kyvecs = new int[kmax3d]; + kzvecs = new int[kmax3d]; + + ug = new double[kmax3d]; + memory->create(eg,kmax3d,3,"ewald:eg"); + memory->create(vg,kmax3d,6,"ewald:vg"); + + sfacrl = new double[kmax3d]; + sfacim = new double[kmax3d]; + sfacrl_all = new 
double[kmax3d]; + sfacim_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate() +{ + delete [] kxvecs; + delete [] kyvecs; + delete [] kzvecs; + + delete [] ug; + memory->destroy(eg); + memory->destroy(vg); + + delete [] sfacrl; + delete [] sfacim; + delete [] sfacrl_all; + delete [] sfacim_all; +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void Ewald::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + 
if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double Ewald::memory_usage() +{ + double bytes = 3 * kmax3d * sizeof(int); + bytes += (1 + 3 + 6) * kmax3d * sizeof(double); + bytes += 4 * kmax3d * sizeof(double); + bytes += nmax*3 * sizeof(double); + bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the Ewald total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + int i,k; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // partial and total structure factors for groups A and B + + for (k = 0; k < kcount; k++) { + + // group A + + sfacrl_A[k] = 0.0; + sfacim_A[k] = 0.0; + 
sfacrl_A_all[k] = 0.0; + sfacim_A_all[k] = 0; + + // group B + + sfacrl_B[k] = 0.0; + sfacim_B[k] = 0.0; + sfacrl_B_all[k] = 0.0; + sfacim_B_all[k] = 0.0; + } + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + int kx,ky,kz; + double cypz,sypz,exprl,expim; + + // partial structure factors for groups A and B on each processor + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + + // group A + + if (mask[i] & groupbit_A) { + sfacrl_A[k] += q[i]*exprl; + sfacim_A[k] += q[i]*expim; + } + + // group B + + if (mask[i] & groupbit_B) { + sfacrl_B[k] += q[i]*exprl; + sfacim_B[k] += q[i]*expim; + } + } + } + } + + // total structure factor by summing over procs + + MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + const double qscale = force->qqrd2e * scale; + double partial_group; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + for (k = 0; k < kcount; k++) { + partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + + sfacim_A_all[k]*sfacim_B_all[k]; + e2group += ug[k]*partial_group; + } + + e2group *= qscale; + + // total group A <--> group B force + + for (k = 0; k < kcount; k++) { + partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - + sfacrl_A_all[k]*sfacim_B_all[k]; + f2group[0] += 
eg[k][0]*partial_group; + f2group[1] += eg[k][1]*partial_group; + if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; + } + + f2group[0] *= qscale; + f2group[1] *= qscale; + f2group[2] *= qscale; + + // 2d slab correction + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + 
MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::allocate_groups() +{ + // group A + + sfacrl_A = new double[kmax3d]; + sfacim_A = new double[kmax3d]; + sfacrl_A_all = new double[kmax3d]; + sfacim_A_all = new double[kmax3d]; + + // group B + + sfacrl_B = new double[kmax3d]; + sfacim_B = new double[kmax3d]; + sfacrl_B_all = new double[kmax3d]; + sfacim_B_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate_groups() +{ + // group A + + delete [] sfacrl_A; + delete [] sfacim_A; + delete [] sfacrl_A_all; + delete [] sfacim_A_all; + + // group B + + delete [] sfacrl_B; + delete [] sfacim_B; + delete [] sfacrl_B_all; + delete [] sfacim_B_all; +} diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp index f623c3a5df..ba88e40f14 100644 --- a/src/KSPACE/ewald_disp.cpp +++ b/src/KSPACE/ewald_disp.cpp @@ -1,1474 +1,1475 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale 
Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "ewald_disp.h" -#include "math_vector.h" -#include "math_const.h" -#include "math_special.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "memory.h" -#include "error.h" -#include "update.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - -#define SMALL 0.00001 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h - -//#define DEBUG - -/* ---------------------------------------------------------------------- */ - -EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); - - ewaldflag = dispersionflag = dipoleflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - memset(function, 0, EWALD_NORDER*sizeof(int)); - kenergy = kvirial = NULL; - cek_local = cek_global = NULL; - ekr_local = NULL; - hvec = NULL; - kvec = NULL; - B = NULL; - first_output = 0; - energy_self_peratom = NULL; - virial_self_peratom = NULL; - nmax = 0; - q2 = 0; - b2 = 0; - M2 = 0; -} - -/* 
---------------------------------------------------------------------- */ - -EwaldDisp::~EwaldDisp() -{ - deallocate(); - deallocate_peratom(); - delete [] ekr_local; - delete [] B; -} - -/* --------------------------------------------------------------------- */ - -void EwaldDisp::init() -{ - nkvec = nkvec_max = nevec = nevec_max = 0; - nfunctions = nsums = sums = 0; - nbox = -1; - bytes = 0.0; - - if (!comm->me) { - if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); - if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); - } - - scale = 1.0; - mumurd2e = force->qqrd2e; - dielectric = force->dielectric; - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - if (!(ptr||cutoff)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_NFUNCS*sizeof(int)); - for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order - if (ewald_order&(1<all(FLERR, - "Unsupported mixing rule in kspace_style ewald/disp"); - default: - error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); - } - nfunctions += function[k] = 1; - nsums += n[k]; - } - - if (!gewaldflag) g_ewald = 0.0; - pair->init(); // so B is defined - init_coeffs(); - init_coeff_sums(); - - double qsum, qsqsum, bsbsum; - qsum = qsqsum = bsbsum = 0.0; - if (function[0]) { - qsum = sum[0].x; - qsqsum = sum[0].x2; - } - - // turn off coulombic if no charge - - if (function[0] && qsqsum == 0.0) { - function[0] = 0; - nfunctions -= 1; - nsums -= 1; - } - - if (function[1]) bsbsum = sum[1].x2; - if (function[2]) bsbsum = sum[2].x2; - - if (function[3]) M2 = sum[9].x2; - - if (function[3] && strcmp(update->unit_style,"electron") == 0) - error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); - - if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0) - error->all(FLERR,"Cannot use Ewald/disp solver " - "on system with no charge, dipole, or LJ particles"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - if (!function[1] && !function[2]) - dispersionflag = 0; - - if (!function[3]) - dipoleflag = 0; - - pair_check(); - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * force->qqrd2e / force->dielectric; - M2 *= mumurd2e / force->dielectric; - b2 = bsbsum; //Are these units right? 
- bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (function[0]) { - g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); - else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); - } - else if (function[1] || function[2]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } else if (function[3]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } - } - - if (!comm->me) { - if (screen) fprintf(screen, " G vector = %g\n", g_ewald); - if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); - } - - g_ewald_6 = g_ewald; - deallocate_peratom(); - peratom_allocate_flag = 0; -} - -/* ---------------------------------------------------------------------- - adjust EwaldDisp coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void EwaldDisp::setup() -{ - volume = shape_det(domain->h)*slab_volfactor; - memcpy(unit, domain->h_inv, sizeof(shape)); - shape_scalar_mult(unit, 2.0*MY_PI); - unit[2] /= slab_volfactor; - - // int nbox_old = nbox, nkvec_old = nkvec; - - if (accuracy >= 1) { - nbox = 0; - error->all(FLERR,"KSpace accuracy too low"); - } - - bigint natoms = atom->natoms; - double err; - int kxmax = 1; - int kymax = 1; - int kzmax = 1; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - while (err > 
accuracy) { - kxmax++; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - } - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - while (err > accuracy) { - kymax++; - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - } - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - } - nbox = MAX(kxmax,kymax); - nbox = MAX(nbox,kzmax); - double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; - double gsqymx = unit[1]*unit[1]*kymax*kymax; - double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - gsqmx *= 1.00001; - - reallocate(); - coefficients(); - init_coeffs(); - init_coeff_sums(); - init_self(); - - if (!(first_output||comm->me)) { - first_output = 1; - if (screen) fprintf(screen, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - if (logfile) fprintf(logfile, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - } -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) -{ - double value = 0.0; - - // Coulombic - - double g2 = g_ewald*g_ewald; - - value += 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); - - // Lennard-Jones - - double g7 = g2*g2*g2*g_ewald; - - value += 4.0*b2*g7/3.0 * - sqrt(1.0/(MY_PI*natoms)) * - (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * - (MY_PI*km/(g_ewald*prd) + 1)); - - // dipole - - value += 8.0*MY_PI*M2/volume*g_ewald * - sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * - exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); - - return value; -} - -void EwaldDisp::reallocate() -{ - int ix, iy, iz; - int nkvec_max = nkvec; - vector h; - - nkvec = 0; - int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; - int *flag = kflag; - - for (ix=0; 
ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if (!(ix||iy||iz)) *(flag++) = 0; - else if ((!ix)&&(iy<0)) *(flag++) = 0; - else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry - else { - h[0] = unit[0]*ix; - h[1] = unit[5]*ix+unit[1]*iy; - h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; - if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; - } - - if (nkvec>nkvec_max) { - deallocate(); // free memory - hvec = new hvector[nkvec]; // hvec - bytes += (nkvec-nkvec_max)*sizeof(hvector); - kvec = new kvector[nkvec]; // kvec - bytes += (nkvec-nkvec_max)*sizeof(kvector); - kenergy = new double[nkvec*nfunctions]; // kenergy - bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); - kvirial = new double[6*nkvec*nfunctions]; // kvirial - bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); - cek_local = new complex[nkvec*nsums]; // cek_local - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - cek_global = new complex[nkvec*nsums]; // cek_global - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - nkvec_max = nkvec; - } - - flag = kflag; // create index and - kvector *k = kvec; // wave vectors - hvector *hi = hvec; - for (ix=0; ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if (*(flag++)) { - hi->x = unit[0]*ix; - hi->y = unit[5]*ix+unit[1]*iy; - (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; - k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } - - delete [] kflag; -} - - -void EwaldDisp::reallocate_atoms() -{ - if (eflag_atom || vflag_atom) - if (atom->nlocal > nmax) { - deallocate_peratom(); - allocate_peratom(); - nmax = atom->nmax; - } - - if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; - delete [] ekr_local; - ekr_local = new cvector[nevec]; - bytes += (nevec-nevec_max)*sizeof(cvector); - nevec_max = nevec; -} - - -void EwaldDisp::allocate_peratom() -{ - memory->create(energy_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); - 
memory->create(virial_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); -} - - -void EwaldDisp::deallocate_peratom() // free memory -{ - memory->destroy(energy_self_peratom); - memory->destroy(virial_self_peratom); -} - - -void EwaldDisp::deallocate() // free memory -{ - delete [] hvec; hvec = NULL; - delete [] kvec; kvec = NULL; - delete [] kenergy; kenergy = NULL; - delete [] kvirial; kvirial = NULL; - delete [] cek_local; cek_local = NULL; - delete [] cek_global; cek_global = NULL; -} - - -void EwaldDisp::coefficients() -{ - vector h; - hvector *hi = hvec, *nh; - double eta2 = 0.25/(g_ewald*g_ewald); - double b1, b2, expb2, h1, h2, c1, c2; - double *ke = kenergy, *kv = kvirial; - int func0 = function[0], func12 = function[1]||function[2], - func3 = function[3]; - - for (nh = (hi = hvec)+nkvec; hintypes; - - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - delete [] B; - B = new double[n+1]; - bytes += (n+1)*sizeof(double); - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - double **sigma = (double **) force->pair->extract("sigma",tmp); - double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - - if (!(epsilon&&sigma)) - error->all( - FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; - } - } - } -} - -void EwaldDisp::init_coeff_sums() -{ - if (sums) return; // calculated only once - sums = 1; - - Sum sum_local[EWALD_MAX_NSUMS]; - - memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); - if (function[0]) { // 1/r - double *q = atom->q, *qn = q+atom->nlocal; - for (double *i=q; itype, 
*ntype = type+atom->nlocal; - for (int *i=type; itype, *ntype = type+atom->nlocal; - for (int *i=type; imu) { // dipole - double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; - for (double *i = mu; i < nmu; i += 4) - sum_local[9].x2 += i[3]*i[3]; - } - MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); -} - - -void EwaldDisp::init_self() -{ - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - - memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy - memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); - - if (function[0]) { // 1/r - virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; - energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; - } - if (function[1]) { // geometric 1/r^6 - virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; - energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; - } - if (function[2]) { // arithmetic 1/r^6 - virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ - sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); - energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; - } - if (function[3]) { // dipole - virial_self[3] = 0; // in surface - energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; - } -} - - -void EwaldDisp::init_self_peratom() -{ - if (!(vflag_atom || eflag_atom)) return; - - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - double *energy = energy_self_peratom[0]; - double *virial = virial_self_peratom[0]; - int nlocal = atom->nlocal; - - memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - - if (function[0]) { // 1/r - double *ei = energy; - double *vi = virial; - double ce = qscale*g1/MY_PIS; - double cv = -0.5*MY_PI*qscale/(g2*volume); - double *qi = atom->q, *qn = qi + nlocal; - for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double q = *qi; - *vi 
= cv*q*sum[0].x; - *ei = ce*q*q-vi[0]; - } - } - if (function[1]) { // geometric 1/r^6 - double *ei = energy+1; - double *vi = virial+1; - double ce = -g3*g3/12.0; - double cv = MY_PI*MY_PIS*g3/(6.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double b = B[*typei]; - *vi = cv*b*sum[1].x; - *ei = ce*b*b+vi[0]; - } - } - if (function[2]) { // arithmetic 1/r^6 - double *bi; - double *ei = energy+2; - double *vi = virial+2; - double ce = -g3*g3/3.0; - double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - bi = B+7*typei[0]+7; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; - - /* PJV 20120225: - should this be this instead? above implies an inverse dependence - seems to be the above way in original; i recall having tested - arithmetic mixing in the conception phase, but an extra test would - be prudent (pattern repeats in multiple functions below) - - bi = B+7*typei[0]; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; - - */ - - *ei = ce*bi[0]*bi[6]+vi[0]; - } - } - if (function[3]&&atom->mu) { // dipole - double *ei = energy+3; - double *vi = virial+3; - double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; - double ce = mumurd2e*2.0*g3/3.0/MY_PIS; - for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - *vi = 0; // in surface - *ei = ce*imu[3]*imu[3]-vi[0]; - } - } -} - - -/* ---------------------------------------------------------------------- - compute the EwaldDisp long-range force, energy, virial -------------------------------------------------------------------------- */ - -void EwaldDisp::compute(int eflag, int vflag) -{ - if (!nbox) return; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = eflag_global = 
vflag_global = eflag_atom = vflag_atom = 0; - - if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { - allocate_peratom(); - peratom_allocate_flag = 1; - nmax = atom->nmax; - } - - reallocate_atoms(); - init_self_peratom(); - compute_ek(); - compute_force(); - //compute_surface(); // assume conducting metal (tinfoil) boundary conditions - compute_energy(); - compute_energy_peratom(); - compute_virial(); - compute_virial_dipole(); - compute_virial_peratom(); -} - - -void EwaldDisp::compute_ek() -{ - cvector *ekr = ekr_local; - int lbytes = (2*nbox+1)*sizeof(cvector); - hvector *h = NULL; - kvector *k, *nk = kvec+nkvec; - cvector *z = new cvector[2*nbox+1]; - cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; - complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; - vector mui; - double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; - double bi = 0.0, ci[7]; - double *mu = atom->mu ? atom->mu[0] : NULL; - int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(cek_local, 0, n*sizeof(complex)); // reset sums - while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] - if (tri) { // triclinic z[1] - C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); - C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); - C_ANGLE(z1.z, x[2]*unit[2]); x += 3; - } - else { // orthogonal z[1] - C_ANGLE(z1.x, *(x++)*unit[0]); - C_ANGLE(z1.y, *(x++)*unit[1]); - C_ANGLE(z1.z, *(x++)*unit[2]); - } - for (; zzx, zz->x, z1.x); // 3D k-vector - C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); - C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); - } - kx = ky = -1; - cek = cek_local; - if (func[0]) qi = *(q++); - if (func[1]) bi = B[*type]; - if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); - if (func[3]) { - memcpy(mui, mu, sizeof(vector)); - mu += 4; - h = hvec; - } - for (k=kvec; ky) { // based on order in - if (kx!=k->x) cx = z[kx = k->x].x; // 
reallocate - C_RMULT(zxy, z[ky = k->y].y, cx); - } - C_RMULT(zxyz, z[k->z].z, zxy); - if (func[0]) { - cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; - } - if (func[1]) { - cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; - } - if (func[2]) for (i=0; i<7; ++i) { - cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; - } - if (func[3]) { - register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; - cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; - } - } - ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); - ++type; - } - MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); - - delete [] z; -} - - -void EwaldDisp::compute_force() -{ - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; - complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; - double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - if (atom->torque) t = atom->torque[0]; - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = - for (; fy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; - } - if (func[1]) { // geometric 1/r^6 - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[1][0] += h->x*im; 
sum[1][1] += h->y*im; sum[1][2] += h->z*im; - } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; - } - } - if (func[3]) { // dipole - register double im = *(ke)*(zc.im*cek->re+ - cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double im2 = *(ke)*(zc.re*cek->re- - cek->im*zc.im); - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - if (func[0]) { // charge-dipole - register double qi = *(q)*c[0]; - im = - *(ke)*(zc.re*cek_coul->re - - cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi; - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - - im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); - im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - } - ++cek; - ke++; - } - } - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bi = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bi)[0]*c[2]; - f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; - } - } - if (func[3]) { // dipole - f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - t += 3; - } -} - - -void EwaldDisp::compute_surface() -{ - // assume conducting metal (tinfoil) boundary conditions, so 
this function is - // not called because dielectric --> infinity, which makes all the terms here zero. - - if (!function[3]) return; - if (!atom->mu) return; - - vector sum_local = VECTOR_NULL, sum_total; - memset(sum_local, 0, sizeof(vector)); - double *i, *n, *mu = atom->mu[0]; - - for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { - sum_local[0] += (i++)[0]; - sum_local[1] += (i++)[0]; - sum_local[2] += (i++)[0]; - } - MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); - - virial_self[3] = - mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); - energy_self[3] -= virial_self[3]; - - if (!(vflag_atom || eflag_atom)) return; - - double *ei = energy_self_peratom[0]+3; - double *vi = virial_self_peratom[0]+3; - double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; - - for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { - *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); - *ei -= *vi; - } -} - - -void EwaldDisp::compute_energy() -{ - energy = 0.0; - if (!eflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *ke = kenergy; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - double sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums - for (int k=0; kre*cek->re+cek->im*cek->im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2] += *(ke++)*r; - } - if (func[3]) { // 
dipole - sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); - if (func[0]) { // charge-dipole - sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - } - ke++; - ++cek; - } - } - for (int k=0; kq; - double *eatomj = eatom; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke = kenergy; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - for (int j = 0; j < atom->nlocal; j++, ++eatomj) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; - sum[i] += im; - } - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; - if (func[0]) { // charge-dipole - register double qj = *(q)*c[0]; - sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; - } - ++cek; - ke++; - } - } - - if (func[0]) { // 1/r - register double qj = *(q++)*c[0]; - *eatomj += sum[0]*qj - energy_self_peratom[j][0]; - } - if 
(func[1]) { // geometric 1/r^6 - register double bj = B[*type]*c[1]; - *eatomj += sum[1]*bj - energy_self_peratom[j][1]; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - *eatomj += 0.5*sum[i]*c2; - } - *eatomj -= energy_self_peratom[j][2]; - } - if (func[3]) { // dipole - *eatomj += sum[9] - energy_self_peratom[j][3]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - } -} - - -#define swap(a, b) { register double t = a; a= b; b = t; } - -void EwaldDisp::compute_virial() -{ - memset(virial, 0, sizeof(shape)); - if (!vflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *kv = kvirial; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); - for (int k=0; kre*cek->re+cek->im*cek->im; - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register double r = cek->re*cek->re+cek->im*cek->im; ++cek; - sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; - sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; - } - if (func[3]) { - register double r 
= cek->re*cek->re+cek->im*cek->im; - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - } - ++cek; - } - } - for (int k=0; kmu ? atom->mu[0] : NULL; - double *vatomj = NULL; - if (vflag_atom && vatom) vatomj = vatom[0]; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(&sum[0], 0, 6*sizeof(double)); - memset(&sum_total[0], 0, 6*sizeof(double)); - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(&sum[0], 0, 6*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - double im = 0.0; - if (func[0]) { // 1/r - ke++; - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - ke++; - ++cek; - } - if (func[2]) { // arithmetic 1/r^6 - ke++; - for (i=2; i<9; ++i) { - ++cek; - } - } - if (func[3]) { // dipole - im = *(ke)*(zc.re*cek->re - cek->im*zc.im); - if (func[0]) { // charge-dipole - im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); - } - sum[0] -= mui[0]*h->x*im; - sum[1] -= mui[1]*h->y*im; - 
sum[2] -= mui[2]*h->z*im; - sum[3] -= mui[0]*h->y*im; - sum[4] -= mui[0]*h->z*im; - sum[5] -= mui[1]*h->z*im; - ++cek; - ke++; - } - } - - if (vflag_global) - for (int n = 0; n < 6; n++) - sum_total[n] -= sum[n]; - - if (vflag_atom) - for (int n = 0; n < 6; n++) - vatomj[n] -= sum[n]; - - z = (cvector *) ((char *) z+lbytes); - ++type; - if (vflag_atom) vatomj += 6; - } - - if (vflag_global) { - MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); - for (int n = 0; n < 6; n++) - virial[n] += sum[n]; - } - -} - -void EwaldDisp::compute_virial_peratom() -{ - if (!vflag_atom) return; - - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector mui = VECTOR_NULL; - complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; - double *kv; - double *q = atom->q; - double *vatomj = vatom ? vatom[0] : NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_MAX_NSUMS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - kv = kvirial; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - if (func[3]) cek_coul = cek; - register double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[0][0] += *(kv++)*r; - sum[0][1] += *(kv++)*r; - sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; - sum[0][4] += 
*(kv++)*r; - sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[1][0] += *(kv++)*r; - sum[1][1] += *(kv++)*r; - sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; - sum[1][4] += *(kv++)*r; - sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r; - for (i=2; i<9; ++i) { - r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[i][0] += *(kv++)*r; - sum[i][1] += *(kv++)*r; - sum[i][2] += *(kv++)*r; - sum[i][3] += *(kv++)*r; - sum[i][4] += *(kv++)*r; - sum[i][5] += *(kv++)*r; - kv -= 6; - } - kv += 6; - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double - r = (cek->re*zc.re - cek->im*zc.im)*muk; - sum[9][0] += *(kv++)*r; - sum[9][1] += *(kv++)*r; - sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; - sum[9][4] += *(kv++)*r; - sum[9][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double qj = *(q)*c[0]; - r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - r += -(cek->re*zc.im + cek->im*zc.re)*qj; - sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; - } - ++cek; - } - } - - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; - } - } - if (func[3]) { // dipole - for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; - } - - for (int k=0; kq; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double qsum = 0.0; - if (function[0]) qsum = sum[0].x; - - double dipole = 0.0; - for 
(int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - if (function[3] && atom->mu) { - double **mu = atom->mu; - for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; - } - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - - if (function[3] && atom->mu) - error->all(FLERR,"Cannot (yet) use kspace slab correction with " - "long-range dipoles and non-neutral systems or per-atom energy"); - - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); - - // add on torque corrections - - if (function[3] && atom->mu && atom->torque) { - double **mu = atom->mu; - double **torque = atom->torque; - for (int i = 0; i < nlocal; i++) { - torque[i][0] += ffact * dipole_all * mu[i][1]; - torque[i][1] += -ffact * dipole_all * mu[i][0]; - } - } -} - -/* ---------------------------------------------------------------------- - Newton solver used to find g_ewald for LJ systems - 
------------------------------------------------------------------------- */ - -double EwaldDisp::NewtonSolve(double x, double Rc, - bigint natoms, double vol, double b2) -{ - double dx,tol; - int maxit; - - maxit = 10000; //Maximum number of iterations - tol = 0.00001; //Convergence tolerance - - //Begin algorithm - - for (int i = 0; i < maxit; i++) { - dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); - x = x - dx; //Update x - if (fabs(dx) < tol) return x; - if (x < 0 || x != x) // solver failed - return -1; - } - return -1; -} - -/* ---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) -{ - double a = Rc*x; - double f = 0.0; - - if (function[1] || function[2]) { // LJ - f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * - (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); - } else { // dipole - double rg2 = a*a; - double rg4 = rg2*rg2; - double rg6 = rg4*rg2; - double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; - double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; - f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * - sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * - exp(-rg2)) - accuracy; - } - - return f; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::derivf(double x, double Rc, - bigint natoms, double vol, double b2) -{ - double h = 0.000001; //Derivative step-size - return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + 
+ Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "ewald_disp.h" +#include "math_vector.h" +#include "math_const.h" +#include "math_special.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define SMALL 0.00001 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h + +//#define DEBUG + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); + + ewaldflag = dispersionflag = dipoleflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + memset(function, 0, EWALD_NORDER*sizeof(int)); + kenergy = kvirial = NULL; + cek_local = cek_global = NULL; + ekr_local = NULL; + hvec = NULL; + kvec = NULL; + B = NULL; + first_output = 0; + energy_self_peratom = NULL; + virial_self_peratom = NULL; + nmax = 0; + q2 = 0; + b2 = 0; + M2 = 0; +} + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::~EwaldDisp() +{ + deallocate(); + deallocate_peratom(); + delete [] ekr_local; + delete [] B; +} + 
+/* --------------------------------------------------------------------- */ + +void EwaldDisp::init() +{ + nkvec = nkvec_max = nevec = nevec_max = 0; + nfunctions = nsums = sums = 0; + nbox = -1; + bytes = 0.0; + + if (!comm->me) { + if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); + if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); + } + + scale = 1.0; + mumurd2e = force->qqrd2e; + dielectric = force->dielectric; + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + if (!(ptr||cutoff)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_NFUNCS*sizeof(int)); + for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order + if (ewald_order&(1<all(FLERR, + "Unsupported mixing rule in kspace_style ewald/disp"); + default: + error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); + } + nfunctions += function[k] = 1; + nsums += n[k]; + } + + if (!gewaldflag) g_ewald = 0.0; + pair->init(); // so B is defined + init_coeffs(); + init_coeff_sums(); + + double qsum, qsqsum, bsbsum; + qsum = qsqsum = bsbsum = 0.0; + if (function[0]) { + qsum = sum[0].x; + qsqsum = sum[0].x2; + } + + // turn off coulombic if no charge + + if (function[0] && qsqsum == 0.0) { + function[0] = 0; + nfunctions -= 1; + nsums -= 1; + } + + if (function[1]) bsbsum = sum[1].x2; + if (function[2]) bsbsum = sum[2].x2; + + if (function[3]) M2 = sum[9].x2; + + if (function[3] && strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0) + error->all(FLERR,"Cannot use Ewald/disp solver " + "on system with no charge, dipole, or LJ particles"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + if (!function[1] && !function[2]) + dispersionflag = 0; + + if (!function[3]) + dipoleflag = 0; + + pair_check(); + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + M2 *= mumurd2e; + b2 = bsbsum; //Are these units right? 
+ bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (function[0]) { + g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); + else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); + } + else if (function[1] || function[2]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } else if (function[3]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } + } + + if (!comm->me) { + if (screen) fprintf(screen, " G vector = %g\n", g_ewald); + if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); + } + + g_ewald_6 = g_ewald; + deallocate_peratom(); + peratom_allocate_flag = 0; +} + +/* ---------------------------------------------------------------------- + adjust EwaldDisp coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void EwaldDisp::setup() +{ + volume = shape_det(domain->h)*slab_volfactor; + memcpy(unit, domain->h_inv, sizeof(shape)); + shape_scalar_mult(unit, 2.0*MY_PI); + unit[2] /= slab_volfactor; + + // int nbox_old = nbox, nkvec_old = nkvec; + + if (accuracy >= 1) { + nbox = 0; + error->all(FLERR,"KSpace accuracy too low"); + } + + bigint natoms = atom->natoms; + double err; + int kxmax = 1; + int kymax = 1; + int kzmax = 1; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + while (err > 
accuracy) { + kxmax++; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + } + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + while (err > accuracy) { + kymax++; + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + } + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + } + nbox = MAX(kxmax,kymax); + nbox = MAX(nbox,kzmax); + double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; + double gsqymx = unit[1]*unit[1]*kymax*kymax; + double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + gsqmx *= 1.00001; + + reallocate(); + coefficients(); + init_coeffs(); + init_coeff_sums(); + init_self(); + + if (!(first_output||comm->me)) { + first_output = 1; + if (screen) fprintf(screen, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + if (logfile) fprintf(logfile, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + } +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) +{ + double value = 0.0; + + // Coulombic + + double g2 = g_ewald*g_ewald; + + value += 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); + + // Lennard-Jones + + double g7 = g2*g2*g2*g_ewald; + + value += 4.0*b2*g7/3.0 * + sqrt(1.0/(MY_PI*natoms)) * + (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * + (MY_PI*km/(g_ewald*prd) + 1)); + + // dipole + + value += 8.0*MY_PI*M2/volume*g_ewald * + sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * + exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); + + return value; +} + +void EwaldDisp::reallocate() +{ + int ix, iy, iz; + int nkvec_max = nkvec; + vector h; + + nkvec = 0; + int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; + int *flag = kflag; + + for (ix=0; 
ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (!(ix||iy||iz)) *(flag++) = 0; + else if ((!ix)&&(iy<0)) *(flag++) = 0; + else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry + else { + h[0] = unit[0]*ix; + h[1] = unit[5]*ix+unit[1]*iy; + h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; + if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; + } + + if (nkvec>nkvec_max) { + deallocate(); // free memory + hvec = new hvector[nkvec]; // hvec + bytes += (nkvec-nkvec_max)*sizeof(hvector); + kvec = new kvector[nkvec]; // kvec + bytes += (nkvec-nkvec_max)*sizeof(kvector); + kenergy = new double[nkvec*nfunctions]; // kenergy + bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); + kvirial = new double[6*nkvec*nfunctions]; // kvirial + bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); + cek_local = new complex[nkvec*nsums]; // cek_local + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + cek_global = new complex[nkvec*nsums]; // cek_global + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + nkvec_max = nkvec; + } + + flag = kflag; // create index and + kvector *k = kvec; // wave vectors + hvector *hi = hvec; + for (ix=0; ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (*(flag++)) { + hi->x = unit[0]*ix; + hi->y = unit[5]*ix+unit[1]*iy; + (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; + k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } + + delete [] kflag; +} + + +void EwaldDisp::reallocate_atoms() +{ + if (eflag_atom || vflag_atom) + if (atom->nlocal > nmax) { + deallocate_peratom(); + allocate_peratom(); + nmax = atom->nmax; + } + + if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; + delete [] ekr_local; + ekr_local = new cvector[nevec]; + bytes += (nevec-nevec_max)*sizeof(cvector); + nevec_max = nevec; +} + + +void EwaldDisp::allocate_peratom() +{ + memory->create(energy_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); + 
memory->create(virial_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); +} + + +void EwaldDisp::deallocate_peratom() // free memory +{ + memory->destroy(energy_self_peratom); + memory->destroy(virial_self_peratom); +} + + +void EwaldDisp::deallocate() // free memory +{ + delete [] hvec; hvec = NULL; + delete [] kvec; kvec = NULL; + delete [] kenergy; kenergy = NULL; + delete [] kvirial; kvirial = NULL; + delete [] cek_local; cek_local = NULL; + delete [] cek_global; cek_global = NULL; +} + + +void EwaldDisp::coefficients() +{ + vector h; + hvector *hi = hvec, *nh; + double eta2 = 0.25/(g_ewald*g_ewald); + double b1, b2, expb2, h1, h2, c1, c2; + double *ke = kenergy, *kv = kvirial; + int func0 = function[0], func12 = function[1]||function[2], + func3 = function[3]; + + for (nh = (hi = hvec)+nkvec; hintypes; + + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + delete [] B; + B = new double[n+1]; + bytes += (n+1)*sizeof(double); + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + double **sigma = (double **) force->pair->extract("sigma",tmp); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + + if (!(epsilon&&sigma)) + error->all( + FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; + } + } + } +} + +void EwaldDisp::init_coeff_sums() +{ + if (sums) return; // calculated only once + sums = 1; + + Sum sum_local[EWALD_MAX_NSUMS]; + + memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); + if (function[0]) { // 1/r + double *q = atom->q, *qn = q+atom->nlocal; + for (double *i=q; itype, 
*ntype = type+atom->nlocal; + for (int *i=type; itype, *ntype = type+atom->nlocal; + for (int *i=type; imu) { // dipole + double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; + for (double *i = mu; i < nmu; i += 4) + sum_local[9].x2 += i[3]*i[3]; + } + MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); +} + + +void EwaldDisp::init_self() +{ + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + + memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy + memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); + + if (function[0]) { // 1/r + virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; + energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; + } + if (function[1]) { // geometric 1/r^6 + virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; + energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; + } + if (function[2]) { // arithmetic 1/r^6 + virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ + sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); + energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; + } + if (function[3]) { // dipole + virial_self[3] = 0; // in surface + energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; + } +} + + +void EwaldDisp::init_self_peratom() +{ + if (!(vflag_atom || eflag_atom)) return; + + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + double *energy = energy_self_peratom[0]; + double *virial = virial_self_peratom[0]; + int nlocal = atom->nlocal; + + memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + + if (function[0]) { // 1/r + double *ei = energy; + double *vi = virial; + double ce = qscale*g1/MY_PIS; + double cv = -0.5*MY_PI*qscale/(g2*volume); + double *qi = atom->q, *qn = qi + nlocal; + for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double q = *qi; + *vi 
= cv*q*sum[0].x; + *ei = ce*q*q-vi[0]; + } + } + if (function[1]) { // geometric 1/r^6 + double *ei = energy+1; + double *vi = virial+1; + double ce = -g3*g3/12.0; + double cv = MY_PI*MY_PIS*g3/(6.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double b = B[*typei]; + *vi = cv*b*sum[1].x; + *ei = ce*b*b+vi[0]; + } + } + if (function[2]) { // arithmetic 1/r^6 + double *bi; + double *ei = energy+2; + double *vi = virial+2; + double ce = -g3*g3/3.0; + double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + bi = B+7*typei[0]+7; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; + + /* PJV 20120225: + should this be this instead? above implies an inverse dependence + seems to be the above way in original; i recall having tested + arithmetic mixing in the conception phase, but an extra test would + be prudent (pattern repeats in multiple functions below) + + bi = B+7*typei[0]; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; + + */ + + *ei = ce*bi[0]*bi[6]+vi[0]; + } + } + if (function[3]&&atom->mu) { // dipole + double *ei = energy+3; + double *vi = virial+3; + double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; + double ce = mumurd2e*2.0*g3/3.0/MY_PIS; + for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + *vi = 0; // in surface + *ei = ce*imu[3]*imu[3]-vi[0]; + } + } +} + + +/* ---------------------------------------------------------------------- + compute the EwaldDisp long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldDisp::compute(int eflag, int vflag) +{ + if (!nbox) return; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = 
vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + nmax = atom->nmax; + } + + reallocate_atoms(); + init_self_peratom(); + compute_ek(); + compute_force(); + //compute_surface(); // assume conducting metal (tinfoil) boundary conditions + compute_energy(); + compute_energy_peratom(); + compute_virial(); + compute_virial_dipole(); + compute_virial_peratom(); +} + + +void EwaldDisp::compute_ek() +{ + cvector *ekr = ekr_local; + int lbytes = (2*nbox+1)*sizeof(cvector); + hvector *h = NULL; + kvector *k, *nk = kvec+nkvec; + cvector *z = new cvector[2*nbox+1]; + cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; + complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; + vector mui; + double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; + double bi = 0.0, ci[7]; + double *mu = atom->mu ? atom->mu[0] : NULL; + int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(cek_local, 0, n*sizeof(complex)); // reset sums + while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] + if (tri) { // triclinic z[1] + C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); + C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); + C_ANGLE(z1.z, x[2]*unit[2]); x += 3; + } + else { // orthogonal z[1] + C_ANGLE(z1.x, *(x++)*unit[0]); + C_ANGLE(z1.y, *(x++)*unit[1]); + C_ANGLE(z1.z, *(x++)*unit[2]); + } + for (; zzx, zz->x, z1.x); // 3D k-vector + C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); + C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); + } + kx = ky = -1; + cek = cek_local; + if (func[0]) qi = *(q++); + if (func[1]) bi = B[*type]; + if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); + if (func[3]) { + memcpy(mui, mu, sizeof(vector)); + mu += 4; + h = hvec; + } + for (k=kvec; ky) { // based on order in + if (kx!=k->x) cx = z[kx = k->x].x; // 
reallocate + C_RMULT(zxy, z[ky = k->y].y, cx); + } + C_RMULT(zxyz, z[k->z].z, zxy); + if (func[0]) { + cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; + } + if (func[1]) { + cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; + } + if (func[2]) for (i=0; i<7; ++i) { + cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; + } + if (func[3]) { + register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; + cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; + } + } + ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); + ++type; + } + MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); + + delete [] z; +} + + +void EwaldDisp::compute_force() +{ + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; + complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + if (atom->torque) t = atom->torque[0]; + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = + for (; fy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; + } + if (func[1]) { // geometric 1/r^6 + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[1][0] += h->x*im; 
sum[1][1] += h->y*im; sum[1][2] += h->z*im; + } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; + } + } + if (func[3]) { // dipole + register double im = *(ke)*(zc.im*cek->re+ + cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double im2 = *(ke)*(zc.re*cek->re- + cek->im*zc.im); + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + if (func[0]) { // charge-dipole + register double qi = *(q)*c[0]; + im = - *(ke)*(zc.re*cek_coul->re - + cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi; + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + + im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); + im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + } + ++cek; + ke++; + } + } + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bi = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bi)[0]*c[2]; + f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; + } + } + if (func[3]) { // dipole + f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + t += 3; + } +} + + +void EwaldDisp::compute_surface() +{ + // assume conducting metal (tinfoil) boundary conditions, so 
this function is + // not called because dielectric at the boundary --> infinity, which makes all + // the terms here zero. + + if (!function[3]) return; + if (!atom->mu) return; + + vector sum_local = VECTOR_NULL, sum_total; + memset(sum_local, 0, sizeof(vector)); + double *i, *n, *mu = atom->mu[0]; + + for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { + sum_local[0] += (i++)[0]; + sum_local[1] += (i++)[0]; + sum_local[2] += (i++)[0]; + } + MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); + + virial_self[3] = + mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); + energy_self[3] -= virial_self[3]; + + if (!(vflag_atom || eflag_atom)) return; + + double *ei = energy_self_peratom[0]+3; + double *vi = virial_self_peratom[0]+3; + double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; + + for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { + *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); + *ei -= *vi; + } +} + + +void EwaldDisp::compute_energy() +{ + energy = 0.0; + if (!eflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *ke = kenergy; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + double sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums + for (int k=0; kre*cek->re+cek->im*cek->im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2] += *(ke++)*r; + 
} + if (func[3]) { // dipole + sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); + if (func[0]) { // charge-dipole + sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + } + ke++; + ++cek; + } + } + for (int k=0; kq; + double *eatomj = eatom; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke = kenergy; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + for (int j = 0; j < atom->nlocal; j++, ++eatomj) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; + sum[i] += im; + } + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; + if (func[0]) { // charge-dipole + register double qj = *(q)*c[0]; + sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; + } + ++cek; + ke++; + } + } + + if (func[0]) { // 1/r + register double qj = *(q++)*c[0]; + *eatomj += sum[0]*qj - 
energy_self_peratom[j][0]; + } + if (func[1]) { // geometric 1/r^6 + register double bj = B[*type]*c[1]; + *eatomj += sum[1]*bj - energy_self_peratom[j][1]; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + *eatomj += 0.5*sum[i]*c2; + } + *eatomj -= energy_self_peratom[j][2]; + } + if (func[3]) { // dipole + *eatomj += sum[9] - energy_self_peratom[j][3]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + } +} + + +#define swap(a, b) { register double t = a; a= b; b = t; } + +void EwaldDisp::compute_virial() +{ + memset(virial, 0, sizeof(shape)); + if (!vflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *kv = kvirial; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); + for (int k=0; kre*cek->re+cek->im*cek->im; + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; + sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*cek->re+cek->im*cek->im; ++cek; + sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; + sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; + } 
+ if (func[3]) { + register double r = cek->re*cek->re+cek->im*cek->im; + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + } + ++cek; + } + } + for (int k=0; kmu ? atom->mu[0] : NULL; + double *vatomj = NULL; + if (vflag_atom && vatom) vatomj = vatom[0]; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(&sum[0], 0, 6*sizeof(double)); + memset(&sum_total[0], 0, 6*sizeof(double)); + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(&sum[0], 0, 6*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + double im = 0.0; + if (func[0]) { // 1/r + ke++; + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + ke++; + ++cek; + } + if (func[2]) { // arithmetic 1/r^6 + ke++; + for (i=2; i<9; ++i) { + ++cek; + } + } + if (func[3]) { // dipole + im = *(ke)*(zc.re*cek->re - cek->im*zc.im); + if (func[0]) { // charge-dipole + im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); + } + sum[0] -= 
mui[0]*h->x*im; + sum[1] -= mui[1]*h->y*im; + sum[2] -= mui[2]*h->z*im; + sum[3] -= mui[0]*h->y*im; + sum[4] -= mui[0]*h->z*im; + sum[5] -= mui[1]*h->z*im; + ++cek; + ke++; + } + } + + if (vflag_global) + for (int n = 0; n < 6; n++) + sum_total[n] -= sum[n]; + + if (vflag_atom) + for (int n = 0; n < 6; n++) + vatomj[n] -= sum[n]; + + z = (cvector *) ((char *) z+lbytes); + ++type; + if (vflag_atom) vatomj += 6; + } + + if (vflag_global) { + MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); + for (int n = 0; n < 6; n++) + virial[n] += sum[n]; + } + +} + +void EwaldDisp::compute_virial_peratom() +{ + if (!vflag_atom) return; + + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector mui = VECTOR_NULL; + complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *kv; + double *q = atom->q; + double *vatomj = vatom ? vatom[0] : NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_MAX_NSUMS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + kv = kvirial; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + if (func[3]) cek_coul = cek; + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[0][0] += *(kv++)*r; + sum[0][1] += *(kv++)*r; + sum[0][2] += *(kv++)*r; + 
sum[0][3] += *(kv++)*r; + sum[0][4] += *(kv++)*r; + sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[1][0] += *(kv++)*r; + sum[1][1] += *(kv++)*r; + sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; + sum[1][4] += *(kv++)*r; + sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r; + for (i=2; i<9; ++i) { + r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[i][0] += *(kv++)*r; + sum[i][1] += *(kv++)*r; + sum[i][2] += *(kv++)*r; + sum[i][3] += *(kv++)*r; + sum[i][4] += *(kv++)*r; + sum[i][5] += *(kv++)*r; + kv -= 6; + } + kv += 6; + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double + r = (cek->re*zc.re - cek->im*zc.im)*muk; + sum[9][0] += *(kv++)*r; + sum[9][1] += *(kv++)*r; + sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; + sum[9][4] += *(kv++)*r; + sum[9][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double qj = *(q)*c[0]; + r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + r += -(cek->re*zc.im + cek->im*zc.re)*qj; + sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; + } + ++cek; + } + } + + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; + } + } + if (func[3]) { // dipole + for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; + } + + for (int k=0; kq; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double qsum = 0.0; + if (function[0]) qsum = 
sum[0].x; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + if (function[3] && atom->mu) { + double **mu = atom->mu; + for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; + } + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + + if (function[3] && atom->mu) + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range dipoles and non-neutral systems or per-atom energy"); + + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); + + // add on torque corrections + + if (function[3] && atom->mu && atom->torque) { + double **mu = atom->mu; + double **torque = atom->torque; + for (int i = 0; i < nlocal; i++) { + torque[i][0] += ffact * dipole_all * mu[i][1]; + torque[i][1] += -ffact * dipole_all * mu[i][0]; + } + } +} + +/* ---------------------------------------------------------------------- + Newton solver used to find g_ewald for LJ systems + 
------------------------------------------------------------------------- */ + +double EwaldDisp::NewtonSolve(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double dx,tol; + int maxit; + + maxit = 10000; //Maximum number of iterations + tol = 0.00001; //Convergence tolerance + + //Begin algorithm + + for (int i = 0; i < maxit; i++) { + dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); + x = x - dx; //Update x + if (fabs(dx) < tol) return x; + if (x < 0 || x != x) // solver failed + return -1; + } + return -1; +} + +/* ---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) +{ + double a = Rc*x; + double f = 0.0; + + if (function[1] || function[2]) { // LJ + f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * + (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); + } else { // dipole + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)) - accuracy; + } + + return f; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::derivf(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double h = 0.000001; //Derivative step-size + return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; +} diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index c7dd91e083..a99d5bb1fb 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -191,7 +191,7 @@ void MSM::init() qsum = tmp; 
MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); qsqsum = tmp; - q2 = qsqsum * force->qqrd2e / force->dielectric; + q2 = qsqsum * force->qqrd2e; if (qsqsum == 0.0) error->all(FLERR,"Cannot use kspace solver on system with no charge"); diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 9b94ecdae1..9c5db42ad8 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1,3501 +1,3501 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) - analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -#include "math_const.h" -#include "math_special.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - 
-#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{REVERSE_RHO}; -enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - pppmflag = 1; - group_group_enable = 1; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = - sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - cg = NULL; - cg_peratom = NULL; - - nmax = 0; - part2grid = NULL; - - peratom_allocate_flag = 0; - group_allocate_flag = 0; - - // define acons coefficients for estimation of kspace errors - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - memory->create(acons,8,7,"pppm:acons"); - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 
143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPM::~PPPM() -{ - delete [] factors; - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(part2grid); - memory->destroy(acons); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPM::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->triclinic && differentiation_flag == 1) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " - "and kspace_modify diff ad"); - if (domain->triclinic && slabflag) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " - "slab correction"); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && 
domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); - error->all(FLERR,str); - } - - // extract short-range Coulombic cutoff from pair style - - triclinic = domain->triclinic; - scale = 1.0; - - pair_check(); - - int itmp = 0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL || - force->angle->setflag == NULL || force->bond->setflag == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta 
= force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - q2 = qsqsum * force->qqrd2e / force->dielectric; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil does not extend beyond neighbor proc - // or overlap is allowed, then done - // else reduce order and try again - - int (*procneigh)[2] = comm->procneigh; - - CommGrid *cgtmp = NULL; - int iteration = 0; - - while (order >= minorder) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond nearest neighbor processor"); - - if (stagger_flag && !differentiation_flag) compute_gf_denom(); - set_grid_global(); - set_grid_local(); - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - iteration++; - } - - if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); - if (!overlap_allowed && cgtmp->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double estimated_accuracy = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc = %d 
%d\n", - ngrid_max,nfft_both_max); - } - } - - // allocate K-space dependent memory - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - cg->ghost_notify(); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPM::setup() -{ - if (triclinic) { - setup_triclinic(); - return; - } - - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (MY_2PI/xprd); - double unitky = (MY_2PI/yprd); - double unitkz = (MY_2PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 
0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - if (differentiation_flag == 1) compute_gf_ad(); - else compute_gf_ik(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::setup_triclinic() -{ - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - prd = domain->prd; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // use lamda (0-1) coordinates - - delxinv = nx_pppm; - delyinv = ny_pppm; - delzinv = nz_pppm; - delvolinv = delxinv*delyinv*delzinv/volume; - - // fkx,fky,fkz for my FFT grid pts - - double per_i,per_j,per_k; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - per_k = k - nz_pppm*(2*k/nz_pppm); - for (j = nylo_fft; j <= nyhi_fft; j++) { - per_j = j - ny_pppm*(2*j/ny_pppm); - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per_i = i - nx_pppm*(2*i/nx_pppm); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*per_i; - unitk_lamda[1] = 2.0*MY_PI*per_j; - unitk_lamda[2] = 2.0*MY_PI*per_k; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - fkx[n] = unitk_lamda[0]; - fky[n] = unitk_lamda[1]; - fkz[n] = unitk_lamda[2]; - n++; - } - } - } - - // virial coefficients - - double sqk,vterm; - - for (n = 0; n < nfft; n++) { - sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + 
fkz[n]*fkz[n]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; - vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; - vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; - vg[n][3] = vterm*fkx[n]*fky[n]; - vg[n][4] = vterm*fkx[n]*fkz[n]; - vg[n][5] = vterm*fky[n]*fkz[n]; - } - } - - compute_gf_ik_triclinic(); -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPM::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // reset portion of global grid that each proc owns - - set_grid_local(); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPM::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() 
if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); - else cg->forward_comm(this,FORWARD_IK); - - // extra per-atom energy/virial communication - - if (evflag_atom) { - if (differentiation_flag == 1 && vflag_atom) - cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); - else if (differentiation_flag == 0) - cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); - } - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e 
* scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - // notal accounts for TIP4P tallying eatom/vatom for ghost atoms - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - int ntotal = nlocal; - if (tip4pflag) ntotal += atom->nghost; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; - } - - if (vflag_atom) { - for (i = 0; i < ntotal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - if (triclinic == 0) { - 
memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - } else { - memory->create(fkx,nfft_both,"pppm:fkx"); - memory->create(fky,nfft_both,"pppm:fky"); - memory->create(fkz,nfft_both,"pppm:fkz"); - } - - if (differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - } - - // summation coeffs - - order_allocated = order; - if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, - "pppm:drho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - 
- fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = comm->procneigh; - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag == 1) { - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - } else { - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - } - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - if (triclinic == 0) { - 
memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - } else { - memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - } - - memory->destroy(gf_b); - if (stagger_flag) gf_b = NULL; - memory->destroy2d_offset(rho1d,-order_allocated/2); - memory->destroy2d_offset(drho1d,-order_allocated/2); - memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); - memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); - - delete fft1; - delete fft2; - delete remap; - delete cg; -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate_peratom() -{ - peratom_allocate_flag = 1; - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = comm->procneigh; - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - 
procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag != 1) - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - delete cg_peratom; -} - -/* ---------------------------------------------------------------------- - set global size of PPPM grid = nx,ny,nz_pppm - used for charge accumulation, FFTs, and electric field interpolation -------------------------------------------------------------------------- */ - -void PPPM::set_grid_global() -{ - // use xprd,yprd,zprd (even if triclinic, and then scale later) - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h; - bigint natoms = 
atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if (!gridflag) { - - if (differentiation_flag == 1 || stagger_flag) { - - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast<int> (xprd/h_x); - ny_pppm = static_cast<int> (yprd/h_y); - nz_pppm = static_cast<int> (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double df_kspace = compute_df_kspace(); - - count++; - - // break loop if the accuracy has been reached or - // too many loops have been performed - - if (df_kspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size"); - h *= 0.95; - h_x = h_y = h_z = h; - } - - } else { - - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast<int> (xprd/h_x) + 1; - ny_pppm = static_cast<int> (yprd/h_y) + 1; - nz_pppm = static_cast<int> (zprd_slab/h_z) + 1; - - err = estimate_ik_error(h_x,xprd,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_x,xprd,natoms); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = estimate_ik_error(h_y,yprd,natoms); - while 
(err > accuracy) { - err = estimate_ik_error(h_y,yprd,natoms); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = estimate_ik_error(h_z,zprd_slab,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_z,zprd_slab,natoms); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // scale grid for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = nx_pppm/xprd; - tmp[1] = ny_pppm/yprd; - tmp[2] = nz_pppm/zprd; - lamda2xT(&tmp[0],&tmp[0]); - nx_pppm = static_cast<int>(tmp[0]) + 1; - ny_pppm = static_cast<int>(tmp[1]) + 1; - nz_pppm = static_cast<int>(tmp[2]) + 1; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - if (triclinic == 0) { - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - } else { - double tmp[3]; - tmp[0] = nx_pppm; - tmp[1] = ny_pppm; - tmp[2] = nz_pppm; - x2lamdaT(&tmp[0],&tmp[0]); - h_x = 1.0/tmp[0]; - h_y = 1.0/tmp[1]; - h_z = 1.0/tmp[2]; - } - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPM::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute estimated kspace force error -------------------------------------------------------------------------- */ - -double PPPM::compute_df_kspace() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - double 
df_kspace = 0.0; - if (differentiation_flag == 1 || stagger_flag) { - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - } else { - double lprx = estimate_ik_error(h_x,xprd,natoms); - double lpry = estimate_ik_error(h_y,yprd,natoms); - double lprz = estimate_ik_error(h_z,zprd_slab,natoms); - df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - } - return df_kspace; -} - -/* ---------------------------------------------------------------------- - compute qopt -------------------------------------------------------------------------- */ - -double PPPM::compute_qopt() -{ - double qopt = 0.0; - double *prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u1, u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - - int k,l,m,nx,ny,nz; - const int twoorder = 2*order; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - const int mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - const int lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - const int kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -2; nx <= 2; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - qx *= qx; - - for (ny = -2; ny <= 2; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - qy *= qy; - - for (nz = -2; nz <= 2; nz++) { - qz = unitkz*(mper+nz_pppm*nz); 
- sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - qz *= qz; - - dot2 = qx+qy+qz; - u1 = sx*sy*sz; - u2 = wx*wy*wz; - sum1 += u1*u1/dot2*MY_4PI*MY_4PI; - sum2 += u1 * u2 * MY_4PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - estimate kspace force error for ik method -------------------------------------------------------------------------- */ - -double PPPM::estimate_ik_error(double h, double prd, bigint natoms) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); - - return value; -} - -/* ---------------------------------------------------------------------- - adjust the g_ewald parameter to near its optimal value - using a Newton-Raphson solver -------------------------------------------------------------------------- */ - -void PPPM::adjust_gewald() -{ - double dx; - - for (int i = 0; i < LARGE; i++) { - dx = newton_raphson_f() / derivf(); - g_ewald -= dx; - if (fabs(newton_raphson_f()) < SMALL) return; - } - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); -} - -/* ---------------------------------------------------------------------- - Calculate f(x) using Newton-Raphson solver - ------------------------------------------------------------------------- */ - -double PPPM::newton_raphson_f() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double df_kspace = compute_df_kspace(); - - 
return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPM::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = newton_raphson_f(); - g_ewald_old = g_ewald; - g_ewald += h; - f2 = newton_raphson_f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimate of the accuracy -------------------------------------------------------------------------- */ - -double PPPM::final_accuracy() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - double df_kspace = compute_df_kspace(); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); - double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); - double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + - df_table*df_table); - - return estimated_accuracy; -} - -/* ---------------------------------------------------------------------- - set local subset of PPPM/FFT grid that I own - n xyz lo/hi in = 3d brick that I own (inclusive) - n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) - n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) -------------------------------------------------------------------------- */ - -void PPPM::set_grid_local() -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended 
- - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else kspacebbox(cuthalf,&dist[0]); - - int nlo,nhi; - - nlo = 
static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - if (stagger_flag) { - nxhi_out++; - nyhi_out++; - nzhi_out++; - } - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - // also insure no other procs use ghost cells beyond +z limit - - if (slabflag == 1) { - if (comm->myloc[2] == comm->procgrid[2]-1) - nzhi_in = nzhi_out = nz_pppm - 1; - nzhi_out = MIN(nzhi_out,nz_pppm-1); - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clumps of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int 
me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid pts owned by this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT grids owned by this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - pre-compute modified (Hockney-Eastwood) Coulomb Green's function -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = 
(MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - 
pre-compute modified (Hockney-Eastwood) Coulomb Green's function - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik_triclinic() -{ - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - double tmp[3]; - tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25); - tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); - tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); - lamda2xT(&tmp[0],&tmp[0]); - const int nbx = static_cast (tmp[0]); - const int nby = static_cast (tmp[1]); - const int nbz = static_cast (tmp[2]); - - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(MY_PI*mper/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(MY_PI*lper/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(MY_PI*kper/nx_pppm)); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*kper; - unitk_lamda[1] = 2.0*MY_PI*lper; - unitk_lamda[2] = 2.0*MY_PI*mper; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - - sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - argx = MY_PI*kper/nx_pppm + MY_PI*nx; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - argy = MY_PI*lper/ny_pppm + MY_PI*ny; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - argz = MY_PI*mper/nz_pppm + MY_PI*nz; - wz = powsinxx(argz,twoorder); - - double b[3]; - b[0] = 2.0*MY_PI*nx_pppm*nx; - b[1] = 2.0*MY_PI*ny_pppm*ny; - b[2] = 2.0*MY_PI*nz_pppm*nz; - x2lamdaT(&b[0],&b[0]); 
- - qx = unitk_lamda[0]+b[0]; - sx = exp(-0.25*square(qx/g_ewald)); - - qy = unitk_lamda[1]+b[1]; - sy = exp(-0.25*square(qy/g_ewald)); - - qz = unitk_lamda[2]+b[2]; - sz = exp(-0.25*square(qz/g_ewald)); - - dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute optimized Green's function for energy calculation -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ad() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz,sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - int k,l,m,n,kper,lper,mper; - - const int twoorder = 2*order; - - for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = square(sin(0.5*qy*yprd/ny_pppm)); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = square(sin(0.5*qx*xprd/nx_pppm)); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - sqk = qx*qx + 
qy*qy + qz*qz; - - if (sqk != 0.0) { - numerator = MY_4PI/sqk; - denominator = gf_denom(snx,sny,snz); - greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } else { - greensfn[n] = 0.0; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } - } - } - } - - // compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme -------------------------------------------------------------------------- */ - -void PPPM::compute_sf_precoeff() -{ - int i,k,l,m,n; - int nx,ny,nz,kper,lper,mper; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; 
k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = 0; i < 5; i++) { - - qx0 = MY_2PI*(kper+nx_pppm*(i-2)); - qx1 = MY_2PI*(kper+nx_pppm*(i-1)); - qx2 = MY_2PI*(kper+nx_pppm*(i )); - wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); - wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); - wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); - - qy0 = MY_2PI*(lper+ny_pppm*(i-2)); - qy1 = MY_2PI*(lper+ny_pppm*(i-1)); - qy2 = MY_2PI*(lper+ny_pppm*(i )); - wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); - wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); - wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); - - qz0 = MY_2PI*(mper+nz_pppm*(i-2)); - qz1 = MY_2PI*(mper+nz_pppm*(i-1)); - qz2 = MY_2PI*(mper+nz_pppm*(i )); - - wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); - wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); - wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); - } - - for (nx = 0; nx < 5; nx++) { - for (ny = 0; ny < 5; ny++) { - for (nz = 0; nz < 5; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_precoeff1[n] = sum1; - sf_precoeff2[n] = sum2; - sf_precoeff3[n] = sum3; - sf_precoeff4[n] = sum4; - sf_precoeff5[n] = sum5; - sf_precoeff6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPM::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global 
coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPM::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = 
delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - remap density from 3d brick decomposition to FFT decomposition -------------------------------------------------------------------------- */ - -void PPPM::brick2fft() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPM::poisson() -{ - if (differentiation_flag == 1) poisson_ad(); - else poisson_ik(); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) 
virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // triclinic system - - if (triclinic) { - poisson_ik_triclinic(); - return; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for 
(k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik_triclinic() -{ - int i,j,k,n; - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fky[i]*work1[n+1]; - work2[n+1] = -fky[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkz[i]*work1[n+1]; - work2[n+1] = -fkz[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ad -------------------------------------------------------------------------- */ - -void PPPM::poisson_ad() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for 
(i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (vflag_atom) poisson_peratom(); - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom && differentiation_flag != 1) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if 
(!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; - work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - 
v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPM::fieldforce() -{ - if (differentiation_flag == 1) fieldforce_ad(); - else fieldforce_ik(); -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ik -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ad -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - - prd = domain->prd; - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - compute_drho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force and substract self forces - - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += 
sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += q[i]*v0; - vatom[i][1] += q[i]*v1; - 
vatom[i][2] += q[i]*v2; - vatom[i][3] += q[i]*v3; - vatom[i][4] += q[i]*v4; - vatom[i][5] += q[i]*v5; - } - } -} - -/* ---------------------------------------------------------------------- - pack own values to buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = 
&v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[i]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = 
&v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + 
ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-2; l >= 0; l--) { - r1 = drho_coeff[l][k] + r1*dx; - r2 = drho_coeff[l][k] + r2*dy; - r3 = drho_coeff[l][k] + r3*dz; - } - drho1d[0][k] = r1; - drho1d[1][k] = r2; - drho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For 
k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void PPPM::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - for (l = 1; l < order; l++) - drho_coeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1) { - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPM::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - if (differentiation_flag == 1) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - } else { - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - } - if (triclinic) bytes += 3 * nfft_both * sizeof(double); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) - bytes += 6 * nbrick * sizeof(FFT_SCALAR); - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - bytes += cg->memory_usage(); - - return bytes; -} - -/* 
---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - if (differentiation_flag) - error->all(FLERR,"Cannot (yet) use kspace_modify " - "diff ad with compute group/group"); - - if (!group_allocate_flag) allocate_groups(); - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,AA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - 
density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(AA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - - double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - f2group[0] = qscale*volume*f2group_all[0]; - f2group[1] = qscale*volume*f2group_all[1]; - if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::allocate_groups() -{ - group_allocate_flag = 1; - - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::deallocate_groups() -{ - group_allocate_flag = 0; - - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups(int AA_flag) -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (AA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - // triclinic system - - if (triclinic) { - poisson_groups_triclinic(); - return; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - 
for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - for a triclinic system - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups_triclinic() -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - double partial_group; - - // force, x direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[i] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[i] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) + analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{REVERSE_RHO}; +enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm 
command"); + + pppmflag = 1; + group_group_enable = 1; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = + sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + cg = NULL; + cg_peratom = NULL; + + nmax = 0; + part2grid = NULL; + + peratom_allocate_flag = 0; + group_allocate_flag = 0; + + // define acons coefficients for estimation of kspace errors + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + memory->create(acons,8,7,"pppm:acons"); + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 
745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPM::~PPPM() +{ + delete [] factors; + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(part2grid); + memory->destroy(acons); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPM::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->triclinic && differentiation_flag == 1) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " + "and kspace_modify diff ad"); + if (domain->triclinic && slabflag) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " + "slab correction"); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; 
+ scale = 1.0; + + pair_check(); + + int itmp = 0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL || + force->angle->setflag == NULL || force->bond->setflag == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + 
MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + q2 = qsqsum * force->qqrd2e; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil does not extend beyond neighbor proc + // or overlap is allowed, then done + // else reduce order and try again + + int (*procneigh)[2] = comm->procneigh; + + CommGrid *cgtmp = NULL; + int iteration = 0; + + while (order >= minorder) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond nearest neighbor processor"); + + if (stagger_flag && !differentiation_flag) compute_gf_denom(); + set_grid_global(); + set_grid_local(); + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + iteration++; + } + + if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); + if (!overlap_allowed && cgtmp->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if 
(!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double estimated_accuracy = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + } + + // allocate K-space dependent memory + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + cg->ghost_notify(); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially 
and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPM::setup() +{ + if (triclinic) { + setup_triclinic(); + return; + } + + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (MY_2PI/xprd); + double unitky = (MY_2PI/yprd); + double unitkz = (MY_2PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + if (differentiation_flag == 1) compute_gf_ad(); + else compute_gf_ik(); +} + +/* 
---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::setup_triclinic() +{ + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + prd = domain->prd; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // use lamda (0-1) coordinates + + delxinv = nx_pppm; + delyinv = ny_pppm; + delzinv = nz_pppm; + delvolinv = delxinv*delyinv*delzinv/volume; + + // fkx,fky,fkz for my FFT grid pts + + double per_i,per_j,per_k; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + per_k = k - nz_pppm*(2*k/nz_pppm); + for (j = nylo_fft; j <= nyhi_fft; j++) { + per_j = j - ny_pppm*(2*j/ny_pppm); + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per_i = i - nx_pppm*(2*i/nx_pppm); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*per_i; + unitk_lamda[1] = 2.0*MY_PI*per_j; + unitk_lamda[2] = 2.0*MY_PI*per_k; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + fkx[n] = unitk_lamda[0]; + fky[n] = unitk_lamda[1]; + fkz[n] = unitk_lamda[2]; + n++; + } + } + } + + // virial coefficients + + double sqk,vterm; + + for (n = 0; n < nfft; n++) { + sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; + vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; + vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; + vg[n][3] = vterm*fkx[n]*fky[n]; + vg[n][4] = vterm*fkx[n]*fkz[n]; + vg[n][5] = vterm*fky[n]*fkz[n]; + } + } + + compute_gf_ik_triclinic(); +} + +/* 
---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPM::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // reset portion of global grid that each proc owns + + set_grid_local(); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPM::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend 
size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); + else cg->forward_comm(this,FORWARD_IK); + + // extra per-atom energy/virial communication + + if (evflag_atom) { + if (differentiation_flag == 1 && vflag_atom) + cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); + else if (differentiation_flag == 0) + cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); + } + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 
0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + // notal accounts for TIP4P tallying eatom/vatom for ghost atoms + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + int ntotal = nlocal; + if (tip4pflag) ntotal += atom->nghost; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; + } + + if (vflag_atom) { + for (i = 0; i < ntotal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + if (triclinic == 0) { + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + } else { + memory->create(fkx,nfft_both,"pppm:fkx"); + memory->create(fky,nfft_both,"pppm:fky"); + memory->create(fkz,nfft_both,"pppm:fkz"); + } + + if (differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + 
memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + } + + // summation coeffs + + order_allocated = order; + if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, + "pppm:drho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if 
(differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag == 1) { + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + } else { + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + } + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + if (triclinic == 0) { + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + } else { + memory->destroy(fkx); + memory->destroy(fky); + memory->destroy(fkz); + } + + memory->destroy(gf_b); + if (stagger_flag) gf_b = NULL; + memory->destroy2d_offset(rho1d,-order_allocated/2); + memory->destroy2d_offset(drho1d,-order_allocated/2); + memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); + 
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); + + delete fft1; + delete fft2; + delete remap; + delete cg; +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate_peratom() +{ + peratom_allocate_flag = 1; + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory 
that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag != 1) + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + delete cg_peratom; +} + +/* ---------------------------------------------------------------------- + set global size of PPPM grid = nx,ny,nz_pppm + used for charge accumulation, FFTs, and electric field interpolation +------------------------------------------------------------------------- */ + +void PPPM::set_grid_global() +{ + // use xprd,yprd,zprd (even if triclinic, and then scale later) + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if 
(!gridflag) { + + if (differentiation_flag == 1 || stagger_flag) { + + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double df_kspace = compute_df_kspace(); + + count++; + + // break loop if the accuracy has been reached or + // too many loops have been performed + + if (df_kspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size"); + h *= 0.95; + h_x = h_y = h_z = h; + } + + } else { + + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = estimate_ik_error(h_x,xprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_x,xprd,natoms); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = estimate_ik_error(h_y,yprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_y,yprd,natoms); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = estimate_ik_error(h_z,zprd_slab,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_z,zprd_slab,natoms); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // scale grid for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = nx_pppm/xprd; + tmp[1] = ny_pppm/yprd; + tmp[2] = nz_pppm/zprd; + lamda2xT(&tmp[0],&tmp[0]); + nx_pppm = 
static_cast(tmp[0]) + 1; + ny_pppm = static_cast(tmp[1]) + 1; + nz_pppm = static_cast(tmp[2]) + 1; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + if (triclinic == 0) { + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + } else { + double tmp[3]; + tmp[0] = nx_pppm; + tmp[1] = ny_pppm; + tmp[2] = nz_pppm; + x2lamdaT(&tmp[0],&tmp[0]); + h_x = 1.0/tmp[0]; + h_y = 1.0/tmp[1]; + h_z = 1.0/tmp[2]; + } + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPM::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute estimated kspace force error +------------------------------------------------------------------------- */ + +double PPPM::compute_df_kspace() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + double df_kspace = 0.0; + if (differentiation_flag == 1 || stagger_flag) { + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + } else { + double lprx = estimate_ik_error(h_x,xprd,natoms); + double lpry = estimate_ik_error(h_y,yprd,natoms); + double lprz = estimate_ik_error(h_z,zprd_slab,natoms); + df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + } + return df_kspace; +} + +/* 
---------------------------------------------------------------------- + compute qopt +------------------------------------------------------------------------- */ + +double PPPM::compute_qopt() +{ + double qopt = 0.0; + double *prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u1, u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + + int k,l,m,nx,ny,nz; + const int twoorder = 2*order; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + const int mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + const int lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + const int kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -2; nx <= 2; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + qx *= qx; + + for (ny = -2; ny <= 2; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + qy *= qy; + + for (nz = -2; nz <= 2; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + qz *= qz; + + dot2 = qx+qy+qz; + u1 = sx*sy*sz; + u2 = wx*wy*wz; + sum1 += u1*u1/dot2*MY_4PI*MY_4PI; + sum2 += u1 * u2 * MY_4PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + 
return qopt_all; +} + +/* ---------------------------------------------------------------------- + estimate kspace force error for ik method +------------------------------------------------------------------------- */ + +double PPPM::estimate_ik_error(double h, double prd, bigint natoms) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); + + return value; +} + +/* ---------------------------------------------------------------------- + adjust the g_ewald parameter to near its optimal value + using a Newton-Raphson solver +------------------------------------------------------------------------- */ + +void PPPM::adjust_gewald() +{ + double dx; + + for (int i = 0; i < LARGE; i++) { + dx = newton_raphson_f() / derivf(); + g_ewald -= dx; + if (fabs(newton_raphson_f()) < SMALL) return; + } + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); +} + +/* ---------------------------------------------------------------------- + Calculate f(x) using Newton-Raphson solver + ------------------------------------------------------------------------- */ + +double PPPM::newton_raphson_f() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double df_kspace = compute_df_kspace(); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPM::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = newton_raphson_f(); + g_ewald_old = 
g_ewald; + g_ewald += h; + f2 = newton_raphson_f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimate of the accuracy +------------------------------------------------------------------------- */ + +double PPPM::final_accuracy() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + double df_kspace = compute_df_kspace(); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); + double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); + double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + + df_table*df_table); + + return estimated_accuracy; +} + +/* ---------------------------------------------------------------------- + set local subset of PPPM/FFT grid that I own + n xyz lo/hi in = 3d brick that I own (inclusive) + n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) + n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) +------------------------------------------------------------------------- */ + +void PPPM::set_grid_local() +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * 
nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else kspacebbox(cuthalf,&dist[0]); + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi 
+ nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + if (stagger_flag) { + nxhi_out++; + nyhi_out++; + nzhi_out++; + } + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + // also insure no other procs use ghost cells beyond +z limit + + if (slabflag == 1) { + if (comm->myloc[2] == comm->procgrid[2]-1) + nzhi_in = nzhi_out = nz_pppm - 1; + nzhi_out = MIN(nzhi_out,nz_pppm-1); + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clumps of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid pts owned by this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT grids owned by this proc, 
without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int 
nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik_triclinic() +{ + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + double tmp[3]; + tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * 
pow(-log(EPS_HOC),0.25); + tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); + tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); + lamda2xT(&tmp[0],&tmp[0]); + const int nbx = static_cast (tmp[0]); + const int nby = static_cast (tmp[1]); + const int nbz = static_cast (tmp[2]); + + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(MY_PI*mper/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(MY_PI*lper/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(MY_PI*kper/nx_pppm)); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*kper; + unitk_lamda[1] = 2.0*MY_PI*lper; + unitk_lamda[2] = 2.0*MY_PI*mper; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + + sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + argx = MY_PI*kper/nx_pppm + MY_PI*nx; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + argy = MY_PI*lper/ny_pppm + MY_PI*ny; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + argz = MY_PI*mper/nz_pppm + MY_PI*nz; + wz = powsinxx(argz,twoorder); + + double b[3]; + b[0] = 2.0*MY_PI*nx_pppm*nx; + b[1] = 2.0*MY_PI*ny_pppm*ny; + b[2] = 2.0*MY_PI*nz_pppm*nz; + x2lamdaT(&b[0],&b[0]); + + qx = unitk_lamda[0]+b[0]; + sx = exp(-0.25*square(qx/g_ewald)); + + qy = unitk_lamda[1]+b[1]; + sy = exp(-0.25*square(qy/g_ewald)); + + qz = unitk_lamda[2]+b[2]; + sz = exp(-0.25*square(qz/g_ewald)); + + dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + 
+/* ---------------------------------------------------------------------- + compute optimized Green's function for energy calculation +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ad() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz,sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + int k,l,m,n,kper,lper,mper; + + const int twoorder = 2*order; + + for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = square(sin(0.5*qy*yprd/ny_pppm)); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = square(sin(0.5*qx*xprd/nx_pppm)); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + sqk = qx*qx + qy*qy + qz*qz; + + if (sqk != 0.0) { + numerator = MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } else { + 
greensfn[n] = 0.0; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } + } + } + } + + // compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme +------------------------------------------------------------------------- */ + +void PPPM::compute_sf_precoeff() +{ + int i,k,l,m,n; + int nx,ny,nz,kper,lper,mper; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = 0; i < 5; i++) { + + qx0 = MY_2PI*(kper+nx_pppm*(i-2)); + qx1 = MY_2PI*(kper+nx_pppm*(i-1)); + qx2 = MY_2PI*(kper+nx_pppm*(i )); + wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); + wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); + wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); + + qy0 = MY_2PI*(lper+ny_pppm*(i-2)); + qy1 = MY_2PI*(lper+ny_pppm*(i-1)); + qy2 = 
MY_2PI*(lper+ny_pppm*(i )); + wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); + wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); + wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); + + qz0 = MY_2PI*(mper+nz_pppm*(i-2)); + qz1 = MY_2PI*(mper+nz_pppm*(i-1)); + qz2 = MY_2PI*(mper+nz_pppm*(i )); + + wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); + wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); + wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); + } + + for (nx = 0; nx < 5; nx++) { + for (ny = 0; ny < 5; ny++) { + for (nz = 0; nz < 5; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_precoeff1[n] = sum1; + sf_precoeff2[n] = sum2; + sf_precoeff3[n] = sum3; + sf_precoeff4[n] = sum4; + sf_precoeff5[n] = sum5; + sf_precoeff6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPM::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // 
check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPM::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + remap density from 3d brick decomposition to FFT decomposition 
+------------------------------------------------------------------------- */ + +void PPPM::brick2fft() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPM::poisson() +{ + if (differentiation_flag == 1) poisson_ad(); + else poisson_ik(); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv 
* greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // triclinic system + + if (triclinic) { + poisson_ik_triclinic(); + return; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik_triclinic() +{ + int 
i,j,k,n; + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fky[i]*work1[n+1]; + work2[n+1] = -fky[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkz[i]*work1[n+1]; + work2[n+1] = -fkz[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad +------------------------------------------------------------------------- */ + +void PPPM::poisson_ad() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); 
+ for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (vflag_atom) poisson_peratom(); + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom && differentiation_flag != 1) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; 
+ work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPM::fieldforce() +{ + if (differentiation_flag == 1) fieldforce_ad(); + else fieldforce_ik(); +} + +/* 
---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ad +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + 
+ prd = domain->prd; + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + compute_drho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force and subtract self forces + + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/*
---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += q[i]*v0; + vatom[i][1] += q[i]*v1; + vatom[i][2] += q[i]*v2; + vatom[i][3] += q[i]*v3; + vatom[i][4] += q[i]*v4; + vatom[i][5] += q[i]*v5; + } + } +} + +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ 
+ int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } +} + +/* ---------------------------------------------------------------------- + 
unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[i]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + 
v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + // innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + 
*py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-2; l >= 0; l--) { + r1 = drho_coeff[l][k] + r1*dx; + r2 = drho_coeff[l][k] + r2*dy; + r3 = drho_coeff[l][k] + r3*dz; + } + drho1d[0][k] = r1; + drho1d[1][k] = r2; + drho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coefficients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coefficients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void
PPPM::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k <= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + for (l = 1; l < order; l++) + drho_coeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1) { + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPM::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + if (differentiation_flag == 1) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + } else { + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + } + if (triclinic) bytes += 3 * nfft_both * sizeof(double); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) + bytes += 6 * nbrick * sizeof(FFT_SCALAR); + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + bytes += cg->memory_usage(); + + return bytes; +} + +/* 
---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + if (differentiation_flag) + error->all(FLERR,"Cannot (yet) use kspace_modify " + "diff ad with compute group/group"); + + if (!group_allocate_flag) allocate_groups(); + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,AA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + 
density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(AA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + + double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + f2group[0] = qscale*volume*f2group_all[0]; + f2group[1] = qscale*volume*f2group_all[1]; + if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::allocate_groups() +{ + group_allocate_flag = 1; + + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::deallocate_groups() +{ + group_allocate_flag = 0; + + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups(int AA_flag) +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (AA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + // triclinic system + + if (triclinic) { + poisson_groups_triclinic(); + return; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + 
for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + for a triclinic system + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups_triclinic() +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + double partial_group; + + // force, x direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[i] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[i] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 6d8d604f5c..426dbf0e7b 100755 --- a/src/KSPACE/pppm_disp.cpp +++ 
b/src/KSPACE/pppm_disp.cpp @@ -1,8209 +1,8209 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Rolf Isele-Holder (Aachen University) - Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_disp.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; -enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; -enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, - FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, - FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, - FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; - - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 
1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); - - triclinic_support = 0; - pppmflag = dispersionflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - csumflag = 0; - B = NULL; - cii = NULL; - csumi = NULL; - peratom_allocate_flag = 0; - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = - v4_brick = v5_brick = NULL; - - density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = - v4_brick_g = v5_brick_g = NULL; - - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = - v4_brick_a0 = v5_brick_a0 = NULL; - - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = - v4_brick_a1 = v5_brick_a1 = NULL; - - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = - v4_brick_a2 = v5_brick_a2 = NULL; - - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = - v4_brick_a3 = v5_brick_a3 = NULL; - - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = 
v2_brick_a4 = v3_brick_a4 = - v4_brick_a4 = v5_brick_a4 = NULL; - - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = - v4_brick_a5 = v5_brick_a5 = NULL; - - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = - v4_brick_a6 = v5_brick_a6 = NULL; - - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = - v4_brick_none = v5_brick_none = NULL; - - greensfn = NULL; - greensfn_6 = NULL; - work1 = work2 = NULL; - work1_6 = work2_6 = NULL; - vg = NULL; - vg2 = NULL; - vg_6 = NULL; - vg2_6 = NULL; - fkx = fky = fkz = NULL; - fkx2 = fky2 = fkz2 = NULL; - fkx_6 = fky_6 = fkz_6 = NULL; - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = - sf_precoeff5 = sf_precoeff6 = NULL; - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = - sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - gf_b = NULL; - gf_b_6 = NULL; - rho1d = rho_coeff = NULL; - drho1d = drho_coeff = NULL; - rho1d_6 = rho_coeff_6 = NULL; - drho1d_6 = drho_coeff_6 = NULL; - fft1 = fft2 = NULL; - fft1_6 = fft2_6 = NULL; - remap = NULL; - remap_6 = NULL; - - nmax = 0; - part2grid = NULL; - part2grid_6 = NULL; - - cg = NULL; - cg_peratom = NULL; - cg_6 = NULL; - cg_peratom_6 = NULL; - - memset(function, 0, EWALD_FUNCS*sizeof(int)); -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMDisp::~PPPMDisp() -{ - delete [] factors; - delete [] B; - B = NULL; - delete [] cii; - cii = NULL; - delete [] csumi; - csumi = NULL; - deallocate(); - deallocate_peratom(); - 
memory->destroy(part2grid); - memory->destroy(part2grid_6); - part2grid = part2grid_6 = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMDisp::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); - } - - if (order > MAXORDER || order_6 > MAXORDER) { - char str[128]; - sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // set scale - - scale = 1.0; - - triclinic = domain->triclinic; - - // check whether cutoff and pair style are set - - pair_check(); - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; - if (!(ptr||*p_cutoff||*p_cutoff_lj)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - cutoff_lj = *p_cutoff_lj; - - double tmp2; - MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); - - // check out which types of potentials will have to be calculated - - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_FUNCS*sizeof(int)); - for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order - if (ewald_order&(1<pair_style); - error->all(FLERR,str); - } - function[k] = 1; - } - - - // warn, if function[0] is not set but charge attribute is set! - if (!function[0] && atom->q_flag && me == 0) { - char str[128]; - sprintf(str, "Charges are set, but coulombic solver is not used"); - error->warning(FLERR, str); - } - - // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral - - if (function[0]) { - if (!atom->q_flag) - error->all(FLERR,"Kspace style with selected options " - "requires atom attribute q"); - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver with selected options " - "on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - } - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - int itmp; - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = 
*p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - - // initialize the pair style to get the coefficients - neighrequest_flag = 0; - pair->init(); - neighrequest_flag = 1; - init_coeffs(); - - //if g_ewald and g_ewald_6 have not been specified, set some initial value - // to avoid problems when calculating the energies! - - if (!gewaldflag) g_ewald = 1; - if (!gewaldflag_6) g_ewald_6 = 1; - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - int (*procneigh)[2] = comm->procneigh; - - int iteration = 0; - if (function[0]) { - CommGrid *cgtmp = NULL; - while (order >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp Coulomb order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - // set grid for dispersion interaction and coulomb interactions - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMDisp Coulomb grid is too large"); - - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, - nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); 
- - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp, world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out, - nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - } - - if (order < minorder) - error->all(FLERR, - "Coulomb PPPMDisp order has been reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double acc = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," Coulomb stencil order = %d\n",order); - fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(screen," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - if (logfile) { - fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," Coulomb stencil order = %d\n",order); - fprintf(logfile, - " Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - } - } - - iteration = 0; - if (function[1] + function[2] + function[3]) { - CommGrid *cgtmp = NULL; - while (order_6 >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp dispersion order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - set_grid_6(); - - if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) - error->all(FLERR,"PPPMDisp Dispersion grid is too large"); - - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, - nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, - nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - order_6--; - } - - if (order_6 < minorder) - error->all(FLERR,"Dispersion PPPMDisp order has been " - "reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald_6 - - if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) - adjust_gewald_6(); - - // calculate the final accuracy - - double acc, acc_real, acc_kspace; - final_accuracy_6(acc, acc_real, acc_kspace); - - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); - fprintf(screen," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(screen," Dispersion stencil order = %d\n",order_6); - fprintf(screen," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(screen," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(screen," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(screen," Dispersion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); - fprintf(logfile," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(logfile," Dispersion stencil order = %d\n",order_6); - fprintf(logfile," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(logfile," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(logfile," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(logfile," Disperion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - } - } - - // allocate K-space dependent memory - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - cg->ghost_notify(); - cg->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - 
nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - cg_6->ghost_notify(); - cg_6->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); - } - -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMDisp::setup() -{ - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // compute fkx,fky,fkz for my FFT grid pts - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - //compute the virial coefficients and green functions - if (function[0]){ - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double per; - int i, j, k, n; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - j = (nx_pppm - i) % nx_pppm; - per = j - nx_pppm*(2*j/nx_pppm); - fkx2[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - j = (ny_pppm - i) % ny_pppm; - per = j - ny_pppm*(2*j/ny_pppm); - fky2[i] 
= unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - j = (nz_pppm - i) % nz_pppm; - per = j - nz_pppm*(2*j/nz_pppm); - fkz2[i] = unitkz*per; - } - - double sqk,vterm; - double gew2inv = 1/(g_ewald*g_ewald); - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); - vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); - vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); - } - n++; - } - } - } - compute_gf(); - if (differentiation_flag == 1) compute_sf_coeff(); - } - - if (function[1] + function[2] + function[3]) { - delxinv_6 = nx_pppm_6/xprd; - delyinv_6 = ny_pppm_6/yprd; - delzinv_6 = nz_pppm_6/zprd_slab; - delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; - - double per; - int i, j, k, n; - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - per = i - nx_pppm_6*(2*i/nx_pppm_6); - fkx_6[i] = unitkx*per; - j = (nx_pppm_6 - i) % nx_pppm_6; - per = j - nx_pppm_6*(2*j/nx_pppm_6); - fkx2_6[i] = unitkx*per; - } - for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { - per = i - ny_pppm_6*(2*i/ny_pppm_6); - fky_6[i] = unitky*per; - j = (ny_pppm_6 - i) % ny_pppm_6; - per = j - ny_pppm_6*(2*j/ny_pppm_6); - fky2_6[i] = unitky*per; - } - for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { - per = i - nz_pppm_6*(2*i/nz_pppm_6); - fkz_6[i] = unitkz*per; - j = (nz_pppm_6 - i) % nz_pppm_6; - per = j - nz_pppm_6*(2*j/nz_pppm_6); - 
fkz2_6[i] = unitkz*per; - } - double sqk,vterm; - long double erft, expt,nom, denom; - long double b, bs, bt; - double rtpi = sqrt(MY_PI); - double gewinv = 1/g_ewald_6; - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; - if (sqk == 0.0) { - vg_6[n][0] = 0.0; - vg_6[n][1] = 0.0; - vg_6[n][2] = 0.0; - vg_6[n][3] = 0.0; - vg_6[n][4] = 0.0; - vg_6[n][5] = 0.0; - } else { - b = 0.5*sqrt(sqk)*gewinv; - bs = b*b; - bt = bs*b; - erft = 2*bt*rtpi*erfc(b); - expt = exp(-bs); - nom = erft - 2*bs*expt; - denom = nom + expt; - if (denom == 0) vterm = 3.0/sqk; - else vterm = 3.0*nom/(sqk*denom); - vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; - vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; - vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; - vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; - vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; - vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; - vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); - vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); - vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); - } - n++; - } - } - } - compute_gf_6(); - if (differentiation_flag == 1) compute_sf_coeff_6(); - } -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPMDisp::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // reset portion of global grid that each proc owns - - if (function[0]) - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, 
- nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); - - if (function[1] + function[2] + function[3]) - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate_peratom(), compute() will allocate when needed - - allocate(); - - if (function[0]) { - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_6->ghost_notify(); - if (overlap_allowed == 0 && cg_6->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg_6->setup(); - } - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); - } - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMDisp::compute(int eflag, int vflag) -{ - - int i; - // convert atoms from box to lamda coords - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - if (function[0]) { - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_peratom_6->ghost_notify(); - cg_peratom_6->setup(); - } - peratom_allocate_flag = 1; - } - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - - if (function[0]) memory->destroy(part2grid); - if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); - nmax = atom->nmax; - if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); - if (function[1] + function[2] + function[3]) - memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); - } - - - energy = 0.0; - energy_1 = 0.0; - energy_6 = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; - - // find grid points for all my particles - // distribute partcles' charges/dispersion coefficients on the grid - // communication between processors and remapping two fft - // Solution of poissons equation in k-space and backtransformation - // communication between processors - // calculation of forces - - if (function[0]) { - - //perfrom calculations for coulomb interactions only - - particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, - nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); - - make_rho_c(); - - cg->reverse_comm(this,REVERSE_RHO); - - brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - density_brick, density_fft, work1,remap); - - if (differentiation_flag == 1) { - - poisson_ad(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this,FORWARD_AD); - - fieldforce_c_ad(); - - if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); - - } else { - poisson_ik(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - fkx, fky, fkz,fkx2, fky2, fkz2, - vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this, FORWARD_IK); - - fieldforce_c_ik(); - - if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); - } - if (evflag_atom) fieldforce_c_peratom(); - } - - if (function[1]) { - //perfrom calculations for geometric mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_g(); - - - cg_6->reverse_comm(this, REVERSE_RHO_G); - - brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - density_brick_g, density_fft_g, work1_6,remap_6); - - if (differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_AD_G); - - fieldforce_g_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); - - } else { - poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_IK_G); - - fieldforce_g_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); - } - if (evflag_atom) fieldforce_g_peratom(); - } - - if (function[2]) { - //perform calculations for arithmetic mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_a(); - - cg_6->reverse_comm(this, REVERSE_RHO_A); - - brick2fft_a(); - - if ( differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ad(density_fft_a0, density_fft_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ad(density_fft_a1, density_fft_a5, - 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ad(density_fft_a2, density_fft_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_AD_A); - - fieldforce_a_ad(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); - - } else { - - poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ik(density_fft_a0, density_fft_a6, - vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, - vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ik(density_fft_a1, density_fft_a5, - vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, - vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, - u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ik(density_fft_a2, density_fft_a4, - vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, - vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_IK_A); - - fieldforce_a_ik(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); - } - if (evflag_atom) fieldforce_a_peratom(); - } - - if (function[3]) { - //perfrom calculations if no mixing rule applies - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - - make_rho_none(); - - cg_6->reverse_comm(this, REVERSE_RHO_NONE); - - brick2fft_none(); - - if (differentiation_flag == 1) { - - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); - - fieldforce_none_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); - - } else { - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); - - fieldforce_none_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); - } - if (evflag_atom) fieldforce_none_peratom(); - } - - // sum energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_1 = energy_all; - MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_6 = energy_all; - - energy_1 *= 0.5*volume; - energy_6 *= 0.5*volume; - - energy_1 -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + - 1.0/12.0*pow(g_ewald_6,6)*csum; - energy_1 *= qscale; - } - - // sum virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; - if 
(function[1]+function[2]+function[3]){ - double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; - virial[0] -= a; - virial[1] -= a; - virial[2] -= a; - } - } - - if (eflag_atom) { - if (function[0]) { - double *q = atom->q; - for (i = 0; i < atom->nlocal; i++) { - eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction - } - } - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + - 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; - } - } - } - - if (vflag_atom) { - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction - } - } - } - - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - if (function[0]) energy += energy_1; - if (function[1] + function[2] + function[3]) energy += energy_6; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - initialize coefficients needed for the dispersion density on the grids -------------------------------------------------------------------------- */ - -void PPPMDisp::init_coeffs() // local pair coeffs -{ - int tmp; - int n = atom->ntypes; - int converged; - delete [] B; - if (function[3] + function[2]) { // no mixing rule or arithmetic - if (function[2] && me == 0) { - if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); - if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); - } - // get dispersion coefficients - double **b = (double **) force->pair->extract("B",tmp); - // allocate data for eigenvalue decomposition - double **A; - double 
**Q; - memory->create(A,n,n,"pppm/disp:A"); - memory->create(Q,n,n,"pppm/disp:Q"); - // fill coefficients to matrix a - for (int i = 1; i <= n; i++) - for (int j = 1; j <= n; j++) - A[i-1][j-1] = b[i][j]; - // transform q to a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Q[i][j] = 0.0; - for (int i = 0; i < n; i++) - Q[i][i] = 1.0; - // perfrom eigenvalue decomposition with QR algorithm - converged = qr_alg(A,Q,n); - if (function[3] && !converged) { - error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); - } - // determine number of used eigenvalues - // based on maximum allowed number or cutoff criterion - // sort eigenvalues according to their size with bubble sort - double t; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n-1-i; j++) { - if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { - t = A[j][j]; - A[j][j] = A[j+1][j+1]; - A[j+1][j+1] = t; - for (int k = 0; k < n; k++) { - t = Q[k][j]; - Q[k][j] = Q[k][j+1]; - Q[k][j+1] = t; - } - } - } - } - - // check which eigenvalue is the first that is smaller - // than a specified tolerance - // check how many are maximum allowed by the user - double amax = fabs(A[0][0]); - double acrit = amax*splittol; - double bmax = 0; - double err = 0; - nsplit = 0; - for (int i = 0; i < n; i++) { - if (fabs(A[i][i]) > acrit) nsplit++; - else { - bmax = fabs(A[i][i]); - break; - } - } - - err = bmax/amax; - if (err > 1.0e-4) { - char str[128]; - sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); - error->warning(FLERR, str); - } - // set B - B = new double[nsplit*n+nsplit]; - for (int i = 0; i< nsplit; i++) { - B[i] = A[i][i]; - for (int j = 0; j < n; j++) { - B[nsplit*(j+1) + i] = Q[j][i]; - } - } - - nsplit_alloc = nsplit; - if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; - // check if the function should preferably be [1] or [2] or [3] - if (nsplit == 1) { - delete [] B; - function[3] = 0; - function[2] = 0; - function[1] = 1; - if (me == 0) { - if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); - if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); - } - } - if (function[2] && nsplit <= 6) { - if (me == 0) { - if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); - } - function[3] = 1; - function[2] = 0; - } - if (function[2] && (nsplit > 6)) { - if (me == 0) { - if (screen) fprintf(screen," Using 7 structure factors\n"); - if (logfile) fprintf(logfile," Using 7 structure factors\n"); - } - delete [] B; - } - if (function[3]) { - if (me == 0) { - if (screen) fprintf(screen," Using %d structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); - } - if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); - } - - memory->destroy(A); - memory->destroy(Q); - } - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - B = new double[n+1]; - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - //cannot use epsilon, because this has not been set yet - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - //cannot use sigma, because this has not been set yet - double **sigma = (double **) force->pair->extract("sigma",tmp); - if (!(epsilon&&sigma)) - error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); - double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]*0.25; - sigma_n *= sigma_i; - } - } - } -} - -/* 
---------------------------------------------------------------------- - Eigenvalue decomposition of a real, symmetric matrix with the QR - method (includes transpformation to Tridiagonal Matrix + Wilkinson - shift) -------------------------------------------------------------------------- */ - -int PPPMDisp::qr_alg(double **A, double **Q, int n) -{ - int converged = 0; - double an1, an, bn1, d, mue; - // allocate some memory for the required operations - double **A0,**Qi,**C,**D,**E; - // make a copy of A for convergence check - memory->create(A0,n,n,"pppm/disp:A0"); - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - A0[i][j] = A[i][j]; - - // allocate an auxiliary matrix Qi - memory->create(Qi,n,n,"pppm/disp:Qi"); - - // alllocate an auxillary matrices for the matrix multiplication - memory->create(C,n,n,"pppm/disp:C"); - memory->create(D,n,n,"pppm/disp:D"); - memory->create(E,n,n,"pppm/disp:E"); - - // transform Matrix A to Tridiagonal form - hessenberg(A,Q,n); - - // start loop for the matrix factorization - int count = 0; - int countmax = 100000; - while (1) { - // make a Wilkinson shift - an1 = A[n-2][n-2]; - an = A[n-1][n-1]; - bn1 = A[n-2][n-1]; - d = (an1-an)/2; - mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); - for (int i = 0; i < n; i++) - A[i][i] -= mue; - - // perform a QR factorization for a tridiagonal matrix A - qr_tri(Qi,A,n); - - // update the matrices - mmult(A,Qi,C,n); - mmult(Q,Qi,C,n); - - // backward Wilkinson shift - for (int i = 0; i < n; i++) - A[i][i] += mue; - - // check the convergence - converged = check_convergence(A,Q,A0,C,D,E,n); - if (converged) break; - count = count + 1; - if (count == countmax) break; - } - - // free allocated memory - memory->destroy(Qi); - memory->destroy(A0); - memory->destroy(C); - memory->destroy(D); - memory->destroy(E); - - return converged; -} - -/* ---------------------------------------------------------------------- - Transform a Matrix to Hessenberg form (for symmetric Matrices, the - 
result will be a tridiagonal matrix) -------------------------------------------------------------------------- */ - -void PPPMDisp::hessenberg(double **A, double **Q, int n) -{ - double r,a,b,c,s,x1,x2; - for (int i = 0; i < n-1; i++) { - for (int j = i+2; j < n; j++) { - // compute coeffs for the rotation matrix - a = A[i+1][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A with multiplication from the left - for (int k = 0; k < n; k++) { - x1 = A[i+1][k]; - x2 = A[j][k]; - A[i+1][k] = c*x1 + s*x2; - A[j][k] = -s*x1 + c*x2; - } - // update the entries of A and Q with a multiplication from the right - for (int k = 0; k < n; k++) { - x1 = A[k][i+1]; - x2 = A[k][j]; - A[k][i+1] = c*x1 + s*x2; - A[k][j] = -s*x1 + c*x2; - x1 = Q[k][i+1]; - x2 = Q[k][j]; - Q[k][i+1] = c*x1 + s*x2; - Q[k][j] = -s*x1 + c*x2; - } - } - } -} - -/* ---------------------------------------------------------------------- - QR factorization for a tridiagonal matrix; Result of the factorization - is stored in A and Qi -------------------------------------------------------------------------- */ - -void PPPMDisp::qr_tri(double** Qi,double** A,int n) -{ - double r,a,b,c,s,x1,x2; - int j,k,k0,kmax; - // make Qi a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Qi[i][j] = 0.0; - for (int i = 0; i < n; i++) - Qi[i][i] = 1.0; - // loop over main diagonal and first of diagonal of A - for (int i = 0; i < n-1; i++) { - j = i+1; - // coefficients of the rotation matrix - a = A[i][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A and Q - k0 = (i-1>0)?i-1:0; //min(i-1,0); - kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); - double epsabs = eps*Bmax; - - // reconstruct the original matrix - // store the diagonal elements in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = 0.0; - for (int i = 0; i < n; i++) - D[i][i] = A[i][i]; - // store matrix Q in E - for (int i 
= 0; i < n; i++) - for (int j = 0; j < n; j++) - E[i][j] = Q[i][j]; - // E = Q*A - mmult(E,D,C,n); - // store transpose of Q in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = Q[j][i]; - // E = Q*A*Q.t - mmult(E,D,C,n); - - //compare the original matrix and the final matrix - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - diff = A0[i][j] - E[i][j]; - epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); - } - } - if (epsmax > epsabs) converged = 0; - return converged; -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - memory->create(work1,2*nfft_both,"pppm/disp:work1"); - memory->create(work2,2*nfft_both,"pppm/disp:work2"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); - - memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); - memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); - memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); - - - memory->create(gf_b,order,"pppm/disp:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); - - memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); - memory->create(vg,nfft_both,6,"pppm/disp:vg"); - memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); - } - memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[1]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); - memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); - memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); - } - memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,3,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - 
- if (function[2]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); - memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); - memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); - memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); - memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); - memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); - memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); - - memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); - memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); - memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); - memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); - memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); - memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); - memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); - - - if ( differentiation_flag == 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - - memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); - memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); - memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); - - memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); - memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); - memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); - - memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); - memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); - memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); - - memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); - memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); - memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); - - memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); - 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); - memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); - - memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); - memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); - memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); - - memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); - memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); - memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); - } - - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,7,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - 
procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,21,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[3]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); - if ( differentiation_flag == 1) { - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); - memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); - memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); - } - memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate_peratom() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v0_brick"); - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v5_brick"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - - if (function[1]) { - - if ( differentiation_flag != 1 ) - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); - memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); - memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); - memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); - memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); - memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[2]) { - - if ( differentiation_flag != 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - } - - memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); - memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); - memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); - memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); - memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); - memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); - - memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); - memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); - memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); - memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); - memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); - memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); - - memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); - memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); - memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); - memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); - memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); - memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); - - memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); - memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); - memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); - memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); - memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); - memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); - - memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); - memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); - memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); - memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); - memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); - memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); - - memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); - memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); - memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); - memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); - memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); - memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); - - memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); - memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); - memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); - memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); - memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); - memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,42,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,49,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[3]) { - - if ( differentiation_flag != 1 ) - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); - memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); - memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); - memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); - 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); - memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_fft); - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - - memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_g); - density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - - memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a0); - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - - memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a1); - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - - memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a2); - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - - memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a3); - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - - memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a4); - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - - memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a5); - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - - memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a6); - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - - memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_none); - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - memory->destroy(sf_precoeff1_6); - memory->destroy(sf_precoeff2_6); - 
memory->destroy(sf_precoeff3_6); - memory->destroy(sf_precoeff4_6); - memory->destroy(sf_precoeff5_6); - memory->destroy(sf_precoeff6_6); - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - memory->destroy(greensfn); - memory->destroy(greensfn_6); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(work1_6); - memory->destroy(work2_6); - memory->destroy(vg); - memory->destroy(vg2); - memory->destroy(vg_6); - memory->destroy(vg2_6); - greensfn = greensfn_6 = NULL; - work1 = work2 = work1_6 = work2_6 = NULL; - vg = vg2 = vg_6 = vg2_6 = NULL; - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - fkx = fky = fkz = NULL; - - memory->destroy1d_offset(fkx2,nxlo_fft); - memory->destroy1d_offset(fky2,nylo_fft); - memory->destroy1d_offset(fkz2,nzlo_fft); - fkx2 = fky2 = fkz2 = NULL; - - memory->destroy1d_offset(fkx_6,nxlo_fft_6); - memory->destroy1d_offset(fky_6,nylo_fft_6); - memory->destroy1d_offset(fkz_6,nzlo_fft_6); - fkx_6 = fky_6 = fkz_6 = NULL; - - memory->destroy1d_offset(fkx2_6,nxlo_fft_6); - memory->destroy1d_offset(fky2_6,nylo_fft_6); - memory->destroy1d_offset(fkz2_6,nzlo_fft_6); - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - memory->destroy2d_offset(drho1d,-order/2); - memory->destroy2d_offset(drho_coeff, (1-order)/2); - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - memory->destroy(gf_b_6); - memory->destroy2d_offset(rho1d_6,-order_6/2); - memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); - memory->destroy2d_offset(drho1d_6,-order_6/2); - memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); - gf_b_6 = NULL; - rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; - - delete fft1; - delete fft2; - delete remap; - delete cg; - fft1 = fft2 = NULL; - remap = NULL; - cg = 
NULL; - - delete fft1_6; - delete fft2_6; - delete remap_6; - delete cg_6; - fft1_6 = fft2_6 = NULL; - remap_6 = NULL; - cg_6 = NULL; -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - - memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; - - memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; - - memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; - - memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; - - memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; - - memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; - - memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; - - memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); - memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; - - memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; - - delete cg_peratom; - delete cg_peratom_6; - cg_peratom = cg_peratom_6 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald - for Coulomb interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid() -{ - double q2 = qsqsum * force->qqrd2e / force->dielectric; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h, h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) - error->all(FLERR,"KSpace accuracy too large to estimate G 
vector"); - g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if (!gridflag) { - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast (xprd/h_x); - ny_pppm = static_cast (yprd/h_y); - nz_pppm = static_cast (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double qopt = compute_qopt(); - - double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (dfkspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); - h *= 0.95; - h_x = h_y = h_z = h; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; -} - -/* ---------------------------------------------------------------------- - set the FFT parameters -------------------------------------------------------------------------- */ - -void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, - int& nxlo_f,int& nylo_f,int& nzlo_f, - int& nxhi_f,int& nyhi_f,int& nzhi_f, - int& nxlo_i,int& nylo_i,int& nzlo_i, - int& nxhi_i,int& nyhi_i,int& nzhi_i, - int& nxlo_o,int& nylo_o,int& nzlo_o, - 
int& nxhi_o,int& nyhi_o,int& nzhi_o, - int& nlow, int& nupp, - int& ng, int& nf, int& nfb, - double& sft,double& sftone, int& ord) -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); - nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; - - nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); - nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; - - nzlo_i = static_cast - (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); - nzhi_i = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; - - - // nlow,nupp = stencil size for mapping particles to PPPM grid - - nlow = -(ord-1)/2; - nupp = ord/2; - - // sft values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (ord % 2) sft = OFFSET + 0.5; - else sft = OFFSET; - if (ord % 2) sftone = 0.0; - else sftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } 
- - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nxlo_o = nlo + nlow; - nxhi_o = nhi + nupp; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nylo_o = nlo + nlow; - nyhi_o = nhi + nupp; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nzlo_o = nlo + nlow; - nzhi_o = nhi + nupp; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) - - if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_i = nz_p - 1; - nzhi_o = nz_p - 1; - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int 
npey_fft,npez_fft; - if (nz_p >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_f = 0; - nxhi_f = nx_p - 1; - nylo_f = me_y*ny_p/npey_fft; - nyhi_f = (me_y+1)*ny_p/npey_fft - 1; - nzlo_f = me_z*nz_p/npez_fft; - nzhi_f = (me_z+1)*nz_p/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * - (nzhi_o-nzlo_o+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * - (nzhi_f-nzlo_f+1); - int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * - (nzhi_i-nzlo_i+1); - nfb = MAX(nf,nfft_brick); - -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMDisp::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ -void PPPMDisp::adjust_gewald() -{ - - // Use Newton solver to find g_ewald - - double dx; - - // Begin algorithm - - for (int i = 0; i < LARGE; i++) { - dx = f() / derivf(); - g_ewald -= dx; //Update g_ewald - if (fabs(f()) < SMALL) return; - } - - // Failed to converge - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); - -} - -/* 
---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double PPPMDisp::f() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e / force->dielectric; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f(); - g_ewald_old = g_ewald; - g_ewald += h; - f2 = f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimator for the accuracy -------------------------------------------------------------------------- */ - -double PPPMDisp::final_accuracy() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e / force->dielectric; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); - return acc; -} - -/* 
---------------------------------------------------------------------- - Calculate the final estimator for the Dispersion accuracy -------------------------------------------------------------------------- */ - -void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) -{ - double df_rspace, df_kspace; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - acc_real = lj_rspace_error(); - - double qopt = compute_qopt_6(); - - acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); - return; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Coulomb interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_ad(); - } else { - qopt = compute_qopt_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Dispersion interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_6_ad(); - } else { - qopt = compute_qopt_6_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_ik() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = 
domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3,dot1,dot2; - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - 
-double PPPMDisp::compute_qopt_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - double numerator; - - int nbx = 2; - int nby = 2; - int nbz = 2; - double form = 1.0; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706; - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += sx*sy*sz * u2*4.0*MY_PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* 
---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Dispersion interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ik() -{ - double qopt = 0.0; - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3; - double dot1,dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1.0/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot1 = unitkx*kper*qx + unitky*lper*qy + 
unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Dispersion interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4; - double dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = 
pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid and g_ewald_6 - for Dispersion interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid_6() -{ - // Calculate csum - if (!csumflag) calc_csum(); - if (!gewaldflag_6) set_init_g6(); - if (!gridflag_6) set_n_pppm_6(); - while (!factorable(nx_pppm_6)) nx_pppm_6++; - while (!factorable(ny_pppm_6)) ny_pppm_6++; - while (!factorable(nz_pppm_6)) nz_pppm_6++; - -} - -/* ---------------------------------------------------------------------- - Calculate the sum of the squared dispersion coefficients and other - related quantities required for the calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::calc_csum() -{ - csumij = 0.0; - csum = 0.0; - - int ntypes = atom->ntypes; - int i,j,k; - - delete [] cii; - cii = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) cii[i] = 0.0; - delete [] csumi; - csumi = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; - int *neach 
= new int[ntypes+1]; - for (i = 0; i<=ntypes; i++) neach[i] = 0; - - //the following variables are needed to distinguish between arithmetic - // and geometric mixing - - double mix1; // scales 20/16 to 4 - int mix2; // shifts the value to the sigma^3 value - int mix3; // shifts the value to the right atom type - if (function[1]) { - for (i = 1; i <= ntypes; i++) - cii[i] = B[i]*B[i]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += B[tmp]*B[tmp]; - } - } - if (function[2]) { - for (i = 1; i <= ntypes; i++) - cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; - } - } - if (function[3]) { - for (i = 1; i <= ntypes; i++) - for (j = 0; j < nsplit; j++) - cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - for (j = 0; j < nsplit; j++) - csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; - } - } - - - double tmp2; - MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); - csum = tmp2; - csumflag = 1; - - int *neach_all = new int[ntypes+1]; - MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); - - // copmute csumij and csumi - double d1, d2; - if (function[1]){ - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - csumi[i] += neach_all[j]*B[i]*B[j]; - d1 = neach_all[i]*B[i]; - d2 = neach_all[j]*B[j]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; - } - } - } - if (function[2]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; k<=6; k++) { - csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - d1 = neach_all[i]*B[7*i + k]; - d2 = neach_all[j]*B[7*(j+1)-k-1]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - } - } - } - } - if (function[3]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; 
kall(FLERR, str); - -} - -/* ---------------------------------------------------------------------- - Calculate f(x) for Dispersion interaction - ------------------------------------------------------------------------- */ - -double PPPMDisp::f_6() -{ - double df_rspace, df_kspace; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = lj_rspace_error(); - - double qopt = compute_qopt_6(); - df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf_6() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f_6(); - g_ewald_old = g_ewald_6; - g_ewald_6 += h; - f2 = f_6(); - g_ewald_6 = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - - -/* ---------------------------------------------------------------------- - calculate an initial value for g_ewald_6 - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_init_g6() -{ - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - // make initial g_ewald estimate - // based on desired error and real space cutoff - - // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj - // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 - // else, repeat multiply g_ewald_6 by 2 until df_real > 0 - // perform bisection for the last two values of - double df_real; - double g_ewald_old; - double gmin, gmax; - - // check if there 
is a user defined accuracy - double acc_rspace = accuracy; - if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; - - g_ewald_6 = 1.0/cutoff_lj; - df_real = lj_rspace_error() - acc_rspace; - int counter = 0; - if (df_real > 0) { - while (df_real > 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 2; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (df_real < 0) { - while (df_real < 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 0.5; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - - gmin = MIN(g_ewald_6, g_ewald_old); - gmax = MAX(g_ewald_6, g_ewald_old); - g_ewald_6 = gmin + 0.5*(gmax-gmin); - counter = 0; - while (gmax-gmin > SMALL && counter < LARGE) { - counter++; - df_real = lj_rspace_error() -acc_rspace; - if (df_real < 0) gmax = g_ewald_6; - else gmin = g_ewald_6; - g_ewald_6 = gmin + 0.5*(gmax-gmin); - } - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - -} - -/* ---------------------------------------------------------------------- - calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_n_pppm_6() -{ - bigint natoms = atom->natoms; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - double h, h_x,h_y,h_z; - - double acc_kspace = accuracy; - if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; - - // initial value for the grid spacing - h = h_x = h_y = h_z = 4.0/g_ewald_6; - // decrease grid spacing untill required precision is obtained - int count = 0; - while(1) { - - // set grid dimension - nx_pppm_6 = static_cast (xprd/h_x); - ny_pppm_6 = static_cast (yprd/h_y); - nz_pppm_6 = static_cast 
(zprd_slab/h_z); - - if (nx_pppm_6 <= 1) nx_pppm_6 = 2; - if (ny_pppm_6 <= 1) ny_pppm_6 = 2; - if (nz_pppm_6 <= 1) nz_pppm_6 = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm_6 >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft_6 = 0; - nxhi_fft_6 = nx_pppm_6 - 1; - nylo_fft_6 = me_y*ny_pppm_6/npey_fft; - nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; - nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; - nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; - - double qopt = compute_qopt_6(); - - double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (df_kspace <= acc_kspace) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); - h *= 0.95; - h_x = h_y = h_z = h; - } -} - -/* ---------------------------------------------------------------------- - calculate the real space error for dispersion interactions - ---------------------------------------------------------------------- */ - -double PPPMDisp::lj_rspace_error() -{ - bigint natoms = atom->natoms; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - double deltaf; - double rgs = (cutoff_lj*g_ewald_6); - rgs *= rgs; - double rgs_inv = 1.0/rgs; - deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* - exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); - return deltaf; -} - - -/* ---------------------------------------------------------------------- - Compyute the modified (hockney-eastwood) coulomb green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf() -{ - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = 
domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = sin(0.5*qz*zprd_slab/nz_pppm); - snz2 = snz*snz; - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - wz *= wz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = sin(0.5*qy*yprd/ny_pppm); - sny2 = sny*sny; - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - wy *= wy; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = sin(0.5*qx*xprd/nx_pppm); - snx2 = snx*snx; - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - numerator = 4.0*MY_PI/sqk; - denominator = gf_denom(snx2,sny2,snz2, gf_b, order); - greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int 
nyhi_ft, int nzhi_ft, - double *sf_pre1, double *sf_pre2, double *sf_pre3, - double *sf_pre4, double *sf_pre5, double *sf_pre6) -{ - - int i,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - int nb = 2; - - n = 0; - for (m = nzlo_ft; m <= nzhi_ft; m++) { - mper = m - nzp*(2*m/nzp); - - for (l = nylo_ft; l <= nyhi_ft; l++) { - lper = l - nyp*(2*l/nyp); - - for (k = nxlo_ft; k <= nxhi_ft; k++) { - kper = k - nxp*(2*k/nxp); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = -nb; i <= nb; i++) { - - qx0 = unitkx*(kper+nxp*i); - qx1 = unitkx*(kper+nxp*(i+1)); - qx2 = unitkx*(kper+nxp*(i+2)); - wx0[i+2] = 1.0; - wx1[i+2] = 1.0; - wx2[i+2] = 1.0; - argx = 0.5*qx0*xprd/nxp; - if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx1*xprd/nxp; - if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx2*xprd/nxp; - if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); - - qy0 = unitky*(lper+nyp*i); - qy1 = unitky*(lper+nyp*(i+1)); - qy2 = unitky*(lper+nyp*(i+2)); - wy0[i+2] = 1.0; - wy1[i+2] = 1.0; - wy2[i+2] = 1.0; - argy = 0.5*qy0*yprd/nyp; - if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy1*yprd/nyp; - if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy2*yprd/nyp; - if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); - - qz0 = 
unitkz*(mper+nzp*i); - qz1 = unitkz*(mper+nzp*(i+1)); - qz2 = unitkz*(mper+nzp*(i+2)); - wz0[i+2] = 1.0; - wz1[i+2] = 1.0; - wz2[i+2] = 1.0; - argz = 0.5*qz0*zprd_slab/nzp; - if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz1*zprd_slab/nzp; - if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz2*zprd_slab/nzp; - if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); - } - - for (nx = 0; nx <= 4; nx++) { - for (ny = 0; ny <= 4; ny++) { - for (nz = 0; nz <= 4; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_pre1[n] = sum1; - sf_pre2[n] = sum2; - sf_pre3[n] = sum3; - sf_pre4[n] = sum4; - sf_pre5[n] = sum5; - sf_pre6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - Compute the modified (hockney-eastwood) dispersion green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_6() -{ - double *prd; - int k,l,m,n; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double sqk; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz; - double qx,qy,qz; - double rtsqk, term; - double numerator,denominator; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = 
sqrt(MY_PI); - - numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - qz = unitkz*mper; - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); - snz2 = snz*snz; - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - wz *= wz; - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - qy = unitky*lper; - sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); - sny2 = sny*sny; - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - wy *= wy; - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - qx = unitkx*kper; - snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); - snx2 = snx*snx; - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); - rtsqk = sqrt(sqk); - term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + - 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); - greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; - } else greensfn_6[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ -void PPPMDisp::compute_sf_coeff() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; - - n 
= 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - for (l = nylo_fft; l <= nyhi_fft; l++) { - for (k = nxlo_fft; k <= nxhi_fft; k++) { - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - ++n; - } - } - } - - // Compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Dispersion interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_coeff_6() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; - sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; - sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; - sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; - sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; - sf_coeff_6[5] += 
sf_precoeff6_6[n]*greensfn_6[n]; - ++n; - } - } - } - - - // perform multiplication with prefactors - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm_6/xprd; - prey *= ny_pppm_6/yprd; - prez *= nz_pppm_6/zprd_slab; - sf_coeff_6[0] *= prex; - sf_coeff_6[1] *= prex*2; - sf_coeff_6[2] *= prey; - sf_coeff_6[3] *= prey*2; - sf_coeff_6[4] *= prez; - sf_coeff_6[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; - -} - -/* ---------------------------------------------------------------------- - denominator for Hockney-Eastwood Green's function - of x,y,z = sin(kx*deltax/2), etc - - inf n-1 - S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l - j=-inf l=0 - - = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) - gf_b = denominator expansion coeffs -------------------------------------------------------------------------- */ - -double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) -{ - double sx,sy,sz; - sz = sy = sx = 0.0; - for (int l = ord-1; l >= 0; l--) { - sx = g_b[l] + sx*x; - sy = g_b[l] + sy*y; - sz = g_b[l] + sz*z; - } - double s = sx*sy*sz; - return s*s; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_denom(double* gf, int ord) -{ - int k,l,m; - - for (l = 1; l < ord; l++) gf[l] = 0.0; - gf[0] = 1.0; - - for (m = 1; m < ord; m++) { - for (l = m; l > 0; l--) - gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); - gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*ord; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < ord; l++) gf[l] *= gaminv; -} - -/* 
---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for coulomb interaction or dispersion interaction with geometric - mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, - LAMMPS_NS::Remap* rmp) -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_i; iz <= nzhi_i; iz++) - for (iy = nylo_i; iy <= nyhi_i; iy++) - for (ix = nxlo_i; ix <= nxhi_i; ix++) - dfft[n++] = dbrick[iz][iy][ix]; - - rmp->perform(dfft,dfft,work); -} - - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with arithmetic mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_a() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) - for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) - for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { - density_fft_a0[n] = density_brick_a0[iz][iy][ix]; - density_fft_a1[n] = density_brick_a1[iz][iy][ix]; - density_fft_a2[n] = density_brick_a2[iz][iy][ix]; - density_fft_a3[n] = density_brick_a3[iz][iy][ix]; - density_fft_a4[n] = density_brick_a4[iz][iy][ix]; - density_fft_a5[n] = density_brick_a5[iz][iy][ix]; - density_fft_a6[n++] = 
density_brick_a6[iz][iy][ix]; - } - - remap_6->perform(density_fft_a0,density_fft_a0,work1_6); - remap_6->perform(density_fft_a1,density_fft_a1,work1_6); - remap_6->perform(density_fft_a2,density_fft_a2,work1_6); - remap_6->perform(density_fft_a3,density_fft_a3,work1_6); - remap_6->perform(density_fft_a4,density_fft_a4,work1_6); - remap_6->perform(density_fft_a5,density_fft_a5,work1_6); - remap_6->perform(density_fft_a6,density_fft_a6,work1_6); - -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with special case -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_none() -{ - int k,n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMDisp::particle_map(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; - ny = static_cast 
((x[i][1]-boxlo[1])*dely+sft) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; - - p2g[i][0] = nx; - p2g[i][1] = ny; - p2g[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlow < nxlo || nx+nup > nxhi || - ny+nlow < nylo || ny+nup > nyhi || - nz+nlow < nzlo || nz+nup > nzhi) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); -} - - -void PPPMDisp::particle_map_c(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - particle_map(delx, dely, delz, sft, p2g, nup, nlow, - nxlo, nylo, nzlo, nxhi, nyhi, nzhi); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_c() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - z0 = delvolinv * q[i]; - 
for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- geometric mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_g() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6 * B[type]; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - create discretized 
"density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- arithmetic mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_a() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - - memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - 
my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - density_brick_a0[mz][my][mx] += w*B[7*type]; - density_brick_a1[mz][my][mx] += w*B[7*type+1]; - density_brick_a2[mz][my][mx] += w*B[7*type+2]; - density_brick_a3[mz][my][mx] += w*B[7*type+3]; - density_brick_a4[mz][my][mx] += w*B[7*type+4]; - density_brick_a5[mz][my][mx] += w*B[7*type+5]; - density_brick_a6[mz][my][mx] += w*B[7*type+6]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- case when mixing rules don't apply -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_none() -{ - int k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - for (k = 0; k < nsplit_alloc; k++) - memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - 
for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) - density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik differentiation -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* kx, double* ky, double* kz, - double* kx2, double* ky2, double* kz2, - FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function 
to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x & y direction gradient - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; - wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vx_brick[k][j][i] = wk2[n++]; - vy_brick[k][j][i] = wk2[n++]; - } - - if (!eflag_atom) { - // z direction gradient only - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = kz[k]*wk1[n+1]; - wk2[n+1] = -kz[k]*wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n]; - n += 2; - } - - } - - else { - // z direction gradient & per-atom energy - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; - wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n++]; - u_pa[k][j][i] = wk2[n++];; - } - } - - if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); -} - -/* 
---------------------------------------------------------------------- - FFT-based Poisson solver for ad differentiation -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = wk1[n]; - wk2[n+1] = wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - 
for (i = nxlo_i; i <= nxhi_i; i++) { - u_pa[k][j][i] = wk2[n++]; - n++; - } - - - if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); - -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, - double** vcoeff, double** vcoeff2, int nft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) -{ - //v0 & v1 term - int n, i, j, k; - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; - wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v0_pa[k][j][i] = wk2[n++]; - v1_pa[k][j][i] = wk2[n++]; - } - - //v2 & v3 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; - wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v2_pa[k][j][i] = wk2[n++]; - v3_pa[k][j][i] = wk2[n++]; - } - - //v4 & v5 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; - wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v4_pa[k][j][i] = 
wk2[n++]; - v5_pa[k][j][i] = wk2[n++]; - } - -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - 
work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = work2_6[n++]; - vxbrick_2[k][j][i] = work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = work2_6[n++]; - vybrick_2[k][j][i] = work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= 
nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = work2_6[n++]; - vzbrick_2[k][j][i] = work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - u_pa_2[k][j][i] = work2_6[n++]; - } - } - - if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - - - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = 
ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - 
work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = B[n1]*work2_6[n++]; - vybrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; - u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; - } - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** 
v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - 
u_pa_2[k][j][i] = work2_6[n++]; - } - - if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, - FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + 
work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = B[n1]*work2_6[n++]; - u_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = work2_6[n++]; - v0_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - 
work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = work2_6[n++]; - v1_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = work2_6[n++]; - v2_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = work2_6[n++]; - v3_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = work2_6[n++]; - v4_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = work2_6[n++]; - 
v5_pa_2[k][j][i] = work2_6[n++]; - } -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_peratom(int n1, int n2, - FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < 
nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double 
**f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - 
int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - // convert E-field to force and substract self forces - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower 
left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - // convert E-field to force - - const double qfactor = 0.5*force->qqrd2e * scale * q[i]; - - if (eflag_atom) eatom[i] += u_pa*qfactor; - if (vflag_atom) { - vatom[i][0] += v0*qfactor; - vatom[i][1] += v1*qfactor; - vatom[i][2] += v2*qfactor; - vatom[i][3] += v3*qfactor; - vatom[i][4] += v4*qfactor; - vatom[i][5] += v5*qfactor; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) 
= global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx -= x0*vdx_brick_g[mz][my][mx]; - eky -= x0*vdy_brick_g[mz][my][mx]; - ekz -= x0*vdz_brick_g[mz][my][mx]; - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - f[i][0] += lj*ekx; - f[i][1] += lj*eky; - if (slabflag != 2) f[i][2] += lj*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = 
distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 2*lj*lj; - f[i][0] += ekx*lj - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 2*lj*lj; - f[i][1] += eky*lj - sf; - - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 2*lj*lj; - if (slabflag != 2) f[i][2] += ekz*lj - sf; - - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_peratom() -{ - int 
i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick_g[mz][my][mx]; - v1 += x0*v1_brick_g[mz][my][mx]; - v2 += x0*v2_brick_g[mz][my][mx]; - v3 += x0*v3_brick_g[mz][my][mx]; - v4 += x0*v4_brick_g[mz][my][mx]; - v5 += x0*v5_brick_g[mz][my][mx]; - } - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]*0.5; - - if (eflag_atom) eatom[i] += u_pa*lj; - if (vflag_atom) { - vatom[i][0] += v0*lj; - vatom[i][1] += v1*lj; - vatom[i][2] += v2*lj; - vatom[i][3] += v3*lj; - vatom[i][4] += v4*lj; - vatom[i][5] += v5*lj; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void 
PPPMDisp::fieldforce_a_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx0 -= x0*vdx_brick_a0[mz][my][mx]; - eky0 -= x0*vdy_brick_a0[mz][my][mx]; - ekz0 -= x0*vdz_brick_a0[mz][my][mx]; - ekx1 -= x0*vdx_brick_a1[mz][my][mx]; - eky1 -= x0*vdy_brick_a1[mz][my][mx]; - ekz1 -= x0*vdz_brick_a1[mz][my][mx]; - ekx2 -= x0*vdx_brick_a2[mz][my][mx]; - eky2 -= x0*vdy_brick_a2[mz][my][mx]; - ekz2 -= x0*vdz_brick_a2[mz][my][mx]; - ekx3 -= x0*vdx_brick_a3[mz][my][mx]; - eky3 -= x0*vdy_brick_a3[mz][my][mx]; - ekz3 -= x0*vdz_brick_a3[mz][my][mx]; - ekx4 -= x0*vdx_brick_a4[mz][my][mx]; - 
eky4 -= x0*vdy_brick_a4[mz][my][mx]; - ekz4 -= x0*vdz_brick_a4[mz][my][mx]; - ekx5 -= x0*vdx_brick_a5[mz][my][mx]; - eky5 -= x0*vdy_brick_a5[mz][my][mx]; - ekz5 -= x0*vdz_brick_a5[mz][my][mx]; - ekx6 -= x0*vdx_brick_a6[mz][my][mx]; - eky6 -= x0*vdy_brick_a6[mz][my][mx]; - ekz6 -= x0*vdz_brick_a6[mz][my][mx]; - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of 
dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - ekx0 += x0*u_brick_a0[mz][my][mx]; - eky0 += y0*u_brick_a0[mz][my][mx]; - ekz0 += z0*u_brick_a0[mz][my][mx]; - - ekx1 += x0*u_brick_a1[mz][my][mx]; - eky1 += y0*u_brick_a1[mz][my][mx]; - ekz1 += z0*u_brick_a1[mz][my][mx]; - - ekx2 += x0*u_brick_a2[mz][my][mx]; - eky2 += y0*u_brick_a2[mz][my][mx]; - ekz2 += z0*u_brick_a2[mz][my][mx]; - - ekx3 += x0*u_brick_a3[mz][my][mx]; - eky3 += y0*u_brick_a3[mz][my][mx]; - ekz3 += z0*u_brick_a3[mz][my][mx]; - - ekx4 += x0*u_brick_a4[mz][my][mx]; - eky4 += y0*u_brick_a4[mz][my][mx]; - ekz4 += z0*u_brick_a4[mz][my][mx]; - - ekx5 += x0*u_brick_a5[mz][my][mx]; - eky5 += y0*u_brick_a5[mz][my][mx]; - ekz5 += z0*u_brick_a5[mz][my][mx]; - - ekx6 += x0*u_brick_a6[mz][my][mx]; - eky6 += y0*u_brick_a6[mz][my][mx]; - ekz6 += z0*u_brick_a6[mz][my][mx]; - } - } - } - - ekx0 *= hx_inv; - eky0 *= hy_inv; - ekz0 *= hz_inv; - - ekx1 *= hx_inv; - eky1 *= hy_inv; - ekz1 
*= hz_inv; - - ekx2 *= hx_inv; - eky2 *= hy_inv; - ekz2 *= hz_inv; - - ekx3 *= hx_inv; - eky3 *= hy_inv; - ekz3 *= hz_inv; - - ekx4 *= hx_inv; - eky4 *= hy_inv; - ekz4 *= hz_inv; - - ekx5 *= hx_inv; - eky5 *= hy_inv; - ekz5 *= hz_inv; - - ekx6 *= hx_inv; - eky6 *= hy_inv; - ekz6 *= hz_inv; - - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; - FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; - FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; - FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; - FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; - FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; - FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; - - // loop over my charges, 
interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; - u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; - u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; - u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; - u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; - u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; - u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - u_pa0 += x0*u_brick_a0[mz][my][mx]; - u_pa1 += x0*u_brick_a1[mz][my][mx]; - u_pa2 += x0*u_brick_a2[mz][my][mx]; - u_pa3 += x0*u_brick_a3[mz][my][mx]; - u_pa4 += x0*u_brick_a4[mz][my][mx]; - u_pa5 += x0*u_brick_a5[mz][my][mx]; - u_pa6 += x0*u_brick_a6[mz][my][mx]; - } - if (vflag_atom) { - v00 += x0*v0_brick_a0[mz][my][mx]; - v10 += x0*v1_brick_a0[mz][my][mx]; - v20 += x0*v2_brick_a0[mz][my][mx]; - v30 += x0*v3_brick_a0[mz][my][mx]; - v40 += x0*v4_brick_a0[mz][my][mx]; - v50 += x0*v5_brick_a0[mz][my][mx]; - v01 += x0*v0_brick_a1[mz][my][mx]; - v11 += x0*v1_brick_a1[mz][my][mx]; - v21 += x0*v2_brick_a1[mz][my][mx]; - v31 += 
x0*v3_brick_a1[mz][my][mx]; - v41 += x0*v4_brick_a1[mz][my][mx]; - v51 += x0*v5_brick_a1[mz][my][mx]; - v02 += x0*v0_brick_a2[mz][my][mx]; - v12 += x0*v1_brick_a2[mz][my][mx]; - v22 += x0*v2_brick_a2[mz][my][mx]; - v32 += x0*v3_brick_a2[mz][my][mx]; - v42 += x0*v4_brick_a2[mz][my][mx]; - v52 += x0*v5_brick_a2[mz][my][mx]; - v03 += x0*v0_brick_a3[mz][my][mx]; - v13 += x0*v1_brick_a3[mz][my][mx]; - v23 += x0*v2_brick_a3[mz][my][mx]; - v33 += x0*v3_brick_a3[mz][my][mx]; - v43 += x0*v4_brick_a3[mz][my][mx]; - v53 += x0*v5_brick_a3[mz][my][mx]; - v04 += x0*v0_brick_a4[mz][my][mx]; - v14 += x0*v1_brick_a4[mz][my][mx]; - v24 += x0*v2_brick_a4[mz][my][mx]; - v34 += x0*v3_brick_a4[mz][my][mx]; - v44 += x0*v4_brick_a4[mz][my][mx]; - v54 += x0*v5_brick_a4[mz][my][mx]; - v05 += x0*v0_brick_a5[mz][my][mx]; - v15 += x0*v1_brick_a5[mz][my][mx]; - v25 += x0*v2_brick_a5[mz][my][mx]; - v35 += x0*v3_brick_a5[mz][my][mx]; - v45 += x0*v4_brick_a5[mz][my][mx]; - v55 += x0*v5_brick_a5[mz][my][mx]; - v06 += x0*v0_brick_a6[mz][my][mx]; - v16 += x0*v1_brick_a6[mz][my][mx]; - v26 += x0*v2_brick_a6[mz][my][mx]; - v36 += x0*v3_brick_a6[mz][my][mx]; - v46 += x0*v4_brick_a6[mz][my][mx]; - v56 += x0*v5_brick_a6[mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]*0.5; - lj1 = B[7*type+5]*0.5; - lj2 = B[7*type+4]*0.5; - lj3 = B[7*type+3]*0.5; - lj4 = B[7*type+2]*0.5; - lj5 = B[7*type+1]*0.5; - lj6 = B[7*type]*0.5; - - - if (eflag_atom) - eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + - u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; - if (vflag_atom) { - vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + - v04*lj4 + v05*lj5 + v06*lj6; - vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + - v14*lj4 + v15*lj5 + v16*lj6; - vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + - v24*lj4 + v25*lj5 + v26*lj6; - vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + - v34*lj4 + v35*lj5 + v36*lj6; - vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + 
v43*lj3 + - v44*lj4 + v45*lj5 + v46*lj6; - vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + - v54*lj4 + v55*lj5 + v56*lj6; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ik() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) { - ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; - eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; - ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; 
- f[i][0] += lj*ekx[k]; - f[i][1] +=lj*eky[k]; - if (slabflag != 2) f[i][2] +=lj*ekz[k]; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ad() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - - - double s1,s2,s3; - double sf1,sf2,sf3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = 
nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - for (k = 0; k < nsplit; k++) { - ekx[k] += x0*u_brick_none[k][mz][my][mx]; - eky[k] += y0*u_brick_none[k][mz][my][mx]; - ekz[k] += z0*u_brick_none[k][mz][my][mx]; - } - } - } - } - - for (k = 0; k < nsplit; k++) { - ekx[k] *= hx_inv; - eky[k] *= hy_inv; - ekz[k] *= hz_inv; - } - - // convert D-field to force - type = atom->type[i]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); - - sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); - - sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); - - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; - - sf = sf1*B[k]*2*lj*lj; - f[i][0] += lj*ekx[k] - sf; - - - sf = sf2*B[k]*2*lj*lj; - f[i][1] += lj*eky[k] - sf; - - sf = sf3*B[k]*2*lj*lj; - if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_peratom() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; - - u_pa = new FFT_SCALAR[nsplit]; - v0 = new FFT_SCALAR[nsplit]; - v1 = new FFT_SCALAR[nsplit]; - v2 = new FFT_SCALAR[nsplit]; - v3 = new FFT_SCALAR[nsplit]; - v4 = new FFT_SCALAR[nsplit]; - v5 = new FFT_SCALAR[nsplit]; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid 
pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - for (k = 0; k < nsplit; k++) - u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - for (k = 0; k < nsplit; k++) - u_pa[k] += x0*u_brick_none[k][mz][my][mx]; - } - if (vflag_atom) { - for (k = 0; k < nsplit; k++) { - v0[k] += x0*v0_brick_none[k][mz][my][mx]; - v1[k] += x0*v1_brick_none[k][mz][my][mx]; - v2[k] += x0*v2_brick_none[k][mz][my][mx]; - v3[k] += x0*v3_brick_none[k][mz][my][mx]; - v4[k] += x0*v4_brick_none[k][mz][my][mx]; - v5[k] += x0*v5_brick_none[k][mz][my][mx]; - } - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]*0.5; - - if (eflag_atom) { - eatom[i] += u_pa[k]*lj; - } - if (vflag_atom) { - vatom[i][0] += v0[k]*lj; - vatom[i][1] += v1[k]*lj; - vatom[i][2] += v2[k]*lj; - vatom[i][3] += v3[k]*lj; - vatom[i][4] += v4[k]*lj; - vatom[i][5] += v5[k]*lj; - } - } - } - - delete [] u_pa; - delete [] v0; - delete [] v1; - delete [] v2; - delete [] v3; - delete [] v4; - delete [] v5; -} - -/* ---------------------------------------------------------------------- - pack values to buf to send to another proc 
-------------------------------------------------------------------------- */ - -void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - 
buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, arithmetic mixing - - case FORWARD_IK_A: { - FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc0[list[i]]; - buf[n++] = ysrc0[list[i]]; - 
buf[n++] = zsrc0[list[i]]; - - buf[n++] = xsrc1[list[i]]; - buf[n++] = ysrc1[list[i]]; - buf[n++] = zsrc1[list[i]]; - - buf[n++] = xsrc2[list[i]]; - buf[n++] = ysrc2[list[i]]; - buf[n++] = zsrc2[list[i]]; - - buf[n++] = xsrc3[list[i]]; - buf[n++] = ysrc3[list[i]]; - buf[n++] = zsrc3[list[i]]; - - buf[n++] = xsrc4[list[i]]; - buf[n++] = ysrc4[list[i]]; - buf[n++] = zsrc4[list[i]]; - - buf[n++] = xsrc5[list[i]]; - buf[n++] = ysrc5[list[i]]; - buf[n++] = zsrc5[list[i]]; - - buf[n++] = xsrc6[list[i]]; - buf[n++] = ysrc6[list[i]]; - buf[n++] = zsrc6[list[i]]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - buf[n++] = esrc0[list[i]]; - buf[n++] = esrc1[list[i]]; - buf[n++] = esrc2[list[i]]; - buf[n++] = esrc3[list[i]]; - buf[n++] = esrc4[list[i]]; - buf[n++] = esrc5[list[i]]; - buf[n++] = esrc6[list[i]]; - } - if (vflag_atom) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = 
v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = 
v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - break; - } - - // Dispersion interactions, no mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[n++] = src[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, arithmetic mixing - - case 
FORWARD_IK_A: { - FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - xdest0[list[i]] = buf[n++]; - ydest0[list[i]] = buf[n++]; - zdest0[list[i]] = buf[n++]; - - xdest1[list[i]] = buf[n++]; - ydest1[list[i]] = buf[n++]; - zdest1[list[i]] = buf[n++]; - - xdest2[list[i]] = buf[n++]; - ydest2[list[i]] = buf[n++]; - zdest2[list[i]] = buf[n++]; - - xdest3[list[i]] = buf[n++]; - ydest3[list[i]] = buf[n++]; - zdest3[list[i]] = buf[n++]; - - 
xdest4[list[i]] = buf[n++]; - ydest4[list[i]] = buf[n++]; - zdest4[list[i]] = buf[n++]; - - xdest5[list[i]] = buf[n++]; - ydest5[list[i]] = buf[n++]; - zdest5[list[i]] = buf[n++]; - - xdest6[list[i]] = buf[n++]; - ydest6[list[i]] = buf[n++]; - zdest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - dest0[list[i]] = buf[n++]; - dest1[list[i]] = buf[n++]; - dest2[list[i]] = buf[n++]; - dest3[list[i]] = buf[n++]; - dest4[list[i]] = buf[n++]; - dest5[list[i]] = buf[n++]; - dest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - esrc0[list[i]] = buf[n++]; - esrc1[list[i]] = buf[n++]; - esrc2[list[i]] = buf[n++]; - esrc3[list[i]] = buf[n++]; - esrc4[list[i]] = buf[n++]; - esrc5[list[i]] = buf[n++]; - esrc6[list[i]] = buf[n++]; - } - if (vflag_atom) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - 
v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - 
v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == 
REVERSE_RHO_A) { - FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src[list[i]]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == REVERSE_RHO_A) { - FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - dest0[list[i]] += buf[n++]; - dest1[list[i]] += buf[n++]; - dest2[list[i]] += buf[n++]; - dest3[list[i]] += buf[n++]; - dest4[list[i]] += buf[n++]; - dest5[list[i]] += buf[n++]; - dest6[list[i]] += buf[n++]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* 
---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **rho_c, FFT_SCALAR **r1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-1; l >= 0; l--) { - r1 = rho_c[l][k] + r1*dx; - r2 = rho_c[l][k] + r2*dy; - r3 = rho_c[l][k] + r3*dz; - } - r1d[0][k] = r1; - r1d[1][k] = r2; - r1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-2; l >= 0; l--) { - r1 = drho_c[l][k] + r1*dx; - r2 = drho_c[l][k] + r2*dy; - r3 = drho_c[l][k] + r3*dz; - } - dr1d[0][k] = r1; - dr1d[1][k] = r2; - dr1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, - int ord) -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); - - for (k = -ord; k <= ord; k++) - for (l = 0; l < ord; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < ord; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-ord)/2; - for (k = -(ord-1); k < ord; k += 2) { - for (l = 0; l < ord; l++) - coeff[l][m] = a[l][k]; - for (l = 1; l < ord; l++) - dcoeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-ord); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMDisp::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy_1 += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_1d(int n, double &time1d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1){ - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->timing1d(work1_6,nfft_both_6,1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - if (differentiation_flag != 1){ - fft2_6->timing1d(work1_6,nfft_both_6,-1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d += (time2 - time1)*mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_3d(int n, double &time3d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + 
function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->compute(work1_6,work1_6,1); - fft2_6->compute(work1_6,work1_6,-1); - if (differentiation_flag != 1) { - fft2_6->compute(work1_6,work1_6,-1); - fft2_6->compute(work1_6,work1_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d += (time2 - time1) * mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMDisp::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int mixing = 1; - int diff = 3; //depends on differentiation - int per = 7; //depends on per atom calculations - if (differentiation_flag) { - diff = 1; - per = 6; - } - if (!evflag_atom) per = 0; - if (function[2]) mixing = 7; - if (function[3]) mixing = nsplit_alloc; - - if (function[0]) { - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory - bytes += 6 * nfft_both * sizeof(double); // vg - bytes += nfft_both * sizeof(double); // greensfn - bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg->memory_usage(); - } - - if (function[1] + function[2] + function[3]) { - int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * - (nzhi_out_6-nzlo_out_6+1); - bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks - bytes += 6 * nfft_both_6 * sizeof(double); // vg - bytes += nfft_both_6 * sizeof(double); // greensfn - bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg_6->memory_usage(); - } - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + 
http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Rolf Isele-Holder (Aachen University) + Paul Crozier (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_disp.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; +enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; +enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, + FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, + FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, + FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; + + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : 
KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); + + triclinic_support = 0; + pppmflag = dispersionflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + csumflag = 0; + B = NULL; + cii = NULL; + csumi = NULL; + peratom_allocate_flag = 0; + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = + v4_brick = v5_brick = NULL; + + density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = + v4_brick_g = v5_brick_g = NULL; + + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = + v4_brick_a0 = v5_brick_a0 = NULL; + + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = + v4_brick_a1 = v5_brick_a1 = NULL; + + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = + v4_brick_a2 = v5_brick_a2 = NULL; + + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = + v4_brick_a3 = v5_brick_a3 = NULL; + + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = + v4_brick_a4 = v5_brick_a4 = NULL; + + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + u_brick_a5 = v0_brick_a5 = 
v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = + v4_brick_a5 = v5_brick_a5 = NULL; + + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = + v4_brick_a6 = v5_brick_a6 = NULL; + + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = + v4_brick_none = v5_brick_none = NULL; + + greensfn = NULL; + greensfn_6 = NULL; + work1 = work2 = NULL; + work1_6 = work2_6 = NULL; + vg = NULL; + vg2 = NULL; + vg_6 = NULL; + vg2_6 = NULL; + fkx = fky = fkz = NULL; + fkx2 = fky2 = fkz2 = NULL; + fkx_6 = fky_6 = fkz_6 = NULL; + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = + sf_precoeff5 = sf_precoeff6 = NULL; + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = + sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + gf_b = NULL; + gf_b_6 = NULL; + rho1d = rho_coeff = NULL; + drho1d = drho_coeff = NULL; + rho1d_6 = rho_coeff_6 = NULL; + drho1d_6 = drho_coeff_6 = NULL; + fft1 = fft2 = NULL; + fft1_6 = fft2_6 = NULL; + remap = NULL; + remap_6 = NULL; + + nmax = 0; + part2grid = NULL; + part2grid_6 = NULL; + + cg = NULL; + cg_peratom = NULL; + cg_6 = NULL; + cg_peratom_6 = NULL; + + memset(function, 0, EWALD_FUNCS*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMDisp::~PPPMDisp() +{ + delete [] factors; + delete [] B; + B = NULL; + delete [] cii; + cii = NULL; + delete [] csumi; + csumi = NULL; + deallocate(); + deallocate_peratom(); + memory->destroy(part2grid); + memory->destroy(part2grid_6); + part2grid = part2grid_6 = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run 
+------------------------------------------------------------------------- */ + +void PPPMDisp::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); + } + + if (order > MAXORDER || order_6 > MAXORDER) { + char str[128]; + sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // set scale + + scale = 1.0; + + triclinic = domain->triclinic; + + // check whether cutoff and pair style are set + + pair_check(); + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; + if (!(ptr||*p_cutoff||*p_cutoff_lj)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + cutoff_lj = *p_cutoff_lj; + + double tmp2; + MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); + + // check out which types of potentials will have to be calculated + + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_FUNCS*sizeof(int)); + for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order + if (ewald_order&(1<pair_style); + error->all(FLERR,str); + } + function[k] = 1; + } + + + // warn, if function[0] is not set but charge attribute is set! + if (!function[0] && atom->q_flag && me == 0) { + char str[128]; + sprintf(str, "Charges are set, but coulombic solver is not used"); + error->warning(FLERR, str); + } + + // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral + + if (function[0]) { + if (!atom->q_flag) + error->all(FLERR,"Kspace style with selected options " + "requires atom attribute q"); + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver with selected options " + "on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + } + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + int itmp; + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = 
*p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + + // initialize the pair style to get the coefficients + neighrequest_flag = 0; + pair->init(); + neighrequest_flag = 1; + init_coeffs(); + + //if g_ewald and g_ewald_6 have not been specified, set some initial value + // to avoid problems when calculating the energies! + + if (!gewaldflag) g_ewald = 1; + if (!gewaldflag_6) g_ewald_6 = 1; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + int (*procneigh)[2] = comm->procneigh; + + int iteration = 0; + if (function[0]) { + CommGrid *cgtmp = NULL; + while (order >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp Coulomb order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + // set grid for dispersion interaction and coulomb interactions + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMDisp Coulomb grid is too large"); + + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, + nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); 
+ + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp, world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out, + nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + } + + if (order < minorder) + error->all(FLERR, + "Coulomb PPPMDisp order has been reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if (!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double acc = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," Coulomb stencil order = %d\n",order); + fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(screen," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + if (logfile) { + fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," Coulomb stencil order = %d\n",order); + fprintf(logfile, + " Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + } + } + + iteration = 0; + if (function[1] + function[2] + function[3]) { + CommGrid *cgtmp = NULL; + while (order_6 >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp dispersion order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + set_grid_6(); + + if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) + error->all(FLERR,"PPPMDisp Dispersion grid is too large"); + + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, + nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, + nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + order_6--; + } + + if (order_6 < minorder) + error->all(FLERR,"Dispersion PPPMDisp order has been " + "reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald_6 + + if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) + adjust_gewald_6(); + + // calculate the final accuracy + + double acc, acc_real, acc_kspace; + final_accuracy_6(acc, acc_real, acc_kspace); + + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); + fprintf(screen," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(screen," Dispersion stencil order = %d\n",order_6); + fprintf(screen," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(screen," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(screen," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(screen," Dispersion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); + fprintf(logfile," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(logfile," Dispersion stencil order = %d\n",order_6); + fprintf(logfile," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(logfile," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(logfile," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(logfile," Disperion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + } + } + + // allocate K-space dependent memory + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + cg->ghost_notify(); + cg->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + 
nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + cg_6->ghost_notify(); + cg_6->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); + } + +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMDisp::setup() +{ + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // compute fkx,fky,fkz for my FFT grid pts + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + //compute the virial coefficients and green functions + if (function[0]){ + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double per; + int i, j, k, n; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + j = (nx_pppm - i) % nx_pppm; + per = j - nx_pppm*(2*j/nx_pppm); + fkx2[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + j = (ny_pppm - i) % ny_pppm; + per = j - ny_pppm*(2*j/ny_pppm); + fky2[i] 
= unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + j = (nz_pppm - i) % nz_pppm; + per = j - nz_pppm*(2*j/nz_pppm); + fkz2[i] = unitkz*per; + } + + double sqk,vterm; + double gew2inv = 1/(g_ewald*g_ewald); + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); + vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); + vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); + } + n++; + } + } + } + compute_gf(); + if (differentiation_flag == 1) compute_sf_coeff(); + } + + if (function[1] + function[2] + function[3]) { + delxinv_6 = nx_pppm_6/xprd; + delyinv_6 = ny_pppm_6/yprd; + delzinv_6 = nz_pppm_6/zprd_slab; + delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; + + double per; + int i, j, k, n; + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + per = i - nx_pppm_6*(2*i/nx_pppm_6); + fkx_6[i] = unitkx*per; + j = (nx_pppm_6 - i) % nx_pppm_6; + per = j - nx_pppm_6*(2*j/nx_pppm_6); + fkx2_6[i] = unitkx*per; + } + for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { + per = i - ny_pppm_6*(2*i/ny_pppm_6); + fky_6[i] = unitky*per; + j = (ny_pppm_6 - i) % ny_pppm_6; + per = j - ny_pppm_6*(2*j/ny_pppm_6); + fky2_6[i] = unitky*per; + } + for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { + per = i - nz_pppm_6*(2*i/nz_pppm_6); + fkz_6[i] = unitkz*per; + j = (nz_pppm_6 - i) % nz_pppm_6; + per = j - nz_pppm_6*(2*j/nz_pppm_6); + 
fkz2_6[i] = unitkz*per; + } + double sqk,vterm; + long double erft, expt,nom, denom; + long double b, bs, bt; + double rtpi = sqrt(MY_PI); + double gewinv = 1/g_ewald_6; + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; + if (sqk == 0.0) { + vg_6[n][0] = 0.0; + vg_6[n][1] = 0.0; + vg_6[n][2] = 0.0; + vg_6[n][3] = 0.0; + vg_6[n][4] = 0.0; + vg_6[n][5] = 0.0; + } else { + b = 0.5*sqrt(sqk)*gewinv; + bs = b*b; + bt = bs*b; + erft = 2*bt*rtpi*erfc(b); + expt = exp(-bs); + nom = erft - 2*bs*expt; + denom = nom + expt; + if (denom == 0) vterm = 3.0/sqk; + else vterm = 3.0*nom/(sqk*denom); + vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; + vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; + vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; + vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; + vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; + vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; + vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); + vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); + vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); + } + n++; + } + } + } + compute_gf_6(); + if (differentiation_flag == 1) compute_sf_coeff_6(); + } +} + +/* ---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPMDisp::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // reset portion of global grid that each proc owns + + if (function[0]) + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, 
+ nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); + + if (function[1] + function[2] + function[3]) + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate_peratom(), compute() will allocate when needed + + allocate(); + + if (function[0]) { + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_6->ghost_notify(); + if (overlap_allowed == 0 && cg_6->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg_6->setup(); + } + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); + } + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDisp::compute(int eflag, int vflag) +{ + + int i; + // convert atoms from box to lamda coords + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + if (function[0]) { + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_peratom_6->ghost_notify(); + cg_peratom_6->setup(); + } + peratom_allocate_flag = 1; + } + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + + if (function[0]) memory->destroy(part2grid); + if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); + nmax = atom->nmax; + if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); + if (function[1] + function[2] + function[3]) + memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); + } + + + energy = 0.0; + energy_1 = 0.0; + energy_6 = 0.0; + if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; + + // find grid points for all my particles + // distribute partcles' charges/dispersion coefficients on the grid + // communication between processors and remapping two fft + // Solution of poissons equation in k-space and backtransformation + // communication between processors + // calculation of forces + + if (function[0]) { + + //perfrom calculations for coulomb interactions only + + particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, + nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); + + make_rho_c(); + + cg->reverse_comm(this,REVERSE_RHO); + + brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + density_brick, density_fft, work1,remap); + + if (differentiation_flag == 1) { + + poisson_ad(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this,FORWARD_AD); + + fieldforce_c_ad(); + + if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); + + } else { + poisson_ik(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + fkx, fky, fkz,fkx2, fky2, fkz2, + vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this, FORWARD_IK); + + fieldforce_c_ik(); + + if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); + } + if (evflag_atom) fieldforce_c_peratom(); + } + + if (function[1]) { + //perfrom calculations for geometric mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_g(); + + + cg_6->reverse_comm(this, REVERSE_RHO_G); + + brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + density_brick_g, density_fft_g, work1_6,remap_6); + + if (differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_AD_G); + + fieldforce_g_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); + + } else { + poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_IK_G); + + fieldforce_g_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); + } + if (evflag_atom) fieldforce_g_peratom(); + } + + if (function[2]) { + //perform calculations for arithmetic mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_a(); + + cg_6->reverse_comm(this, REVERSE_RHO_A); + + brick2fft_a(); + + if ( differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ad(density_fft_a0, density_fft_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ad(density_fft_a1, density_fft_a5, + 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ad(density_fft_a2, density_fft_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_AD_A); + + fieldforce_a_ad(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); + + } else { + + poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ik(density_fft_a0, density_fft_a6, + vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, + vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ik(density_fft_a1, density_fft_a5, + vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, + vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, + u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ik(density_fft_a2, density_fft_a4, + vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, + vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_IK_A); + + fieldforce_a_ik(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); + } + if (evflag_atom) fieldforce_a_peratom(); + } + + if (function[3]) { + //perfrom calculations if no mixing rule applies + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + + make_rho_none(); + + cg_6->reverse_comm(this, REVERSE_RHO_NONE); + + brick2fft_none(); + + if (differentiation_flag == 1) { + + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); + + fieldforce_none_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); + + } else { + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); + + fieldforce_none_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); + } + if (evflag_atom) fieldforce_none_peratom(); + } + + // sum energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_1 = energy_all; + MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_6 = energy_all; + + energy_1 *= 0.5*volume; + energy_6 *= 0.5*volume; + + energy_1 -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + + 1.0/12.0*pow(g_ewald_6,6)*csum; + energy_1 *= qscale; + } + + // sum virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; + if 
(function[1]+function[2]+function[3]){ + double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; + virial[0] -= a; + virial[1] -= a; + virial[2] -= a; + } + } + + if (eflag_atom) { + if (function[0]) { + double *q = atom->q; + for (i = 0; i < atom->nlocal; i++) { + eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction + } + } + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + + 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; + } + } + } + + if (vflag_atom) { + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction + } + } + } + + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + if (function[0]) energy += energy_1; + if (function[1] + function[2] + function[3]) energy += energy_6; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + initialize coefficients needed for the dispersion density on the grids +------------------------------------------------------------------------- */ + +void PPPMDisp::init_coeffs() // local pair coeffs +{ + int tmp; + int n = atom->ntypes; + int converged; + delete [] B; + if (function[3] + function[2]) { // no mixing rule or arithmetic + if (function[2] && me == 0) { + if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); + if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); + } + // get dispersion coefficients + double **b = (double **) force->pair->extract("B",tmp); + // allocate data for eigenvalue decomposition + double **A; + double 
**Q; + memory->create(A,n,n,"pppm/disp:A"); + memory->create(Q,n,n,"pppm/disp:Q"); + // fill coefficients to matrix a + for (int i = 1; i <= n; i++) + for (int j = 1; j <= n; j++) + A[i-1][j-1] = b[i][j]; + // transform q to a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Q[i][j] = 0.0; + for (int i = 0; i < n; i++) + Q[i][i] = 1.0; + // perfrom eigenvalue decomposition with QR algorithm + converged = qr_alg(A,Q,n); + if (function[3] && !converged) { + error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); + } + // determine number of used eigenvalues + // based on maximum allowed number or cutoff criterion + // sort eigenvalues according to their size with bubble sort + double t; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n-1-i; j++) { + if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { + t = A[j][j]; + A[j][j] = A[j+1][j+1]; + A[j+1][j+1] = t; + for (int k = 0; k < n; k++) { + t = Q[k][j]; + Q[k][j] = Q[k][j+1]; + Q[k][j+1] = t; + } + } + } + } + + // check which eigenvalue is the first that is smaller + // than a specified tolerance + // check how many are maximum allowed by the user + double amax = fabs(A[0][0]); + double acrit = amax*splittol; + double bmax = 0; + double err = 0; + nsplit = 0; + for (int i = 0; i < n; i++) { + if (fabs(A[i][i]) > acrit) nsplit++; + else { + bmax = fabs(A[i][i]); + break; + } + } + + err = bmax/amax; + if (err > 1.0e-4) { + char str[128]; + sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); + error->warning(FLERR, str); + } + // set B + B = new double[nsplit*n+nsplit]; + for (int i = 0; i< nsplit; i++) { + B[i] = A[i][i]; + for (int j = 0; j < n; j++) { + B[nsplit*(j+1) + i] = Q[j][i]; + } + } + + nsplit_alloc = nsplit; + if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; + // check if the function should preferably be [1] or [2] or [3] + if (nsplit == 1) { + delete [] B; + function[3] = 0; + function[2] = 0; + function[1] = 1; + if (me == 0) { + if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); + if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); + } + } + if (function[2] && nsplit <= 6) { + if (me == 0) { + if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); + } + function[3] = 1; + function[2] = 0; + } + if (function[2] && (nsplit > 6)) { + if (me == 0) { + if (screen) fprintf(screen," Using 7 structure factors\n"); + if (logfile) fprintf(logfile," Using 7 structure factors\n"); + } + delete [] B; + } + if (function[3]) { + if (me == 0) { + if (screen) fprintf(screen," Using %d structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); + } + if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); + } + + memory->destroy(A); + memory->destroy(Q); + } + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + B = new double[n+1]; + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + //cannot use epsilon, because this has not been set yet + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + //cannot use sigma, because this has not been set yet + double **sigma = (double **) force->pair->extract("sigma",tmp); + if (!(epsilon&&sigma)) + error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]*0.25; + sigma_n *= sigma_i; + } + } + } +} + +/* 
---------------------------------------------------------------------- + Eigenvalue decomposition of a real, symmetric matrix with the QR + method (includes transpformation to Tridiagonal Matrix + Wilkinson + shift) +------------------------------------------------------------------------- */ + +int PPPMDisp::qr_alg(double **A, double **Q, int n) +{ + int converged = 0; + double an1, an, bn1, d, mue; + // allocate some memory for the required operations + double **A0,**Qi,**C,**D,**E; + // make a copy of A for convergence check + memory->create(A0,n,n,"pppm/disp:A0"); + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + A0[i][j] = A[i][j]; + + // allocate an auxiliary matrix Qi + memory->create(Qi,n,n,"pppm/disp:Qi"); + + // alllocate an auxillary matrices for the matrix multiplication + memory->create(C,n,n,"pppm/disp:C"); + memory->create(D,n,n,"pppm/disp:D"); + memory->create(E,n,n,"pppm/disp:E"); + + // transform Matrix A to Tridiagonal form + hessenberg(A,Q,n); + + // start loop for the matrix factorization + int count = 0; + int countmax = 100000; + while (1) { + // make a Wilkinson shift + an1 = A[n-2][n-2]; + an = A[n-1][n-1]; + bn1 = A[n-2][n-1]; + d = (an1-an)/2; + mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); + for (int i = 0; i < n; i++) + A[i][i] -= mue; + + // perform a QR factorization for a tridiagonal matrix A + qr_tri(Qi,A,n); + + // update the matrices + mmult(A,Qi,C,n); + mmult(Q,Qi,C,n); + + // backward Wilkinson shift + for (int i = 0; i < n; i++) + A[i][i] += mue; + + // check the convergence + converged = check_convergence(A,Q,A0,C,D,E,n); + if (converged) break; + count = count + 1; + if (count == countmax) break; + } + + // free allocated memory + memory->destroy(Qi); + memory->destroy(A0); + memory->destroy(C); + memory->destroy(D); + memory->destroy(E); + + return converged; +} + +/* ---------------------------------------------------------------------- + Transform a Matrix to Hessenberg form (for symmetric Matrices, the + 
result will be a tridiagonal matrix) +------------------------------------------------------------------------- */ + +void PPPMDisp::hessenberg(double **A, double **Q, int n) +{ + double r,a,b,c,s,x1,x2; + for (int i = 0; i < n-1; i++) { + for (int j = i+2; j < n; j++) { + // compute coeffs for the rotation matrix + a = A[i+1][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A with multiplication from the left + for (int k = 0; k < n; k++) { + x1 = A[i+1][k]; + x2 = A[j][k]; + A[i+1][k] = c*x1 + s*x2; + A[j][k] = -s*x1 + c*x2; + } + // update the entries of A and Q with a multiplication from the right + for (int k = 0; k < n; k++) { + x1 = A[k][i+1]; + x2 = A[k][j]; + A[k][i+1] = c*x1 + s*x2; + A[k][j] = -s*x1 + c*x2; + x1 = Q[k][i+1]; + x2 = Q[k][j]; + Q[k][i+1] = c*x1 + s*x2; + Q[k][j] = -s*x1 + c*x2; + } + } + } +} + +/* ---------------------------------------------------------------------- + QR factorization for a tridiagonal matrix; Result of the factorization + is stored in A and Qi +------------------------------------------------------------------------- */ + +void PPPMDisp::qr_tri(double** Qi,double** A,int n) +{ + double r,a,b,c,s,x1,x2; + int j,k,k0,kmax; + // make Qi a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Qi[i][j] = 0.0; + for (int i = 0; i < n; i++) + Qi[i][i] = 1.0; + // loop over main diagonal and first of diagonal of A + for (int i = 0; i < n-1; i++) { + j = i+1; + // coefficients of the rotation matrix + a = A[i][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A and Q + k0 = (i-1>0)?i-1:0; //min(i-1,0); + kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); + double epsabs = eps*Bmax; + + // reconstruct the original matrix + // store the diagonal elements in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = 0.0; + for (int i = 0; i < n; i++) + D[i][i] = A[i][i]; + // store matrix Q in E + for (int i 
= 0; i < n; i++) + for (int j = 0; j < n; j++) + E[i][j] = Q[i][j]; + // E = Q*A + mmult(E,D,C,n); + // store transpose of Q in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = Q[j][i]; + // E = Q*A*Q.t + mmult(E,D,C,n); + + //compare the original matrix and the final matrix + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + diff = A0[i][j] - E[i][j]; + epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); + } + } + if (epsmax > epsabs) converged = 0; + return converged; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + memory->create(work1,2*nfft_both,"pppm/disp:work1"); + memory->create(work2,2*nfft_both,"pppm/disp:work2"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); + + memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); + memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); + memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); + + + memory->create(gf_b,order,"pppm/disp:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); + + memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); + memory->create(vg,nfft_both,6,"pppm/disp:vg"); + memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); + + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); + } + memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[1]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); + memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); + memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); + } + memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,3,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + 
+ if (function[2]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); + memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); + memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); + memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); + memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); + memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); + memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); + + memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); + memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); + memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); + memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); + memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); + memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); + memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); + + + if ( differentiation_flag == 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + + memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); + memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); + memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); + + memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); + memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); + memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); + + memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); + memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); + memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); + + memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); + memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); + memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); + + memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); + 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); + memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); + + memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); + memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); + memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); + + memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); + memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); + memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); + } + + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,7,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + 
procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,21,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[3]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); + if ( differentiation_flag == 1) { + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); + memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); + memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); + } + memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate_peratom() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v5_brick"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + + if (function[1]) { + + if ( differentiation_flag != 1 ) + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); + memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); + memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); + memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); + memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); + memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[2]) { + + if ( differentiation_flag != 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + } + + memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); + memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); + memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); + memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); + memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); + memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); + + memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); + memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); + memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); + memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); + memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); + memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); + + memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); + memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); + memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); + memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); + memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); + memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); + + memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); + memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); + memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); + memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); + memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); + memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); + + memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); + memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); + memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); + memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); + memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); + memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); + + memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); + memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); + memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); + memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); + memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); + memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); + + memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); + memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); + memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
+ nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); + memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); + memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); + memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,42,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,49,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[3]) { + + if ( differentiation_flag != 1 ) + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); + memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); + memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); + memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); + 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); + memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_fft); + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + + memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_g); + density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + + memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a0); + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + + memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a1); + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + + memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a2); + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + + memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a3); + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + + memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a4); + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + + memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a5); + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + + memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a6); + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + + memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_none); + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + memory->destroy(sf_precoeff1_6); + memory->destroy(sf_precoeff2_6); + 
memory->destroy(sf_precoeff3_6); + memory->destroy(sf_precoeff4_6); + memory->destroy(sf_precoeff5_6); + memory->destroy(sf_precoeff6_6); + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + memory->destroy(greensfn); + memory->destroy(greensfn_6); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(work1_6); + memory->destroy(work2_6); + memory->destroy(vg); + memory->destroy(vg2); + memory->destroy(vg_6); + memory->destroy(vg2_6); + greensfn = greensfn_6 = NULL; + work1 = work2 = work1_6 = work2_6 = NULL; + vg = vg2 = vg_6 = vg2_6 = NULL; + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + fkx = fky = fkz = NULL; + + memory->destroy1d_offset(fkx2,nxlo_fft); + memory->destroy1d_offset(fky2,nylo_fft); + memory->destroy1d_offset(fkz2,nzlo_fft); + fkx2 = fky2 = fkz2 = NULL; + + memory->destroy1d_offset(fkx_6,nxlo_fft_6); + memory->destroy1d_offset(fky_6,nylo_fft_6); + memory->destroy1d_offset(fkz_6,nzlo_fft_6); + fkx_6 = fky_6 = fkz_6 = NULL; + + memory->destroy1d_offset(fkx2_6,nxlo_fft_6); + memory->destroy1d_offset(fky2_6,nylo_fft_6); + memory->destroy1d_offset(fkz2_6,nzlo_fft_6); + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + memory->destroy2d_offset(drho1d,-order/2); + memory->destroy2d_offset(drho_coeff, (1-order)/2); + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + memory->destroy(gf_b_6); + memory->destroy2d_offset(rho1d_6,-order_6/2); + memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); + memory->destroy2d_offset(drho1d_6,-order_6/2); + memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); + gf_b_6 = NULL; + rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; + + delete fft1; + delete fft2; + delete remap; + delete cg; + fft1 = fft2 = NULL; + remap = NULL; + cg = 
NULL; + + delete fft1_6; + delete fft2_6; + delete remap_6; + delete cg_6; + fft1_6 = fft2_6 = NULL; + remap_6 = NULL; + cg_6 = NULL; +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + + memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; + + memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; + + memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; + + memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; + + memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; + + memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; + + memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; + + memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); + memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; + + memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; + + delete cg_peratom; + delete cg_peratom_6; + cg_peratom = cg_peratom_6 = NULL; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) and g_ewald + for Coulomb interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid() +{ + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h, h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) + error->all(FLERR,"KSpace accuracy too large to estimate G vector"); + g_ewald = 
sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if (!gridflag) { + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double qopt = compute_qopt(); + + double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (dfkspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); + h *= 0.95; + h_x = h_y = h_z = h; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; +} + +/* ---------------------------------------------------------------------- + set the FFT parameters +------------------------------------------------------------------------- */ + +void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, + int& nxlo_f,int& nylo_f,int& nzlo_f, + int& nxhi_f,int& nyhi_f,int& nzhi_f, + int& nxlo_i,int& nylo_i,int& nzlo_i, + int& nxhi_i,int& nyhi_i,int& nzhi_i, + int& nxlo_o,int& nylo_o,int& nzlo_o, + int& nxhi_o,int& 
nyhi_o,int& nzhi_o, + int& nlow, int& nupp, + int& ng, int& nf, int& nfb, + double& sft,double& sftone, int& ord) +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); + nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; + + nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); + nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; + + nzlo_i = static_cast + (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); + nzhi_i = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; + + + // nlow,nupp = stencil size for mapping particles to PPPM grid + + nlow = -(ord-1)/2; + nupp = ord/2; + + // sft values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (ord % 2) sft = OFFSET + 0.5; + else sft = OFFSET; + if (ord % 2) sftone = 0.0; + else sftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd 
= prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nxlo_o = nlo + nlow; + nxhi_o = nhi + nupp; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nylo_o = nlo + nlow; + nyhi_o = nhi + nupp; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nzlo_o = nlo + nlow; + nzhi_o = nhi + nupp; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) + + if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_i = nz_p - 1; + nzhi_o = nz_p - 1; + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int 
npey_fft,npez_fft; + if (nz_p >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_f = 0; + nxhi_f = nx_p - 1; + nylo_f = me_y*ny_p/npey_fft; + nyhi_f = (me_y+1)*ny_p/npey_fft - 1; + nzlo_f = me_z*nz_p/npez_fft; + nzhi_f = (me_z+1)*nz_p/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * + (nzhi_o-nzlo_o+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * + (nzhi_f-nzlo_f+1); + int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * + (nzhi_i-nzlo_i+1); + nfb = MAX(nf,nfft_brick); + +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPMDisp::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ +void PPPMDisp::adjust_gewald() +{ + + // Use Newton solver to find g_ewald + + double dx; + + // Begin algorithm + + for (int i = 0; i < LARGE; i++) { + dx = f() / derivf(); + g_ewald -= dx; //Update g_ewald + if (fabs(f()) < SMALL) return; + } + + // Failed to converge + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); + +} + +/* 
---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double PPPMDisp::f() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f(); + g_ewald_old = g_ewald; + g_ewald += h; + f2 = f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator for the accuracy +------------------------------------------------------------------------- */ + +double PPPMDisp::final_accuracy() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); + return acc; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator 
for the Dispersion accuracy +------------------------------------------------------------------------- */ + +void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) +{ + double df_rspace, df_kspace; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + acc_real = lj_rspace_error(); + + double qopt = compute_qopt_6(); + + acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); + return; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Coulomb interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_ad(); + } else { + qopt = compute_qopt_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Dispersion interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_6_ad(); + } else { + qopt = compute_qopt_6_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ik() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab 
= zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3,dot1,dot2; + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) 
prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + double numerator; + + int nbx = 2; + int nby = 2; + int nbz = 2; + double form = 1.0; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706; + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += sx*sy*sz * u2*4.0*MY_PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Dispersion interaction 
+------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ik() +{ + double qopt = 0.0; + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3; + double dot1,dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1.0/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Dispersion interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4; + double dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + 
wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid and g_ewald_6 + for Dispersion interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid_6() +{ + // Calculate csum + if (!csumflag) calc_csum(); + if (!gewaldflag_6) set_init_g6(); + if (!gridflag_6) set_n_pppm_6(); + while (!factorable(nx_pppm_6)) nx_pppm_6++; + while (!factorable(ny_pppm_6)) ny_pppm_6++; + while (!factorable(nz_pppm_6)) nz_pppm_6++; + +} + +/* ---------------------------------------------------------------------- + Calculate the sum of the squared dispersion coefficients and other + related quantities required for the calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::calc_csum() +{ + csumij = 0.0; + csum = 0.0; + + int ntypes = atom->ntypes; + int i,j,k; + + delete [] cii; + cii = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) cii[i] = 0.0; + delete [] csumi; + csumi = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; + int *neach = new int[ntypes+1]; + for (i = 0; i<=ntypes; i++) neach[i] = 0; + + //the following variables are needed to distinguish between 
arithmetic + // and geometric mixing + + double mix1; // scales 20/16 to 4 + int mix2; // shifts the value to the sigma^3 value + int mix3; // shifts the value to the right atom type + if (function[1]) { + for (i = 1; i <= ntypes; i++) + cii[i] = B[i]*B[i]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += B[tmp]*B[tmp]; + } + } + if (function[2]) { + for (i = 1; i <= ntypes; i++) + cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; + } + } + if (function[3]) { + for (i = 1; i <= ntypes; i++) + for (j = 0; j < nsplit; j++) + cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + for (j = 0; j < nsplit; j++) + csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; + } + } + + + double tmp2; + MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); + csum = tmp2; + csumflag = 1; + + int *neach_all = new int[ntypes+1]; + MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); + + // copmute csumij and csumi + double d1, d2; + if (function[1]){ + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + csumi[i] += neach_all[j]*B[i]*B[j]; + d1 = neach_all[i]*B[i]; + d2 = neach_all[j]*B[j]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; + } + } + } + if (function[2]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; k<=6; k++) { + csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + d1 = neach_all[i]*B[7*i + k]; + d2 = neach_all[j]*B[7*(j+1)-k-1]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + } + } + } + } + if (function[3]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; kall(FLERR, str); + +} + +/* ---------------------------------------------------------------------- + Calculate f(x) for Dispersion 
interaction + ------------------------------------------------------------------------- */ + +double PPPMDisp::f_6() +{ + double df_rspace, df_kspace; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = lj_rspace_error(); + + double qopt = compute_qopt_6(); + df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf_6() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f_6(); + g_ewald_old = g_ewald_6; + g_ewald_6 += h; + f2 = f_6(); + g_ewald_6 = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + + +/* ---------------------------------------------------------------------- + calculate an initial value for g_ewald_6 + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_init_g6() +{ + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + // make initial g_ewald estimate + // based on desired error and real space cutoff + + // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj + // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 + // else, repeat multiply g_ewald_6 by 2 until df_real > 0 + // perform bisection for the last two values of + double df_real; + double g_ewald_old; + double gmin, gmax; + + // check if there is a user defined accuracy + double acc_rspace = accuracy; + if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; + + g_ewald_6 = 
1.0/cutoff_lj; + df_real = lj_rspace_error() - acc_rspace; + int counter = 0; + if (df_real > 0) { + while (df_real > 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 2; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (df_real < 0) { + while (df_real < 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 0.5; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + + gmin = MIN(g_ewald_6, g_ewald_old); + gmax = MAX(g_ewald_6, g_ewald_old); + g_ewald_6 = gmin + 0.5*(gmax-gmin); + counter = 0; + while (gmax-gmin > SMALL && counter < LARGE) { + counter++; + df_real = lj_rspace_error() -acc_rspace; + if (df_real < 0) gmax = g_ewald_6; + else gmin = g_ewald_6; + g_ewald_6 = gmin + 0.5*(gmax-gmin); + } + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + +} + +/* ---------------------------------------------------------------------- + calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_n_pppm_6() +{ + bigint natoms = atom->natoms; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + double h, h_x,h_y,h_z; + + double acc_kspace = accuracy; + if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; + + // initial value for the grid spacing + h = h_x = h_y = h_z = 4.0/g_ewald_6; + // decrease grid spacing untill required precision is obtained + int count = 0; + while(1) { + + // set grid dimension + nx_pppm_6 = static_cast (xprd/h_x); + ny_pppm_6 = static_cast (yprd/h_y); + nz_pppm_6 = static_cast (zprd_slab/h_z); + + if (nx_pppm_6 <= 1) nx_pppm_6 = 2; + if (ny_pppm_6 <= 1) ny_pppm_6 = 2; + if (nz_pppm_6 <= 1) nz_pppm_6 = 2; + + 
//set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm_6 >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft_6 = 0; + nxhi_fft_6 = nx_pppm_6 - 1; + nylo_fft_6 = me_y*ny_pppm_6/npey_fft; + nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; + nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; + nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; + + double qopt = compute_qopt_6(); + + double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (df_kspace <= acc_kspace) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); + h *= 0.95; + h_x = h_y = h_z = h; + } +} + +/* ---------------------------------------------------------------------- + calculate the real space error for dispersion interactions + ---------------------------------------------------------------------- */ + +double PPPMDisp::lj_rspace_error() +{ + bigint natoms = atom->natoms; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + double deltaf; + double rgs = (cutoff_lj*g_ewald_6); + rgs *= rgs; + double rgs_inv = 1.0/rgs; + deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* + exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); + return deltaf; +} + + +/* ---------------------------------------------------------------------- + Compyute the modified (hockney-eastwood) coulomb green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf() +{ + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; 
+ volume = xprd * yprd * zprd_slab; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = sin(0.5*qz*zprd_slab/nz_pppm); + snz2 = snz*snz; + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + wz *= wz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = sin(0.5*qy*yprd/ny_pppm); + sny2 = sny*sny; + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + wy *= wy; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = sin(0.5*qx*xprd/nx_pppm); + snx2 = snx*snx; + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + numerator = 4.0*MY_PI/sqk; + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + double *sf_pre1, double *sf_pre2, double *sf_pre3, + double *sf_pre4, double *sf_pre5, double *sf_pre6) +{ + + 
int i,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + int nb = 2; + + n = 0; + for (m = nzlo_ft; m <= nzhi_ft; m++) { + mper = m - nzp*(2*m/nzp); + + for (l = nylo_ft; l <= nyhi_ft; l++) { + lper = l - nyp*(2*l/nyp); + + for (k = nxlo_ft; k <= nxhi_ft; k++) { + kper = k - nxp*(2*k/nxp); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = -nb; i <= nb; i++) { + + qx0 = unitkx*(kper+nxp*i); + qx1 = unitkx*(kper+nxp*(i+1)); + qx2 = unitkx*(kper+nxp*(i+2)); + wx0[i+2] = 1.0; + wx1[i+2] = 1.0; + wx2[i+2] = 1.0; + argx = 0.5*qx0*xprd/nxp; + if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx1*xprd/nxp; + if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx2*xprd/nxp; + if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); + + qy0 = unitky*(lper+nyp*i); + qy1 = unitky*(lper+nyp*(i+1)); + qy2 = unitky*(lper+nyp*(i+2)); + wy0[i+2] = 1.0; + wy1[i+2] = 1.0; + wy2[i+2] = 1.0; + argy = 0.5*qy0*yprd/nyp; + if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy1*yprd/nyp; + if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy2*yprd/nyp; + if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); + + qz0 = unitkz*(mper+nzp*i); + qz1 = unitkz*(mper+nzp*(i+1)); + qz2 = unitkz*(mper+nzp*(i+2)); + wz0[i+2] = 1.0; + wz1[i+2] = 1.0; + wz2[i+2] = 1.0; + argz = 
0.5*qz0*zprd_slab/nzp; + if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz1*zprd_slab/nzp; + if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz2*zprd_slab/nzp; + if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); + } + + for (nx = 0; nx <= 4; nx++) { + for (ny = 0; ny <= 4; ny++) { + for (nz = 0; nz <= 4; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_pre1[n] = sum1; + sf_pre2[n] = sum2; + sf_pre3[n] = sum3; + sf_pre4[n] = sum4; + sf_pre5[n] = sum5; + sf_pre6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + Compute the modified (hockney-eastwood) dispersion green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_6() +{ + double *prd; + int k,l,m,n; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double sqk; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz; + double qx,qy,qz; + double rtsqk, term; + double numerator,denominator; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); + qz = unitkz*mper; + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); + snz2 = snz*snz; + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + wz *= wz; + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + qy = unitky*lper; + sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); + sny2 = sny*sny; + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + wy *= wy; + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + qx = unitkx*kper; + snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); + snx2 = snx*snx; + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + rtsqk = sqrt(sqk); + term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); + greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; + } else greensfn_6[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ +void PPPMDisp::compute_sf_coeff() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + for (l = nylo_fft; l <= nyhi_fft; l++) { + for (k = nxlo_fft; k <= nxhi_fft; k++) { + sf_coeff[0] 
+= sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + ++n; + } + } + } + + // Compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Dispersion interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_coeff_6() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; + sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; + sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; + sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; + sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; + sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n]; + ++n; + } + } + } + + + // perform multiplication with prefactors + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + 
prex *= nx_pppm_6/xprd; + prey *= ny_pppm_6/yprd; + prez *= nz_pppm_6/zprd_slab; + sf_coeff_6[0] *= prex; + sf_coeff_6[1] *= prex*2; + sf_coeff_6[2] *= prey; + sf_coeff_6[3] *= prey*2; + sf_coeff_6[4] *= prez; + sf_coeff_6[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; + +} + +/* ---------------------------------------------------------------------- + denominator for Hockney-Eastwood Green's function + of x,y,z = sin(kx*deltax/2), etc + + inf n-1 + S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l + j=-inf l=0 + + = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) + gf_b = denominator expansion coeffs +------------------------------------------------------------------------- */ + +double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) +{ + double sx,sy,sz; + sz = sy = sx = 0.0; + for (int l = ord-1; l >= 0; l--) { + sx = g_b[l] + sx*x; + sy = g_b[l] + sy*y; + sz = g_b[l] + sz*z; + } + double s = sx*sy*sz; + return s*s; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_denom(double* gf, int ord) +{ + int k,l,m; + + for (l = 1; l < ord; l++) gf[l] = 0.0; + gf[0] = 1.0; + + for (m = 1; m < ord; m++) { + for (l = m; l > 0; l--) + gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); + gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*ord; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < ord; l++) gf[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for coulomb interaction or 
dispersion interaction with geometric + mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, + LAMMPS_NS::Remap* rmp) +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_i; iz <= nzhi_i; iz++) + for (iy = nylo_i; iy <= nyhi_i; iy++) + for (ix = nxlo_i; ix <= nxhi_i; ix++) + dfft[n++] = dbrick[iz][iy][ix]; + + rmp->perform(dfft,dfft,work); +} + + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_a() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) + for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) + for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { + density_fft_a0[n] = density_brick_a0[iz][iy][ix]; + density_fft_a1[n] = density_brick_a1[iz][iy][ix]; + density_fft_a2[n] = density_brick_a2[iz][iy][ix]; + density_fft_a3[n] = density_brick_a3[iz][iy][ix]; + density_fft_a4[n] = density_brick_a4[iz][iy][ix]; + density_fft_a5[n] = density_brick_a5[iz][iy][ix]; + density_fft_a6[n++] = density_brick_a6[iz][iy][ix]; + } + + remap_6->perform(density_fft_a0,density_fft_a0,work1_6); + remap_6->perform(density_fft_a1,density_fft_a1,work1_6); + remap_6->perform(density_fft_a2,density_fft_a2,work1_6); + 
remap_6->perform(density_fft_a3,density_fft_a3,work1_6); + remap_6->perform(density_fft_a4,density_fft_a4,work1_6); + remap_6->perform(density_fft_a5,density_fft_a5,work1_6); + remap_6->perform(density_fft_a6,density_fft_a6,work1_6); + +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with special case +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_none() +{ + int k,n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDisp::particle_map(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; + + p2g[i][0] = nx; + p2g[i][1] = ny; + p2g[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 
3d brick + + if (nx+nlow < nxlo || nx+nup > nxhi || + ny+nlow < nylo || ny+nup > nyhi || + nz+nlow < nzlo || nz+nup > nzhi) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); +} + + +void PPPMDisp::particle_map_c(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + particle_map(delx, dely, delz, sft, p2g, nup, nlow, + nxlo, nylo, nzlo, nxhi, nyhi, nzhi); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_c() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + 
density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- geometric mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_g() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6 * B[type]; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) 
+ in global grid --- arithmetic mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_a() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + + memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + density_brick_a0[mz][my][mx] += w*B[7*type]; + density_brick_a1[mz][my][mx] += 
w*B[7*type+1]; + density_brick_a2[mz][my][mx] += w*B[7*type+2]; + density_brick_a3[mz][my][mx] += w*B[7*type+3]; + density_brick_a4[mz][my][mx] += w*B[7*type+4]; + density_brick_a5[mz][my][mx] += w*B[7*type+5]; + density_brick_a6[mz][my][mx] += w*B[7*type+6]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- case when mixing rules don't apply +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_none() +{ + int k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + for (k = 0; k < nsplit_alloc; k++) + memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) + 
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik differentiation +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* kx, double* ky, double* kz, + double* kx2, double* ky2, double* kz2, + FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg 
-ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x & y direction gradient + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; + wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vx_brick[k][j][i] = wk2[n++]; + vy_brick[k][j][i] = wk2[n++]; + } + + if (!eflag_atom) { + // z direction gradient only + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = kz[k]*wk1[n+1]; + wk2[n+1] = -kz[k]*wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n]; + n += 2; + } + + } + + else { + // z direction gradient & per-atom energy + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; + wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n++]; + u_pa[k][j][i] = wk2[n++];; + } + } + + if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad differentiation 
+------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = wk1[n]; + wk2[n+1] = wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + u_pa[k][j][i] = wk2[n++]; + n++; + } + + + if (vflag_atom) poisson_peratom(wk1, 
wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); + +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, + double** vcoeff, double** vcoeff2, int nft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) +{ + //v0 & v1 term + int n, i, j, k; + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; + wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v0_pa[k][j][i] = wk2[n++]; + v1_pa[k][j][i] = wk2[n++]; + } + + //v2 & v3 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; + wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v2_pa[k][j][i] = wk2[n++]; + v3_pa[k][j][i] = wk2[n++]; + } + + //v4 & v5 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; + wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v4_pa[k][j][i] = wk2[n++]; + v5_pa[k][j][i] = wk2[n++]; + } + +} + +/* 
---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify 
the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = work2_6[n++]; + vxbrick_2[k][j][i] = work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = work2_6[n++]; + vybrick_2[k][j][i] = work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; 
j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = work2_6[n++]; + vzbrick_2[k][j][i] = work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + } + + if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + + + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; 
+ } + + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } 
+ + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = B[n1]*work2_6[n++]; + vybrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; + u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; + } + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, 
FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + + if (vflag_atom) 
poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, + FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two 
transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = B[n1]*work2_6[n++]; + u_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = work2_6[n++]; + v0_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + 
fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = work2_6[n++]; + v1_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = work2_6[n++]; + v2_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = work2_6[n++]; + v3_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = work2_6[n++]; + v4_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = work2_6[n++]; + v5_pa_2[k][j][i] = work2_6[n++]; + } +} + +/* 
---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_peratom(int n1, int n2, + FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = 
work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = 
atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + 
for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + // convert E-field to force and substract self forces + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = 
global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + // convert E-field to force + + const double qfactor = 0.5*force->qqrd2e * scale * q[i]; + + if (eflag_atom) eatom[i] += u_pa*qfactor; + if (vflag_atom) { + vatom[i][0] += v0*qfactor; + vatom[i][1] += v1*qfactor; + vatom[i][2] += v2*qfactor; + vatom[i][3] += v3*qfactor; + vatom[i][4] += v4*qfactor; + vatom[i][5] += v5*qfactor; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving 
stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx -= x0*vdx_brick_g[mz][my][mx]; + eky -= x0*vdy_brick_g[mz][my][mx]; + ekz -= x0*vdz_brick_g[mz][my][mx]; + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + if (slabflag != 2) f[i][2] += lj*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt 
+ // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 2*lj*lj; + f[i][0] += ekx*lj - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 2*lj*lj; + f[i][1] += eky*lj - sf; + + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 2*lj*lj; + if (slabflag != 2) f[i][2] += ekz*lj - sf; + + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + 
FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick_g[mz][my][mx]; + v1 += x0*v1_brick_g[mz][my][mx]; + v2 += x0*v2_brick_g[mz][my][mx]; + v3 += x0*v3_brick_g[mz][my][mx]; + v4 += x0*v4_brick_g[mz][my][mx]; + v5 += x0*v5_brick_g[mz][my][mx]; + } + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]*0.5; + + if (eflag_atom) eatom[i] += u_pa*lj; + if (vflag_atom) { + vatom[i][0] += v0*lj; + vatom[i][1] += v1*lj; + vatom[i][2] += v2*lj; + vatom[i][3] += v3*lj; + vatom[i][4] += v4*lj; + vatom[i][5] += v5*lj; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ik() +{ + int 
i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx0 -= x0*vdx_brick_a0[mz][my][mx]; + eky0 -= x0*vdy_brick_a0[mz][my][mx]; + ekz0 -= x0*vdz_brick_a0[mz][my][mx]; + ekx1 -= x0*vdx_brick_a1[mz][my][mx]; + eky1 -= x0*vdy_brick_a1[mz][my][mx]; + ekz1 -= x0*vdz_brick_a1[mz][my][mx]; + ekx2 -= x0*vdx_brick_a2[mz][my][mx]; + eky2 -= x0*vdy_brick_a2[mz][my][mx]; + ekz2 -= x0*vdz_brick_a2[mz][my][mx]; + ekx3 -= x0*vdx_brick_a3[mz][my][mx]; + eky3 -= x0*vdy_brick_a3[mz][my][mx]; + ekz3 -= x0*vdz_brick_a3[mz][my][mx]; + ekx4 -= x0*vdx_brick_a4[mz][my][mx]; + eky4 -= x0*vdy_brick_a4[mz][my][mx]; + 
ekz4 -= x0*vdz_brick_a4[mz][my][mx]; + ekx5 -= x0*vdx_brick_a5[mz][my][mx]; + eky5 -= x0*vdy_brick_a5[mz][my][mx]; + ekz5 -= x0*vdz_brick_a5[mz][my][mx]; + ekx6 -= x0*vdx_brick_a6[mz][my][mx]; + eky6 -= x0*vdy_brick_a6[mz][my][mx]; + ekz6 -= x0*vdz_brick_a6[mz][my][mx]; + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = 
atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + ekx0 += x0*u_brick_a0[mz][my][mx]; + eky0 += y0*u_brick_a0[mz][my][mx]; + ekz0 += z0*u_brick_a0[mz][my][mx]; + + ekx1 += x0*u_brick_a1[mz][my][mx]; + eky1 += y0*u_brick_a1[mz][my][mx]; + ekz1 += z0*u_brick_a1[mz][my][mx]; + + ekx2 += x0*u_brick_a2[mz][my][mx]; + eky2 += y0*u_brick_a2[mz][my][mx]; + ekz2 += z0*u_brick_a2[mz][my][mx]; + + ekx3 += x0*u_brick_a3[mz][my][mx]; + eky3 += y0*u_brick_a3[mz][my][mx]; + ekz3 += z0*u_brick_a3[mz][my][mx]; + + ekx4 += x0*u_brick_a4[mz][my][mx]; + eky4 += y0*u_brick_a4[mz][my][mx]; + ekz4 += z0*u_brick_a4[mz][my][mx]; + + ekx5 += x0*u_brick_a5[mz][my][mx]; + eky5 += y0*u_brick_a5[mz][my][mx]; + ekz5 += z0*u_brick_a5[mz][my][mx]; + + ekx6 += x0*u_brick_a6[mz][my][mx]; + eky6 += y0*u_brick_a6[mz][my][mx]; + ekz6 += z0*u_brick_a6[mz][my][mx]; + } + } + } + + ekx0 *= hx_inv; + eky0 *= hy_inv; + ekz0 *= hz_inv; + + ekx1 *= hx_inv; + eky1 *= hy_inv; + ekz1 *= hz_inv; + + ekx2 *= hx_inv; + eky2 *= 
hy_inv; + ekz2 *= hz_inv; + + ekx3 *= hx_inv; + eky3 *= hy_inv; + ekz3 *= hz_inv; + + ekx4 *= hx_inv; + eky4 *= hy_inv; + ekz4 *= hz_inv; + + ekx5 *= hx_inv; + eky5 *= hy_inv; + ekz5 *= hz_inv; + + ekx6 *= hx_inv; + eky6 *= hy_inv; + ekz6 *= hz_inv; + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; + FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; + FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; + FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; + FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; + FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; + FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; + + // loop over my charges, interpolate electric field from nearby 
grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; + u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; + u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; + u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; + u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; + u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; + u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + u_pa0 += x0*u_brick_a0[mz][my][mx]; + u_pa1 += x0*u_brick_a1[mz][my][mx]; + u_pa2 += x0*u_brick_a2[mz][my][mx]; + u_pa3 += x0*u_brick_a3[mz][my][mx]; + u_pa4 += x0*u_brick_a4[mz][my][mx]; + u_pa5 += x0*u_brick_a5[mz][my][mx]; + u_pa6 += x0*u_brick_a6[mz][my][mx]; + } + if (vflag_atom) { + v00 += x0*v0_brick_a0[mz][my][mx]; + v10 += x0*v1_brick_a0[mz][my][mx]; + v20 += x0*v2_brick_a0[mz][my][mx]; + v30 += x0*v3_brick_a0[mz][my][mx]; + v40 += x0*v4_brick_a0[mz][my][mx]; + v50 += x0*v5_brick_a0[mz][my][mx]; + v01 += x0*v0_brick_a1[mz][my][mx]; + v11 += x0*v1_brick_a1[mz][my][mx]; + v21 += x0*v2_brick_a1[mz][my][mx]; + v31 += x0*v3_brick_a1[mz][my][mx]; + v41 += 
x0*v4_brick_a1[mz][my][mx]; + v51 += x0*v5_brick_a1[mz][my][mx]; + v02 += x0*v0_brick_a2[mz][my][mx]; + v12 += x0*v1_brick_a2[mz][my][mx]; + v22 += x0*v2_brick_a2[mz][my][mx]; + v32 += x0*v3_brick_a2[mz][my][mx]; + v42 += x0*v4_brick_a2[mz][my][mx]; + v52 += x0*v5_brick_a2[mz][my][mx]; + v03 += x0*v0_brick_a3[mz][my][mx]; + v13 += x0*v1_brick_a3[mz][my][mx]; + v23 += x0*v2_brick_a3[mz][my][mx]; + v33 += x0*v3_brick_a3[mz][my][mx]; + v43 += x0*v4_brick_a3[mz][my][mx]; + v53 += x0*v5_brick_a3[mz][my][mx]; + v04 += x0*v0_brick_a4[mz][my][mx]; + v14 += x0*v1_brick_a4[mz][my][mx]; + v24 += x0*v2_brick_a4[mz][my][mx]; + v34 += x0*v3_brick_a4[mz][my][mx]; + v44 += x0*v4_brick_a4[mz][my][mx]; + v54 += x0*v5_brick_a4[mz][my][mx]; + v05 += x0*v0_brick_a5[mz][my][mx]; + v15 += x0*v1_brick_a5[mz][my][mx]; + v25 += x0*v2_brick_a5[mz][my][mx]; + v35 += x0*v3_brick_a5[mz][my][mx]; + v45 += x0*v4_brick_a5[mz][my][mx]; + v55 += x0*v5_brick_a5[mz][my][mx]; + v06 += x0*v0_brick_a6[mz][my][mx]; + v16 += x0*v1_brick_a6[mz][my][mx]; + v26 += x0*v2_brick_a6[mz][my][mx]; + v36 += x0*v3_brick_a6[mz][my][mx]; + v46 += x0*v4_brick_a6[mz][my][mx]; + v56 += x0*v5_brick_a6[mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]*0.5; + lj1 = B[7*type+5]*0.5; + lj2 = B[7*type+4]*0.5; + lj3 = B[7*type+3]*0.5; + lj4 = B[7*type+2]*0.5; + lj5 = B[7*type+1]*0.5; + lj6 = B[7*type]*0.5; + + + if (eflag_atom) + eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + + u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; + if (vflag_atom) { + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + v04*lj4 + v05*lj5 + v06*lj6; + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + v14*lj4 + v15*lj5 + v16*lj6; + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + v24*lj4 + v25*lj5 + v26*lj6; + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + v34*lj4 + v35*lj5 + v36*lj6; + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + v44*lj4 + v45*lj5 + v46*lj6; 
+ vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + v54*lj4 + v55*lj5 + v56*lj6; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ik() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) { + ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; + eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; + ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + f[i][0] += lj*ekx[k]; + f[i][1] 
+=lj*eky[k]; + if (slabflag != 2) f[i][2] +=lj*ekz[k]; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ad() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + + + double s1,s2,s3; + double sf1,sf2,sf3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = 
m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + for (k = 0; k < nsplit; k++) { + ekx[k] += x0*u_brick_none[k][mz][my][mx]; + eky[k] += y0*u_brick_none[k][mz][my][mx]; + ekz[k] += z0*u_brick_none[k][mz][my][mx]; + } + } + } + } + + for (k = 0; k < nsplit; k++) { + ekx[k] *= hx_inv; + eky[k] *= hy_inv; + ekz[k] *= hz_inv; + } + + // convert D-field to force + type = atom->type[i]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); + + sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); + + sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); + + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + + sf = sf1*B[k]*2*lj*lj; + f[i][0] += lj*ekx[k] - sf; + + + sf = sf2*B[k]*2*lj*lj; + f[i][1] += lj*eky[k] - sf; + + sf = sf3*B[k]*2*lj*lj; + if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_peratom() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; + + u_pa = new FFT_SCALAR[nsplit]; + v0 = new FFT_SCALAR[nsplit]; + v1 = new FFT_SCALAR[nsplit]; + v2 = new FFT_SCALAR[nsplit]; + v3 = new FFT_SCALAR[nsplit]; + v4 = new FFT_SCALAR[nsplit]; + v5 = new FFT_SCALAR[nsplit]; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // 
(dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + for (k = 0; k < nsplit; k++) + u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + for (k = 0; k < nsplit; k++) + u_pa[k] += x0*u_brick_none[k][mz][my][mx]; + } + if (vflag_atom) { + for (k = 0; k < nsplit; k++) { + v0[k] += x0*v0_brick_none[k][mz][my][mx]; + v1[k] += x0*v1_brick_none[k][mz][my][mx]; + v2[k] += x0*v2_brick_none[k][mz][my][mx]; + v3[k] += x0*v3_brick_none[k][mz][my][mx]; + v4[k] += x0*v4_brick_none[k][mz][my][mx]; + v5[k] += x0*v5_brick_none[k][mz][my][mx]; + } + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]*0.5; + + if (eflag_atom) { + eatom[i] += u_pa[k]*lj; + } + if (vflag_atom) { + vatom[i][0] += v0[k]*lj; + vatom[i][1] += v1[k]*lj; + vatom[i][2] += v2[k]*lj; + vatom[i][3] += v3[k]*lj; + vatom[i][4] += v4[k]*lj; + vatom[i][5] += v5[k]*lj; + } + } + } + + delete [] u_pa; + delete [] v0; + delete [] v1; + delete [] v2; + delete [] v3; + delete [] v4; + delete [] v5; +} + +/* ---------------------------------------------------------------------- + pack values to buf to send to another proc 
+------------------------------------------------------------------------- */ + +void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + 
buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, arithmetic mixing + + case FORWARD_IK_A: { + FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc0[list[i]]; + buf[n++] = ysrc0[list[i]]; + 
buf[n++] = zsrc0[list[i]]; + + buf[n++] = xsrc1[list[i]]; + buf[n++] = ysrc1[list[i]]; + buf[n++] = zsrc1[list[i]]; + + buf[n++] = xsrc2[list[i]]; + buf[n++] = ysrc2[list[i]]; + buf[n++] = zsrc2[list[i]]; + + buf[n++] = xsrc3[list[i]]; + buf[n++] = ysrc3[list[i]]; + buf[n++] = zsrc3[list[i]]; + + buf[n++] = xsrc4[list[i]]; + buf[n++] = ysrc4[list[i]]; + buf[n++] = zsrc4[list[i]]; + + buf[n++] = xsrc5[list[i]]; + buf[n++] = ysrc5[list[i]]; + buf[n++] = zsrc5[list[i]]; + + buf[n++] = xsrc6[list[i]]; + buf[n++] = ysrc6[list[i]]; + buf[n++] = zsrc6[list[i]]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + buf[n++] = esrc0[list[i]]; + buf[n++] = esrc1[list[i]]; + buf[n++] = esrc2[list[i]]; + buf[n++] = esrc3[list[i]]; + buf[n++] = esrc4[list[i]]; + buf[n++] = esrc5[list[i]]; + buf[n++] = esrc6[list[i]]; + } + if (vflag_atom) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = 
v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = 
v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + break; + } + + // Dispersion interactions, no mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[n++] = src[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, arithmetic mixing + + case 
FORWARD_IK_A: { + FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + xdest0[list[i]] = buf[n++]; + ydest0[list[i]] = buf[n++]; + zdest0[list[i]] = buf[n++]; + + xdest1[list[i]] = buf[n++]; + ydest1[list[i]] = buf[n++]; + zdest1[list[i]] = buf[n++]; + + xdest2[list[i]] = buf[n++]; + ydest2[list[i]] = buf[n++]; + zdest2[list[i]] = buf[n++]; + + xdest3[list[i]] = buf[n++]; + ydest3[list[i]] = buf[n++]; + zdest3[list[i]] = buf[n++]; + + 
xdest4[list[i]] = buf[n++]; + ydest4[list[i]] = buf[n++]; + zdest4[list[i]] = buf[n++]; + + xdest5[list[i]] = buf[n++]; + ydest5[list[i]] = buf[n++]; + zdest5[list[i]] = buf[n++]; + + xdest6[list[i]] = buf[n++]; + ydest6[list[i]] = buf[n++]; + zdest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + dest0[list[i]] = buf[n++]; + dest1[list[i]] = buf[n++]; + dest2[list[i]] = buf[n++]; + dest3[list[i]] = buf[n++]; + dest4[list[i]] = buf[n++]; + dest5[list[i]] = buf[n++]; + dest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + esrc0[list[i]] = buf[n++]; + esrc1[list[i]] = buf[n++]; + esrc2[list[i]] = buf[n++]; + esrc3[list[i]] = buf[n++]; + esrc4[list[i]] = buf[n++]; + esrc5[list[i]] = buf[n++]; + esrc6[list[i]] = buf[n++]; + } + if (vflag_atom) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + 
v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + 
v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, geometric mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + //Coulomb interactions + + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + //Dispersion interactions, geometric mixing + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + //Dispersion interactions, arithmetic mixing + + } else if (flag == 
REVERSE_RHO_A) { + FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + + //Dispersion interactions, no mixing + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src[list[i]]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + //Coulomb interactions + + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + //Dispersion interactions, geometric mixing + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + //Dispersion interactions, arithmetic mixing + + } else if (flag == REVERSE_RHO_A) { + FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + dest0[list[i]] += buf[n++]; + dest1[list[i]] += buf[n++]; + dest2[list[i]] += buf[n++]; + dest3[list[i]] += buf[n++]; + dest4[list[i]] += buf[n++]; + dest5[list[i]] += buf[n++]; + dest6[list[i]] += buf[n++]; + } + + //Dispersion interactions, no mixing + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + // innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* 
---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **rho_c, FFT_SCALAR **r1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-1; l >= 0; l--) { + r1 = rho_c[l][k] + r1*dx; + r2 = rho_c[l][k] + r2*dy; + r3 = rho_c[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-2; l >= 0; l--) { + r1 = drho_c[l][k] + r1*dx; + r2 = drho_c[l][k] + r2*dy; + r3 = drho_c[l][k] + r3*dz; + } + dr1d[0][k] = r1; + dr1d[1][k] = r2; + dr1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, + int ord) +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); + + for (k = -ord; k <= ord; k++) + for (l = 0; l < ord; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < ord; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-ord)/2; + for (k = -(ord-1); k < ord; k += 2) { + for (l = 0; l < ord; l++) + coeff[l][m] = a[l][k]; + for (l = 1; l < ord; l++) + dcoeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-ord); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMDisp::slabcorr(int eflag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_1d(int n, double &time1d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1){ + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->timing1d(work1_6,nfft_both_6,1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + if (differentiation_flag != 1){ + fft2_6->timing1d(work1_6,nfft_both_6,-1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d += (time2 - time1)*mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_3d(int n, double &time3d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + 
function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->compute(work1_6,work1_6,1); + fft2_6->compute(work1_6,work1_6,-1); + if (differentiation_flag != 1) { + fft2_6->compute(work1_6,work1_6,-1); + fft2_6->compute(work1_6,work1_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d += (time2 - time1) * mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMDisp::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int mixing = 1; + int diff = 3; //depends on differentiation + int per = 7; //depends on per atom calculations + if (differentiation_flag) { + diff = 1; + per = 6; + } + if (!evflag_atom) per = 0; + if (function[2]) mixing = 7; + if (function[3]) mixing = nsplit_alloc; + + if (function[0]) { + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory + bytes += 6 * nfft_both * sizeof(double); // vg + bytes += nfft_both * sizeof(double); // greensfn + bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg->memory_usage(); + } + + if (function[1] + function[2] + function[3]) { + int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * + (nzhi_out_6-nzlo_out_6+1); + bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks + bytes += 6 * nfft_both_6 * sizeof(double); // vg + bytes += nfft_both_6 * sizeof(double); // greensfn + bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg_6->memory_usage(); + } + return bytes; +} diff --git a/src/KSPACE/pppm_old.cpp b/src/KSPACE/pppm_old.cpp index 4d68c12586..a368b5d5b0 100644 --- a/src/KSPACE/pppm_old.cpp +++ b/src/KSPACE/pppm_old.cpp @@ 
-1,2863 +1,2863 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_old.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - triclinic_support = 0; - pppmflag = 1; - group_group_enable = 0; - - accuracy_relative = 
fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = buf3 = buf4 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - - nmax = 0; - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMOld::~PPPMOld() -{ - delete [] factors; - deallocate(); - deallocate_peratom(); - deallocate_groups(); - memory->destroy(part2grid); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMOld::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or 
> than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - peratom_allocate_flag = 0; - deallocate_groups(); - group_allocate_flag = 0; - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; 
- } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); - - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract 
OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - 
OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - MPI_Status status; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,&status); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,&status); - else nxlo_ghost = nplanes; - - nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,&status); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,&status); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - 
&nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,&status); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,&status); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition 
on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - - nbuf_peratom = 7*nbuf; - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - - // allocate K-space dependent memory - // don't invoke allocate_peratom() here, wait to see if needed - - allocate(); - - // pre-compute Green's function denomiator expansion - // 
pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMOld::setup() -{ - int i,j,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - 
vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nx,ny,nz,kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - - int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - - double form = 1.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); - snz2 = snz*snz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = sin(0.5*unitky*lper*yprd/ny_pppm); - sny2 = sny*sny; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = sin(0.5*unitkx*kper*xprd/nx_pppm); - snx2 = snx*snx; - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706/sqk; - denominator = gf_denom(snx2,sny2,snz2); - sum1 = 0.0; - const double dorder = static_cast(order); - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * 
pow(wx*wy*wz,2.0); - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMOld::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - peratom_allocate_flag = 1; - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - fillbrick(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fillbrick_peratom(); - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) 
fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) { - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - - 
memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - // summation coeffs - - memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate_peratom() -{ - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - 
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - memory->create(buf3,nbuf_peratom,"pppm:buf3"); - memory->create(buf4,nbuf_peratom,"pppm:buf4"); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - - memory->destroy(buf1); - memory->destroy(buf2); - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - - delete fft1; - delete fft2; - delete remap; -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_peratom() -{ - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - 
memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(buf3); - memory->destroy(buf4); -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald -------------------------------------------------------------------------- */ - -void PPPMOld::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - double q2 = qsqsum * force->qqrd2e / force->dielectric; - - // use xprd,yprd,zprd even if triclinic so grid size is 
the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until accuracy target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast (xprd/h_x) + 1; - ny_pppm = static_cast (yprd/h_y) + 1; - nz_pppm = static_cast (zprd_slab/h_z) + 1; - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - // adjust g_ewald for new grid size - - h_x = xprd/static_cast(nx_pppm); - h_y = yprd/static_cast(ny_pppm); - h_z = zprd_slab/static_cast(nz_pppm); - - if (!gewaldflag) { 
- double gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10.0/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); - rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); - } - } - - // final RMS accuracy - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - if (screen) { - fprintf(screen," G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - 
accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - } - } -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMOld::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double PPPMOld::rms(double h, double prd, bigint natoms, - double q2, double **acons) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); - return value; -} - -/* ---------------------------------------------------------------------- - compute difference in real-space and KSpace RMS accuracy -------------------------------------------------------------------------- */ - -double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, - double **acons) -{ - double lprx,lpry,lprz,kspace_prec,real_prec; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - lprx = rms(h_x,xprd,natoms,q2,acons); - lpry = rms(h_y,yprd,natoms,q2,acons); - lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); - kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); - double value = kspace_prec - real_prec; - return value; 
-} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFT decomposition -------------------------------------------------------------------------- */ - -void PPPMOld::brick2fft() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my ghosts for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; 
iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - 
for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // remap from 3d brick decomposition to FFT decomposition - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - 
for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - 
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) 
- for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - 
vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with per-atom field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick_peratom() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if 
(eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; 
ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - 
buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - 
v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMOld::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMOld::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 
3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPMOld::poisson() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // 
scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial 
-------------------------------------------------------------------------- */ - -void PPPMOld::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; - work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = 
work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // 
convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += v0; - vatom[i][1] += v1; - vatom[i][2] += v2; - vatom[i][3] += v3; - vatom[i][4] += v4; - vatom[i][5] += v5; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to 
NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array 
rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMOld::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMOld::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - bytes += 2 * nbuf * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) { - bytes += 7 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); - } - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- 
- compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) -{ - if (slabflag) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group"); - - int i,j; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0; //energy - f2group[0] = 0; //force in x-direction - f2group[1] = 0; //force in y-direction - f2group[2] = 0; //force in z-direction - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,BA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(BA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - 
- double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::allocate_groups() -{ - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_groups() -{ - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) - if (BA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::poisson_groups(int BA_flag) -{ - int i,j,k,n; - double eng; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - 
work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (BA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_old.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); + + triclinic_support = 0; + pppmflag = 1; + group_group_enable = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + 
buf1 = buf2 = buf3 = buf4 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + + nmax = 0; + part2grid = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMOld::~PPPMOld() +{ + delete [] factors; + deallocate(); + deallocate_peratom(); + deallocate_groups(); + memory->destroy(part2grid); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMOld::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + peratom_allocate_flag = 0; + deallocate_groups(); + group_allocate_flag = 0; + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp=0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is 
incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from 
accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil extends beyond neighbor proc, reduce order and try again + + int iteration = 0; + + while (order > 1) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond neighbor processor"); + iteration++; + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); + + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my 
box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + triclinic = domain->triclinic; + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi + nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but 
not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + + if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_in = nz_pppm - 1; + nzhi_out = nz_pppm - 1; + } + + // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions + // that overlay domain I own + // proc in that direction tells me via sendrecv() + // if no neighbor proc, value is from self since I have ghosts regardless + + int nplanes; + MPI_Status status; + + nplanes = nxlo_in - nxlo_out; + if (comm->procneigh[0][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, + &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, + world,&status); + else nxhi_ghost = nplanes; + + nplanes = nxhi_out - nxhi_in; + if (comm->procneigh[0][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, + &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], + 0,world,&status); + else nxlo_ghost = nplanes; + + nplanes = nylo_in - nylo_out; + if (comm->procneigh[1][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, + &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, + world,&status); + else nyhi_ghost = nplanes; + + nplanes = nyhi_out - nyhi_in; + if (comm->procneigh[1][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, + &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, + world,&status); + else nylo_ghost = nplanes; + + nplanes = nzlo_in - nzlo_out; + if (comm->procneigh[2][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, + &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, + world,&status); + else nzhi_ghost = nplanes; + + nplanes = nzhi_out - nzhi_in; + if (comm->procneigh[2][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, + &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, + world,&status); + else nzlo_ghost = nplanes; + + // test that ghost overlap is not bigger than my sub-domain + + int flag = 0; + if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 
1; + if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; + if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + + if (flag_all == 0) break; + order--; + } + + if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); + + // buffer space for use in brick2fft and fillbrick + // idel = max # of ghost planes to send or recv in +/- dir of each dim + // nx,ny,nz = owned planes (including 
ghosts) in each dim + // nxx,nyy,nzz = max # of grid cells to send in each dim + // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick + + int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; + + idelx = MAX(nxlo_ghost,nxhi_ghost); + idelx = MAX(idelx,nxhi_out-nxhi_in); + idelx = MAX(idelx,nxlo_in-nxlo_out); + + idely = MAX(nylo_ghost,nyhi_ghost); + idely = MAX(idely,nyhi_out-nyhi_in); + idely = MAX(idely,nylo_in-nylo_out); + + idelz = MAX(nzlo_ghost,nzhi_ghost); + idelz = MAX(idelz,nzhi_out-nzhi_in); + idelz = MAX(idelz,nzlo_in-nzlo_out); + + nx = nxhi_out - nxlo_out + 1; + ny = nyhi_out - nylo_out + 1; + nz = nzhi_out - nzlo_out + 1; + + nxx = idelx * ny * nz; + nyy = idely * nx * nz; + nzz = idelz * nx * ny; + + nbuf = MAX(nxx,nyy); + nbuf = MAX(nbuf,nzz); + + nbuf_peratom = 7*nbuf; + nbuf *= 3; + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + } + + // allocate K-space dependent memory + // don't invoke allocate_peratom() here, wait to see if needed + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMOld::setup() +{ + int i,j,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd 
since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + // modified (Hockney-Eastwood) Coulomb Green's function + + int nx,ny,nz,kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast 
((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + + double form = 1.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); + snz2 = snz*snz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = sin(0.5*unitky*lper*yprd/ny_pppm); + sny2 = sny*sny; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = sin(0.5*unitkx*kper*xprd/nx_pppm); + snx2 = snx*snx; + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706/sqk; + denominator = gf_denom(snx2,sny2,snz2); + sum1 = 0.0; + const double dorder = static_cast(order); + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0); + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMOld::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || 
vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + peratom_allocate_flag = 1; + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + fillbrick(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fillbrick_peratom(); + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + 
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + + memory->create(buf1,nbuf,"pppm:buf1"); + memory->create(buf2,nbuf,"pppm:buf2"); + + // 
summation coeffs + + memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate_peratom() +{ + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + 
memory->create(buf3,nbuf_peratom,"pppm:buf3"); + memory->create(buf4,nbuf_peratom,"pppm:buf4"); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + + memory->destroy(buf1); + memory->destroy(buf2); + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + + delete fft1; + delete fft2; + delete remap; +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_peratom() +{ + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(buf3); + memory->destroy(buf4); +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) 
and g_ewald +------------------------------------------------------------------------- */ + +void PPPMOld::set_grid() +{ + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + double **acons; + memory->create(acons,8,7,"pppm:acons"); + + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; + + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); 
+ g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 + // reduce it until accuracy target is met + + if (!gridflag) { + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = rms(h_x,xprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_x,xprd,natoms,q2,acons); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = rms(h_y,yprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_y,yprd,natoms,q2,acons); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = rms(h_z,zprd_slab,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_z,zprd_slab,natoms,q2,acons); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + // adjust g_ewald for new grid size + + h_x = xprd/static_cast(nx_pppm); + h_y = yprd/static_cast(ny_pppm); + h_z = zprd_slab/static_cast(nz_pppm); + + if (!gewaldflag) { + double gew1,gew2,dgew,f,fmid,hmin,rtb; + int ncount; + + gew1 = 0.0; + g_ewald = gew1; + f = diffpr(h_x,h_y,h_z,q2,acons); + + hmin = MIN(h_x,MIN(h_y,h_z)); + gew2 = 10.0/hmin; + g_ewald = gew2; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + + if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); + rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); + ncount = 0; + while (fabs(dgew) > SMALL && fmid != 0.0) { + dgew *= 0.5; + g_ewald = rtb + dgew; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + if (fmid <= 0.0) rtb = g_ewald; + ncount++; + if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); + } + } + + // final RMS accuracy + + double lprx = rms(h_x,xprd,natoms,q2,acons); + double lpry = rms(h_y,yprd,natoms,q2,acons); + double lprz = rms(h_z,zprd_slab,natoms,q2,acons); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // free local memory + + memory->destroy(acons); + + // print info + + if (me == 0) { +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + if (screen) { + fprintf(screen," G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + } + } +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 
if no +------------------------------------------------------------------------- */ + +int PPPMOld::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double PPPMOld::rms(double h, double prd, bigint natoms, + double q2, double **acons) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); + return value; +} + +/* ---------------------------------------------------------------------- + compute difference in real-space and KSpace RMS accuracy +------------------------------------------------------------------------- */ + +double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, + double **acons) +{ + double lprx,lpry,lprz,kspace_prec,real_prec; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + lprx = rms(h_x,xprd,natoms,q2,acons); + lpry = rms(h_y,yprd,natoms,q2,acons); + lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); + kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); + double value = kspace_prec - real_prec; + return value; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 
0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFT decomposition +------------------------------------------------------------------------- */ + +void PPPMOld::brick2fft() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my ghosts for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + 
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzhi_in+1; iz <= 
nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // remap from 3d brick decomposition to FFT decomposition + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with field values 
+------------------------------------------------------------------------- */ + +void PPPMOld::fillbrick() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] 
= buf2[n++]; + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells 
+ + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } +} + +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with per-atom field values +------------------------------------------------------------------------- */ + +void PPPMOld::fillbrick_peratom() +{ + 
int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) 
buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -y processor + // 
pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + 
comm->procneigh[0][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[0][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my 
particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMOld::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOld::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of 
moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPMOld::poisson() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // compute gradients of V(r) in each 
of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMOld::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j 
= nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; + work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + 
fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial 
+------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + 
// innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k 
<= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMOld::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMOld::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + bytes += 2 * nbuf * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) { + bytes += 7 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); + } + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + return bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- 
+ compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) +{ + if (slabflag) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group"); + + int i,j; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0; //energy + f2group[0] = 0; //force in x-direction + f2group[1] = 0; //force in y-direction + f2group[2] = 0; //force in z-direction + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,BA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(BA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + 
+ double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::allocate_groups() +{ + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_groups() +{ + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) + if (BA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::poisson_groups(int BA_flag) +{ + int i,j,k,n; + double eng; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + 
work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (BA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +} diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp index 6e09fde133..58574c4bd5 100644 --- a/src/USER-CUDA/pppm_cuda.cpp +++ b/src/USER-CUDA/pppm_cuda.cpp @@ -1,1436 +1,1436 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the 
README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) -------------------------------------------------------------------------- */ - - -#include "mpi.h" -#include -#include -#include -#include -#include "pppm_cuda.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap_cuda.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" -#include "update.h" -#include //crmadd -#include "cuda_wrapper_cu.h" -#include "pppm_cuda_cu.h" -#include "cuda.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 4096 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - - -void printArray(double* data,int nx, int ny, int nz) -{ - for(int i=0;icuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command"); - #ifndef FFT_CUFFT - error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. 
Aborting."); - #endif - - triclinic_support = 0; - accuracy_relative = atof(arg[0]); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL; - density_fft = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1c = fft2c = NULL; - remap = NULL; - - density_brick_int=NULL; - density_intScale=1000000; - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_greensfn = NULL; - cu_work1 = cu_work2 = cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_virial = NULL; - - cu_gf_b = NULL; - - cu_slabbuf = NULL; - slabbuf = NULL; - - nmax = 0; - part2grid = NULL; - cu_part2grid = NULL; - adev_data_array=NULL; - poissontime=0; - old_nmax=0; - cu_pppm_grid_n=NULL; - cu_pppm_grid_ids=NULL; - - pppm_grid_nmax=0; - pppm2partgrid=new int[3]; - pppm_grid=new int[3]; - firstpass=true; - scale = 1.0; -} - - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMCuda::~PPPMCuda() -{ - delete [] slabbuf; - delete cu_slabbuf; - - delete [] factors; - factors=NULL; - deallocate(); - delete cu_part2grid; - cu_part2grid=NULL; - memory->destroy(part2grid); - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMCuda::init() -{ - - cuda->shared_data.pppm.cudable_force=1; - - //if(cuda->finished_run) {PPPM::init(); return;} - - if (me == 
0) { - if (screen) fprintf(screen,"PPPMCuda initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMCuda"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER); - error->all(FLERR,str); - } - // free all arrays previously allocated - - deallocate(); - - // extract short-range Coulombic cutoff from pair style - - qqrd2e = force->qqrd2e; - - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - - qdist = 0.0; - - if (strcmp(force->kspace_style,"pppm/tip4p") == 0) { - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - 
typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (2.0 * cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMCuda grid is too large"); - - // global indices of PPPMCuda grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that I own without ghost cells - // for slab PPPMCuda, assign z grid as if it were not extended - - nxlo_in = comm->myloc[0]*nx_pppm / 
comm->procgrid[0]; - nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1; - nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1]; - nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1; - nzlo_in = comm->myloc[2] * - (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2]; - nzhi_in = (comm->myloc[2]+1) * - (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1; - - // nlower,nupper = stencil size for mapping particles to PPPMCuda grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPMCuda, assign z grid as if it were not extended - - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - 
dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPMCuda, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPMCuda, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - MPI_Status status; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,&status); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,&status); - else nxlo_ghost = nplanes; - - 
nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,&status); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,&status); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,&status); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,&status); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0"); - - - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft 
= 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPMCuda grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - 
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - cuda_shared_pppm* ap=&(cuda->shared_data.pppm); - - ap->density_intScale=density_intScale; - ap->nxlo_in=nxlo_in; - ap->nxhi_in=nxhi_in; - ap->nxlo_out=nxlo_out; - ap->nxhi_out=nxhi_out; - ap->nylo_in=nylo_in; - ap->nyhi_in=nyhi_in; - ap->nylo_out=nylo_out; - ap->nyhi_out=nyhi_out; - ap->nzlo_in=nzlo_in; - ap->nzhi_in=nzhi_in; - ap->nzlo_out=nzlo_out; - ap->nzhi_out=nzhi_out; - ap->nxlo_in=nxlo_fft; - ap->nxhi_in=nxhi_fft; - ap->nylo_in=nylo_fft; - ap->nyhi_in=nyhi_fft; - ap->nzlo_in=nzlo_fft; - ap->nzhi_in=nzhi_fft; - ap->nx_pppm=nx_pppm; - ap->ny_pppm=ny_pppm; - ap->nz_pppm=nz_pppm; - ap->qqrd2e=qqrd2e; - ap->order=order; - ap->nmax=nmax; - ap->nlocal=atom->nlocal; - ap->delxinv=delxinv; - ap->delyinv=delyinv; - ap->delzinv=delzinv; - ap->nlower=nlower; - ap->nupper=nupper; - ap->shiftone=shiftone; - - // allocate K-space dependent memory - - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPMCuda coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMCuda::setup() -{ - double *prd; - cu_gf_b->upload(); - // volume-dependent factors - // adjust z dimension for 2d slab PPPMCuda - // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double 
zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald); - - - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald, -nbx,nby,nbz,xprd,yprd,zprd_slab); - - -#ifdef FFT_CUFFT - cu_vdx_brick->upload(); - cu_vdy_brick->upload(); - cu_vdz_brick->upload(); -#endif - cu_rho_coeff->upload(); - cu_density_brick->memset_device(0); - pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper); -} - -/* ---------------------------------------------------------------------- - compute the PPPMCuda long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMCuda::compute(int eflag, int vflag) -{ - cuda_shared_atom* cu_atom = & cuda->shared_data.atom; - - int i; - my_times starttime; - my_times endtime; - my_times starttotal; - my_times endtotal; - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if ((cu_atom->update_nmax)||(old_nmax==0)) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - delete cu_part2grid; - delete [] adev_data_array; - 
adev_data_array=new dev_array[1]; - cu_part2grid = new cCudaData ((int*)part2grid,adev_data_array, nmax,3); - - pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(),atom->nlocal,atom->nmax); - old_nmax=nmax; - } - if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);} - - energy = 0.0; - if (vflag) - { - for (i = 0; i < 6; i++) virial[i] = 0.0; - cu_virial->memset_device(0); - } - if(eflag) cu_energy->memset_device(0); - my_gettime(CLOCK_REALTIME,&starttotal); - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - - my_gettime(CLOCK_REALTIME,&starttime); - - particle_map(); - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - //cu_part2grid->download(); - my_gettime(CLOCK_REALTIME,&starttime); - make_rho(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - int nprocs=comm->nprocs; - - my_gettime(CLOCK_REALTIME,&starttime); - - if(nprocs>1) - { - cu_density_brick->download(); - brick2fft(); - } - else - { - #ifdef FFT_CUFFT - pppm_initfftdata(&cuda->shared_data,(PPPM_FLOAT*)cu_density_brick->dev_data(),(FFT_FLOAT*)cu_work2->dev_data()); - #endif - } - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - - my_gettime(CLOCK_REALTIME,&starttime); - poisson(eflag,vflag); - 
my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate E-field values to fill ghost cells - // surrounding their 3d bricks - - // not necessary since all the calculations are done on one proc - - // calculate the force on my particles - - - my_gettime(CLOCK_REALTIME,&starttime); - fieldforce(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // sum energy across procs and add in volume-dependent term - - my_gettime(CLOCK_REALTIME,&endtotal); - cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000); - - if (eflag) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/1.772453851 + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qqrd2e; - } - - // sum virial across procs - - if (vflag) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*volume*virial_all[i]; - } - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if(firstpass) firstpass=false; -} - - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - - -void PPPMCuda::allocate() -{ - - struct dev_array* dev_tmp=new struct dev_array[20]; - int n_cudata=0; - - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - 
memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick_int"); - - - cu_density_brick = new cCudaData ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - cu_density_brick_int = new cCudaData ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick_tmp"); - - cu_vdx_brick = new cCudaData ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - cu_vdy_brick = new cCudaData ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - cu_vdz_brick = new cCudaData ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - - cu_density_fft = new cCudaData (density_fft, & (dev_tmp[n_cudata++]),nfft_both); - - cu_energy = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm); - cu_virial = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6); - - memory->create(greensfn,nfft_both,"pppm:greensfn"); - cu_greensfn = new cCudaData (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm); - - 
memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1"); - memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2"); - memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3"); - - cu_work1 = new cCudaData (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work2 = new cCudaData (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work3 = new cCudaData (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - - - memory->create(fkx,nx_pppm,"pppmcuda:fkx"); - cu_fkx = new cCudaData (fkx, & (dev_tmp[n_cudata++]) , nx_pppm); - memory->create(fky,ny_pppm,"pppmcuda:fky"); - cu_fky = new cCudaData (fky, & (dev_tmp[n_cudata++]) , ny_pppm); - memory->create(fkz,nz_pppm,"pppmcuda:fkz"); - cu_fkz = new cCudaData (fkz, & (dev_tmp[n_cudata++]) , nz_pppm); - - memory->create(vg,nfft_both,6,"pppm:vg"); - - cu_vg = new cCudaData ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - - // summation coeffs - - - gf_b = new double[order]; - cu_gf_b = new cCudaData (gf_b, &(dev_tmp[n_cudata++]) , order); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - cu_rho_coeff = new cCudaData ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1)); - - debugdata=new PPPM_FLOAT[100]; - cu_debugdata = new cCudaData (debugdata,& (dev_tmp[n_cudata++]),100); - cu_flag = new cCudaData (&global_flag,& (dev_tmp[n_cudata++]),3); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - - - - fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,true); - - fft2c = 
new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,false); - - -#ifdef FFT_CUFFT - fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data()); - fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data()); -#endif - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,2); - - -pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data() - , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), cu_fkz->dev_data(), cu_vg->dev_data() - ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm - ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data() - ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag - ); -} - - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - ---------------------------------------------------------------------- */ - -void PPPMCuda::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - density_fft = NULL; - greensfn = NULL; - work1 = NULL; - work2 = NULL; - vg = NULL; - - 
memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - - fkx = NULL; - fky = NULL; - fkz = NULL; - - delete cu_density_brick; - delete cu_density_brick_int; - delete cu_vdx_brick; - delete cu_vdy_brick; - delete cu_vdz_brick; - delete cu_density_fft; - delete cu_energy; - delete cu_virial; -#ifdef FFT_CUFFT - delete cu_greensfn; - delete cu_gf_b; - delete cu_vg; - delete cu_work1; - delete cu_work2; - delete cu_work3; - delete cu_fkx; - delete cu_fky; - delete cu_fkz; -#endif - - delete cu_flag; - delete cu_debugdata; - delete cu_rho_coeff; - - - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_virial=NULL; -#ifdef FFT_CUFFT - cu_greensfn = NULL; - cu_gf_b = NULL; - cu_work1 = cu_work2 = cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; -#endif - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_part2grid = NULL; - - memory->destroy(buf1); - memory->destroy(buf2); - - delete [] gf_b; - gf_b = NULL; - memory->destroy2d_offset(rho1d,-order/2); rho1d = NULL; - memory->destroy2d_offset(rho_coeff,(1-order)/2); rho_coeff = NULL; - - delete fft1c; - fft1c = NULL; - - delete fft2c; - fft2c = NULL; - delete remap; - remap = NULL; - buf1 = NULL; - buf2 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald --------------------------------------------------------------------------*/ - -void PPPMCuda::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 
7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - double q2 = qsqsum * force->qqrd2e/ force->dielectric; - bigint natoms = atom->natoms; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPMCuda - // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired error and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - - if (!gewaldflag) - g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / - (2.0*q2))) / cutoff; - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until precision target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1/g_ewald; - - nx_pppm = static_cast (xprd/h_x + 1); - ny_pppm = static_cast (yprd/h_y + 1); - nz_pppm = static_cast (zprd_slab/h_z + 1); - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = 
rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - - // adjust g_ewald for new grid size - - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - - if (!gewaldflag) { - double gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G"); - rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G"); - } - } - - // final RMS precision - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { - if (screen) { - fprintf(screen," G vector = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(screen," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - if (logfile) { - fprintf(logfile," G vector = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(logfile," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - } -} - - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - - -void PPPMCuda::particle_map() -{ - MYDBG(printf("# CUDA PPPMCuda::particle_map() ... 
start\n");) - int flag = 0; - - cu_flag->memset_device(0); - flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data()); - if(flag) - { - cu_debugdata->download(); - printf("Out of range atom: "); - printf("ID: %i ",atom->tag[int(debugdata[0])]); - printf("x: %e ",debugdata[7]); - printf("y: %e ",debugdata[8]); - printf("z: %e ",debugdata[9]); - printf("nx: %e ",debugdata[4]); - printf("ny: %e ",debugdata[5]); - - printf("\n"); - //printf("debugdata: cpu: %e %e %e %i\n",boxlo[0],boxlo[1],boxlo[2],atom->nlocal); - cuda->cu_x->download(); - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - if(i==1203)printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out || i==1203) {printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); } - } - - } - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - - -void PPPMCuda::make_rho() -{ - cuda_make_rho(&cuda->shared_data,cu_flag->dev_data(),&density_intScale,nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,cu_density_brick->dev_data(),cu_density_brick_int->dev_data()); -} - - -/* 
---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ -void PPPMCuda::poisson(int eflag, int vflag) -{ - -#ifndef FFT_CUFFT - PPPM::poisson(eflag,vflag); - return; -#endif -#ifdef FFT_CUFFT - my_times starttime; - my_times endtime; - - - my_gettime(CLOCK_REALTIME,&starttime); - fft1c->compute(density_fft,work1,1); - - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - - - if (eflag || vflag) { - poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag); - ENERGY_FLOAT gpuvirial[6]; - energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial); - if(vflag) - { - for(int j=0;j<6;j++) virial[j]+=gpuvirial[j]; - } - } - - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - poisson_scale(nx_pppm,ny_pppm,nz_pppm); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - - poisson_xgrad(nx_pppm,ny_pppm,nz_pppm); - - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - - // y direction gradient - - poisson_ygrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - 
poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - // z direction gradient - - poisson_zgrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - #endif -} - -/*---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles --------------------------------------------------------------------------*/ - -void PPPMCuda::fieldforce() -{ - cuda_fieldforce(& cuda->shared_data,cu_flag); - return; -} - -/* ---------------------------------------------------------------------- - perform and time the 4 FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMCuda::timing_1d(int n, double &time1d) -{ - time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/4*n; - return 4; -} - -int PPPMCuda::timing_3d(int n, double &time3d) -{ - time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n; - return 4; -} - -void PPPMCuda::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - if(slabbuf==NULL) - { - slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); - } - if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_FLOAT)>=unsigned(cu_slabbuf->dev_size())) - { - delete [] slabbuf; - delete cu_slabbuf; - slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); - } - - - double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_FLOAT*) cu_slabbuf->dev_data()); - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - 
// compute corrections - - double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; - - //if (eflag) energy += qqrd2e*scale * e_slabcorr; - // need to add a correction to make non-neutral systems and per-atom energy translationally invariant - if (eflag || fabs(qsum) > SMALL) - error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting."); - - double ffact = -4.0*MY_PI*dipole_all/volume; - - cuda_slabcorr_force(&cuda->shared_data,ffact); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + + Original Version: + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. + + ----------------------------------------------------------------------- + + USER-CUDA Package and associated modifications: + https://sourceforge.net/projects/lammpscuda/ + + Christian Trott, christian.trott@tu-ilmenau.de + Lars Winterfeld, lars.winterfeld@tu-ilmenau.de + Theoretical Physics II, University of Technology Ilmenau, Germany + + See the README file in the USER-CUDA directory. + + This software is distributed under the GNU General Public License. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) +------------------------------------------------------------------------- */ + + +#include "mpi.h" +#include +#include +#include +#include +#include "pppm_cuda.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap_cuda.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" +#include "update.h" +#include //crmadd +#include "cuda_wrapper_cu.h" +#include "pppm_cuda_cu.h" +#include "cuda.h" +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 4096 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + + +void printArray(double* data,int nx, int ny, int nz) +{ + for(int i=0;icuda; + if(cuda == NULL) + error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); + + if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command"); + #ifndef FFT_CUFFT + error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. 
Aborting."); + #endif + + triclinic_support = 0; + accuracy_relative = atof(arg[0]); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL; + density_fft = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + buf1 = buf2 = NULL; + + gf_b = NULL; + rho1d = rho_coeff = NULL; + + fft1c = fft2c = NULL; + remap = NULL; + + density_brick_int=NULL; + density_intScale=1000000; + cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; + cu_density_brick = NULL; + cu_density_brick_int = NULL; + cu_density_fft = NULL; + cu_energy=NULL; + cu_greensfn = NULL; + cu_work1 = cu_work2 = cu_work3 = NULL; + cu_vg = NULL; + cu_fkx = cu_fky = cu_fkz = NULL; + + cu_flag = NULL; + cu_debugdata = NULL; + cu_rho_coeff = NULL; + cu_virial = NULL; + + cu_gf_b = NULL; + + cu_slabbuf = NULL; + slabbuf = NULL; + + nmax = 0; + part2grid = NULL; + cu_part2grid = NULL; + adev_data_array=NULL; + poissontime=0; + old_nmax=0; + cu_pppm_grid_n=NULL; + cu_pppm_grid_ids=NULL; + + pppm_grid_nmax=0; + pppm2partgrid=new int[3]; + pppm_grid=new int[3]; + firstpass=true; + scale = 1.0; +} + + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMCuda::~PPPMCuda() +{ + delete [] slabbuf; + delete cu_slabbuf; + + delete [] factors; + factors=NULL; + deallocate(); + delete cu_part2grid; + cu_part2grid=NULL; + memory->destroy(part2grid); + part2grid = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMCuda::init() +{ + + cuda->shared_data.pppm.cudable_force=1; + + //if(cuda->finished_run) {PPPM::init(); return;} + + if (me == 
0) { + if (screen) fprintf(screen,"PPPMCuda initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMCuda"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER); + error->all(FLERR,str); + } + // free all arrays previously allocated + + deallocate(); + + // extract short-range Coulombic cutoff from pair style + + qqrd2e = force->qqrd2e; + + if (force->pair == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + int itmp=0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + + qdist = 0.0; + + if (strcmp(force->kspace_style,"pppm/tip4p") == 0) { + if (force->pair == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + 
typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (2.0 * cos(0.5*theta) * blen); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil extends beyond neighbor proc, reduce order and try again + + int iteration = 0; + + while (order > 1) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends " + "beyond neighbor processor"); + iteration++; + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMCuda grid is too large"); + + // global indices of PPPMCuda grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPMCuda grid that I own without ghost cells + // for slab PPPMCuda, assign z grid as if it were not extended + + nxlo_in = comm->myloc[0]*nx_pppm / 
comm->procgrid[0]; + nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1; + nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1]; + nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1; + nzlo_in = comm->myloc[2] * + (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2]; + nzhi_in = (comm->myloc[2]+1) * + (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1; + + // nlower,nupper = stencil size for mapping particles to PPPMCuda grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPMCuda grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPMCuda, assign z grid as if it were not extended + + + triclinic = domain->triclinic; + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + 
dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi + nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPMCuda, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPMCuda, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + + if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) { + nzhi_in = nz_pppm - 1; + nzhi_out = nz_pppm - 1; + } + + // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions + // that overlay domain I own + // proc in that direction tells me via sendrecv() + // if no neighbor proc, value is from self since I have ghosts regardless + + int nplanes; + MPI_Status status; + + nplanes = nxlo_in - nxlo_out; + if (comm->procneigh[0][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, + &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, + world,&status); + else nxhi_ghost = nplanes; + + nplanes = nxhi_out - nxhi_in; + if (comm->procneigh[0][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, + &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], + 0,world,&status); + else nxlo_ghost = nplanes; + + 
nplanes = nylo_in - nylo_out; + if (comm->procneigh[1][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, + &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, + world,&status); + else nyhi_ghost = nplanes; + + nplanes = nyhi_out - nyhi_in; + if (comm->procneigh[1][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, + &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, + world,&status); + else nylo_ghost = nplanes; + + nplanes = nzlo_in - nzlo_out; + if (comm->procneigh[2][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, + &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, + world,&status); + else nzhi_ghost = nplanes; + + nplanes = nzhi_out - nzhi_in; + if (comm->procneigh[2][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, + &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, + world,&status); + else nzlo_ghost = nplanes; + + // test that ghost overlap is not bigger than my sub-domain + + int flag = 0; + if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; + if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + + if (flag_all == 0) break; + order--; + } + + if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0"); + + + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft 
= 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPMCuda grid for this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); + + // buffer space for use in brick2fft and fillbrick + // idel = max # of ghost planes to send or recv in +/- dir of each dim + // nx,ny,nz = owned planes (including ghosts) in each dim + // nxx,nyy,nzz = max # of grid cells to send in each dim + // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick + + int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; + + idelx = MAX(nxlo_ghost,nxhi_ghost); + idelx = MAX(idelx,nxhi_out-nxhi_in); + idelx = MAX(idelx,nxlo_in-nxlo_out); + + idely = MAX(nylo_ghost,nyhi_ghost); + idely = MAX(idely,nyhi_out-nyhi_in); + idely = MAX(idely,nylo_in-nylo_out); + + idelz = MAX(nzlo_ghost,nzhi_ghost); + idelz = MAX(idelz,nzhi_out-nzhi_in); + idelz = MAX(idelz,nzlo_in-nzlo_out); + + nx = nxhi_out - nxlo_out + 1; + ny = nyhi_out - nylo_out + 1; + nz = nzhi_out - nzlo_out + 1; + + nxx = idelx * ny * nz; + nyy = idely * nx * nz; + nzz = idelz * nx * ny; + + nbuf = MAX(nxx,nyy); + nbuf = MAX(nbuf,nzz); + nbuf *= 3; + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + 
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + } + cuda_shared_pppm* ap=&(cuda->shared_data.pppm); + + ap->density_intScale=density_intScale; + ap->nxlo_in=nxlo_in; + ap->nxhi_in=nxhi_in; + ap->nxlo_out=nxlo_out; + ap->nxhi_out=nxhi_out; + ap->nylo_in=nylo_in; + ap->nyhi_in=nyhi_in; + ap->nylo_out=nylo_out; + ap->nyhi_out=nyhi_out; + ap->nzlo_in=nzlo_in; + ap->nzhi_in=nzhi_in; + ap->nzlo_out=nzlo_out; + ap->nzhi_out=nzhi_out; + ap->nxlo_in=nxlo_fft; + ap->nxhi_in=nxhi_fft; + ap->nylo_in=nylo_fft; + ap->nyhi_in=nyhi_fft; + ap->nzlo_in=nzlo_fft; + ap->nzhi_in=nzhi_fft; + ap->nx_pppm=nx_pppm; + ap->ny_pppm=ny_pppm; + ap->nz_pppm=nz_pppm; + ap->qqrd2e=qqrd2e; + ap->order=order; + ap->nmax=nmax; + ap->nlocal=atom->nlocal; + ap->delxinv=delxinv; + ap->delyinv=delyinv; + ap->delzinv=delzinv; + ap->nlower=nlower; + ap->nupper=nupper; + ap->shiftone=shiftone; + + // allocate K-space dependent memory + + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPMCuda coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMCuda::setup() +{ + double *prd; + cu_gf_b->upload(); + // volume-dependent factors + // adjust z dimension for 2d slab PPPMCuda + // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double 
zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald); + + + + // modified (Hockney-Eastwood) Coulomb Green's function + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald, +nbx,nby,nbz,xprd,yprd,zprd_slab); + + +#ifdef FFT_CUFFT + cu_vdx_brick->upload(); + cu_vdy_brick->upload(); + cu_vdz_brick->upload(); +#endif + cu_rho_coeff->upload(); + cu_density_brick->memset_device(0); + pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper); +} + +/* ---------------------------------------------------------------------- + compute the PPPMCuda long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMCuda::compute(int eflag, int vflag) +{ + cuda_shared_atom* cu_atom = & cuda->shared_data.atom; + + int i; + my_times starttime; + my_times endtime; + my_times starttotal; + my_times endtotal; + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if ((cu_atom->update_nmax)||(old_nmax==0)) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + delete cu_part2grid; + delete [] adev_data_array; + 
adev_data_array=new dev_array[1]; + cu_part2grid = new cCudaData ((int*)part2grid,adev_data_array, nmax,3); + + pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(),atom->nlocal,atom->nmax); + old_nmax=nmax; + } + if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);} + + energy = 0.0; + if (vflag) + { + for (i = 0; i < 6; i++) virial[i] = 0.0; + cu_virial->memset_device(0); + } + if(eflag) cu_energy->memset_device(0); + my_gettime(CLOCK_REALTIME,&starttotal); + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + + my_gettime(CLOCK_REALTIME,&starttime); + + particle_map(); + + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + //cu_part2grid->download(); + my_gettime(CLOCK_REALTIME,&starttime); + make_rho(); + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + int nprocs=comm->nprocs; + + my_gettime(CLOCK_REALTIME,&starttime); + + if(nprocs>1) + { + cu_density_brick->download(); + brick2fft(); + } + else + { + #ifdef FFT_CUFFT + pppm_initfftdata(&cuda->shared_data,(PPPM_FLOAT*)cu_density_brick->dev_data(),(FFT_FLOAT*)cu_work2->dev_data()); + #endif + } + + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + + my_gettime(CLOCK_REALTIME,&starttime); + poisson(eflag,vflag); + 
my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // all procs communicate E-field values to fill ghost cells + // surrounding their 3d bricks + + // not necessary since all the calculations are done on one proc + + // calculate the force on my particles + + + my_gettime(CLOCK_REALTIME,&starttime); + fieldforce(); + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // sum energy across procs and add in volume-dependent term + + my_gettime(CLOCK_REALTIME,&endtotal); + cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000); + + if (eflag) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/1.772453851 + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qqrd2e; + } + + // sum virial across procs + + if (vflag) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*volume*virial_all[i]; + } + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); + + if(firstpass) firstpass=false; +} + + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + + +void PPPMCuda::allocate() +{ + + struct dev_array* dev_tmp=new struct dev_array[20]; + int n_cudata=0; + + + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + 
memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick_int"); + + + cu_density_brick = new cCudaData ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + cu_density_brick_int = new cCudaData ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick_tmp"); + + cu_vdx_brick = new cCudaData ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + cu_vdy_brick = new cCudaData ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + cu_vdz_brick = new cCudaData ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + + cu_density_fft = new cCudaData (density_fft, & (dev_tmp[n_cudata++]),nfft_both); + + cu_energy = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm); + cu_virial = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6); + + memory->create(greensfn,nfft_both,"pppm:greensfn"); + cu_greensfn = new cCudaData (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm); + + 
memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1"); + memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2"); + memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3"); + + cu_work1 = new cCudaData (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + cu_work2 = new cCudaData (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + cu_work3 = new cCudaData (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + + + memory->create(fkx,nx_pppm,"pppmcuda:fkx"); + cu_fkx = new cCudaData (fkx, & (dev_tmp[n_cudata++]) , nx_pppm); + memory->create(fky,ny_pppm,"pppmcuda:fky"); + cu_fky = new cCudaData (fky, & (dev_tmp[n_cudata++]) , ny_pppm); + memory->create(fkz,nz_pppm,"pppmcuda:fkz"); + cu_fkz = new cCudaData (fkz, & (dev_tmp[n_cudata++]) , nz_pppm); + + memory->create(vg,nfft_both,6,"pppm:vg"); + + cu_vg = new cCudaData ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6); + + memory->create(buf1,nbuf,"pppm:buf1"); + memory->create(buf2,nbuf,"pppm:buf2"); + + + // summation coeffs + + + gf_b = new double[order]; + cu_gf_b = new cCudaData (gf_b, &(dev_tmp[n_cudata++]) , order); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + + cu_rho_coeff = new cCudaData ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1)); + + debugdata=new PPPM_FLOAT[100]; + cu_debugdata = new cCudaData (debugdata,& (dev_tmp[n_cudata++]),100); + cu_flag = new cCudaData (&global_flag,& (dev_tmp[n_cudata++]),3); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + + + + fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp,true); + + fft2c = 
new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp,false); + + +#ifdef FFT_CUFFT + fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data()); + fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data()); +#endif + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,2); + + +pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data() + , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), cu_fkz->dev_data(), cu_vg->dev_data() + ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm + ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data() + ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag + ); +} + + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order + ---------------------------------------------------------------------- */ + +void PPPMCuda::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + density_fft = NULL; + greensfn = NULL; + work1 = NULL; + work2 = NULL; + vg = NULL; + + 
memory->destroy(fkx); + memory->destroy(fky); + memory->destroy(fkz); + + fkx = NULL; + fky = NULL; + fkz = NULL; + + delete cu_density_brick; + delete cu_density_brick_int; + delete cu_vdx_brick; + delete cu_vdy_brick; + delete cu_vdz_brick; + delete cu_density_fft; + delete cu_energy; + delete cu_virial; +#ifdef FFT_CUFFT + delete cu_greensfn; + delete cu_gf_b; + delete cu_vg; + delete cu_work1; + delete cu_work2; + delete cu_work3; + delete cu_fkx; + delete cu_fky; + delete cu_fkz; +#endif + + delete cu_flag; + delete cu_debugdata; + delete cu_rho_coeff; + + + cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; + cu_density_brick = NULL; + cu_density_brick_int = NULL; + cu_density_fft = NULL; + cu_energy=NULL; + cu_virial=NULL; +#ifdef FFT_CUFFT + cu_greensfn = NULL; + cu_gf_b = NULL; + cu_work1 = cu_work2 = cu_work3 = NULL; + cu_vg = NULL; + cu_fkx = cu_fky = cu_fkz = NULL; +#endif + + cu_flag = NULL; + cu_debugdata = NULL; + cu_rho_coeff = NULL; + cu_part2grid = NULL; + + memory->destroy(buf1); + memory->destroy(buf2); + + delete [] gf_b; + gf_b = NULL; + memory->destroy2d_offset(rho1d,-order/2); rho1d = NULL; + memory->destroy2d_offset(rho_coeff,(1-order)/2); rho_coeff = NULL; + + delete fft1c; + fft1c = NULL; + + delete fft2c; + fft2c = NULL; + delete remap; + remap = NULL; + buf1 = NULL; + buf2 = NULL; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) and g_ewald +-------------------------------------------------------------------------*/ + +void PPPMCuda::set_grid() +{ + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + double **acons; + memory->create(acons,8,7,"pppm:acons"); + + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 
7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; + + double q2 = qsqsum * force->qqrd2e; + bigint natoms = atom->natoms; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPMCuda + // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired error and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h_x,h_y,h_z; + + if (!gewaldflag) + g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / + (2.0*q2))) / cutoff; + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision + // nz_pppm uses extended zprd_slab instead of zprd + // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 + // reduce it until precision target is met + + if (!gridflag) { + double err; + h_x = h_y = h_z = 1/g_ewald; + + nx_pppm = static_cast (xprd/h_x + 1); + ny_pppm = static_cast (yprd/h_y + 1); + nz_pppm = static_cast (zprd_slab/h_z + 1); + + err = rms(h_x,xprd,natoms,q2,acons); + while (err > accuracy) { + err = 
rms(h_x,xprd,natoms,q2,acons); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = rms(h_y,yprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_y,yprd,natoms,q2,acons); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = rms(h_z,zprd_slab,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_z,zprd_slab,natoms,q2,acons); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + + // adjust g_ewald for new grid size + + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + + if (!gewaldflag) { + double gew1,gew2,dgew,f,fmid,hmin,rtb; + int ncount; + + gew1 = 0.0; + g_ewald = gew1; + f = diffpr(h_x,h_y,h_z,q2,acons); + + hmin = MIN(h_x,MIN(h_y,h_z)); + gew2 = 10/hmin; + g_ewald = gew2; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + + if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G"); + rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); + ncount = 0; + while (fabs(dgew) > SMALL && fmid != 0.0) { + dgew *= 0.5; + g_ewald = rtb + dgew; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + if (fmid <= 0.0) rtb = g_ewald; + ncount++; + if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G"); + } + } + + // final RMS precision + + double lprx = rms(h_x,xprd,natoms,q2,acons); + double lpry = rms(h_y,yprd,natoms,q2,acons); + double lprz = rms(h_z,zprd_slab,natoms,q2,acons); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + + // free local memory + + memory->destroy(acons); + + // print info + + if (me == 0) { + if (screen) { + fprintf(screen," G vector = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); + fprintf(screen," relative force accuracy = %g\n", + MAX(lpr,spr)/two_charge_force); + } + if (logfile) { + fprintf(logfile," G vector = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); + fprintf(logfile," relative force accuracy = %g\n", + MAX(lpr,spr)/two_charge_force); + } + } +} + + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + + +void PPPMCuda::particle_map() +{ + MYDBG(printf("# CUDA PPPMCuda::particle_map() ... 
start\n");) + int flag = 0; + + cu_flag->memset_device(0); + flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data()); + if(flag) + { + cu_debugdata->download(); + printf("Out of range atom: "); + printf("ID: %i ",atom->tag[int(debugdata[0])]); + printf("x: %e ",debugdata[7]); + printf("y: %e ",debugdata[8]); + printf("z: %e ",debugdata[9]); + printf("nx: %e ",debugdata[4]); + printf("ny: %e ",debugdata[5]); + + printf("\n"); + //printf("debugdata: cpu: %e %e %e %i\n",boxlo[0],boxlo[1],boxlo[2],atom->nlocal); + cuda->cu_x->download(); + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + if(i==1203)printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out || i==1203) {printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); } + } + + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + + +void PPPMCuda::make_rho() +{ + cuda_make_rho(&cuda->shared_data,cu_flag->dev_data(),&density_intScale,nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,cu_density_brick->dev_data(),cu_density_brick_int->dev_data()); +} + + +/* 
---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ +void PPPMCuda::poisson(int eflag, int vflag) +{ + +#ifndef FFT_CUFFT + PPPM::poisson(eflag,vflag); + return; +#endif +#ifdef FFT_CUFFT + my_times starttime; + my_times endtime; + + + my_gettime(CLOCK_REALTIME,&starttime); + fft1c->compute(density_fft,work1,1); + + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + + + if (eflag || vflag) { + poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag); + ENERGY_FLOAT gpuvirial[6]; + energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial); + if(vflag) + { + for(int j=0;j<6;j++) virial[j]+=gpuvirial[j]; + } + } + + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + poisson_scale(nx_pppm,ny_pppm,nz_pppm); + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + + poisson_xgrad(nx_pppm,ny_pppm,nz_pppm); + + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + + + // y direction gradient + + poisson_ygrad(nx_pppm,ny_pppm,nz_pppm); + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + 
poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + + // z direction gradient + + poisson_zgrad(nx_pppm,ny_pppm,nz_pppm); + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + #endif +} + +/*---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +-------------------------------------------------------------------------*/ + +void PPPMCuda::fieldforce() +{ + cuda_fieldforce(& cuda->shared_data,cu_flag); + return; +} + +/* ---------------------------------------------------------------------- + perform and time the 4 FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMCuda::timing_1d(int n, double &time1d) +{ + time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/4*n; + return 4; +} + +int PPPMCuda::timing_3d(int n, double &time3d) +{ + time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n; + return 4; +} + +void PPPMCuda::slabcorr(int eflag) +{ + // compute local contribution to global dipole moment + if(slabbuf==NULL) + { + slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; + cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); + } + if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_FLOAT)>=unsigned(cu_slabbuf->dev_size())) + { + delete [] slabbuf; + delete cu_slabbuf; + slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; + cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); + } + + + double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_FLOAT*) cu_slabbuf->dev_data()); + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + 
// compute corrections + + double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; + + //if (eflag) energy += qqrd2e*scale * e_slabcorr; + // need to add a correction to make non-neutral systems and per-atom energy translationally invariant + if (eflag || fabs(qsum) > SMALL) + error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting."); + + double ffact = -4.0*MY_PI*dipole_all/volume; + + cuda_slabcorr_force(&cuda->shared_data,ffact); +} From 2b350aaab9e577f6ae32f76d301042ee5c296d41 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 4 Mar 2014 16:48:04 +0000 Subject: [PATCH 03/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11585 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 287b27670a..01bb7ffda5 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "26 Feb 2014" +#define LAMMPS_VERSION "4 Mar 2014" From ec9648ee949120c9c0a4c099f001c61523529245 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 4 Mar 2014 16:48:05 +0000 Subject: [PATCH 04/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11586 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/Manual.html | 4 ++-- doc/Manual.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/Manual.html b/doc/Manual.html index 20c800d27d..8bbed05b10 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -1,7 +1,7 @@ LAMMPS Users Manual - + @@ -22,7 +22,7 @@

LAMMPS Documentation

-

26 Feb 2014 version +

4 Mar 2014 version

Version info:

diff --git a/doc/Manual.txt b/doc/Manual.txt index e1b5f37d42..0cf8837e4b 100644 --- a/doc/Manual.txt +++ b/doc/Manual.txt @@ -1,6 +1,6 @@ LAMMPS Users Manual - + @@ -18,7 +18,7 @@

LAMMPS Documentation :c,h3 -26 Feb 2014 version :c,h4 +4 Mar 2014 version :c,h4 Version info: :h4 From 2bcf10827cc2239544e149a4f2203c2e88e8f109 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 15:28:27 +0000 Subject: [PATCH 05/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11588 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/ewald.cpp | 2984 +++---- src/KSPACE/ewald_disp.cpp | 2950 +++---- src/KSPACE/pppm.cpp | 7002 +++++++-------- src/KSPACE/pppm_disp.cpp | 16418 ++++++++++++++++++------------------ src/KSPACE/pppm_old.cpp | 5726 ++++++------- 5 files changed, 17540 insertions(+), 17540 deletions(-) diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index f750c2cbf3..a684ce80a5 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -1,1492 +1,1492 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) - group/group energy/force added by Stan Moore (BYU) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "stdlib.h" -#include "stdio.h" -#include "string.h" -#include "math.h" -#include "ewald.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "math_const.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define SMALL 0.00001 - -/* ---------------------------------------------------------------------- */ - -Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); - - ewaldflag = 1; - group_group_enable = 1; - group_allocate_flag = 0; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - kmax = 0; - kxvecs = kyvecs = kzvecs = NULL; - ug = NULL; - eg = vg = NULL; - sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; - - nmax = 0; - ek = NULL; - cs = sn = NULL; - - kcount = 0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -Ewald::~Ewald() -{ - deallocate(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::init() -{ - if (comm->me == 0) { - if (screen) fprintf(screen,"Ewald initialization ...\n"); - if (logfile) fprintf(logfile,"Ewald 
initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use Ewald with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab Ewald"); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " - "and slab correction"); - } - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double cutoff = *p_cutoff; - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * force->qqrd2e; - bigint natoms = atom->natoms; - - triclinic = domain->triclinic; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab Ewald - // 3d Ewald just uses zprd 
since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // setup Ewald coefficients so can print stats - - setup(); - - // final RMS accuracy - - double lprx = rms(kxmax_orig,xprd,natoms,q2); - double lpry = rms(kymax_orig,yprd,natoms,q2); - double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // stats - - if (comm->me == 0) { - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(screen," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," KSpace vectors: actual max1d 
max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - } -} - -/* ---------------------------------------------------------------------- - adjust Ewald coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void Ewald::setup() -{ - // volume-dependent factors - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - - // adjustment of z dimension for 2d slab Ewald - // 3d Ewald just uses zprd since slab_volfactor = 1.0 - - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - unitk[0] = 2.0*MY_PI/xprd; - unitk[1] = 2.0*MY_PI/yprd; - unitk[2] = 2.0*MY_PI/zprd_slab; - - int kmax_old = kmax; - - if (kewaldflag == 0) { - - // determine kmax - // function of current box size, accuracy, G_ewald (short-range cutoff) - - bigint natoms = atom->natoms; - double err; - kxmax = 1; - kymax = 1; - kzmax = 1; - - err = rms(kxmax,xprd,natoms,q2); - while (err > accuracy) { - kxmax++; - err = rms(kxmax,xprd,natoms,q2); - } - - err = rms(kymax,yprd,natoms,q2); - while (err > accuracy) { - kymax++; - err = rms(kymax,yprd,natoms,q2); - } - - err = rms(kzmax,zprd_slab,natoms,q2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,zprd_slab,natoms,q2); - } - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - - // scale lattice vectors for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = kxmax/xprd; - tmp[1] = kymax/yprd; - tmp[2] = kzmax/zprd; - lamda2xT(&tmp[0],&tmp[0]); - kxmax = MAX(1,static_cast(tmp[0])); - kymax = 
MAX(1,static_cast(tmp[1])); - kzmax = MAX(1,static_cast(tmp[2])); - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - } - - } else { - - kxmax = kx_ewald; - kymax = ky_ewald; - kzmax = kz_ewald; - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - } - - gsqmx *= 1.00001; - - // if size has grown, reallocate k-dependent and nlocal-dependent arrays - - if (kmax > kmax_old) { - deallocate(); - allocate(); - group_allocate_flag = 0; - - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // pre-compute Ewald coefficients - - if (triclinic == 0) - coeffs(); - else - coeffs_triclinic(); -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double Ewald::rms(int km, double prd, bigint natoms, double q2) -{ - double value = 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); - - return value; -} - -/* ---------------------------------------------------------------------- - compute the Ewald long-range force, energy, virial -------------------------------------------------------------------------- */ - -void Ewald::compute(int eflag, int vflag) -{ - int i,j,k; - - // set energy/virial flags - - if (eflag || vflag) 
ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // partial structure factors on each processor - // total structure factor by summing over procs - - if (triclinic == 0) - eik_dot_r(); - else - eik_dot_r_triclinic(); - - MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - // K-space portion of electric field - // double loop over K-vectors and local atoms - // perform per-atom calculations if needed - - double **f = atom->f; - double *q = atom->q; - int nlocal = atom->nlocal; - - int kx,ky,kz; - double cypz,sypz,exprl,expim,partial,partial_peratom; - - for (i = 0; i < nlocal; i++) { - ek[i][0] = 0.0; - ek[i][1] = 0.0; - ek[i][2] = 0.0; - } - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; - ek[i][0] += partial*eg[k][0]; - ek[i][1] += partial*eg[k][1]; - ek[i][2] += partial*eg[k][2]; - - if (evflag_atom) { - partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; - if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; - if (vflag_atom) - for (j = 0; j < 6; j++) - vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; - } - } - } - - // convert E-field to force - - const 
double qscale = force->qqrd2e * scale; - - for (i = 0; i < nlocal; i++) { - f[i][0] += qscale * q[i]*ek[i][0]; - f[i][1] += qscale * q[i]*ek[i][1]; - if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; - } - - // global energy - - if (eflag_global) { - for (k = 0; k < kcount; k++) - energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + - sfacim_all[k]*sfacim_all[k]); - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // global virial - - if (vflag_global) { - double uk; - for (k = 0; k < kcount; k++) { - uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); - for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; - } - for (j = 0; j < 6; j++) virial[j] *= qscale; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - n = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - sqk = unitk[ic]*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); - sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - cstr1 += q[i]*cs[1][ic][i]; - sstr1 += q[i]*sn[1][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - - for (m = 2; m <= 
kmax; m++) { - for (ic = 0; ic < 3; ic++) { - sqk = m*unitk[ic] * m*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - cstr1 += q[i]*cs[m][ic][i]; - sstr1 += q[i]*sn[m][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - } - - // 1 = (k,l,0), 2 = (k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= 
gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + - (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - cstr3 = 0.0; - sstr3 = 0.0; - cstr4 = 0.0; - sstr4 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - sfacrl[n] = cstr3; - sfacim[n++] = sstr3; - sfacrl[n] = cstr4; - 
sfacim[n++] = sstr4; - } - } - } - } -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r_triclinic() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - double unitk_lamda[3]; - - double max_kvecs[3]; - max_kvecs[0] = kxmax; - max_kvecs[1] = kymax; - max_kvecs[2] = kzmax; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - if (sqk <= gsqmx) { - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - } - } - } - - for (ic = 0; ic < 3; ic++) { - for (m = 2; m <= max_kvecs[ic]; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - } - } - } - - for (n = 0; n < kcount; n++) { - k = kxvecs[n]; - l = kyvecs[n]; - m = kzvecs[n]; - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n] = 
sstr1; - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector -------------------------------------------------------------------------- */ - -void Ewald::coeffs() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - kcount = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (m = 1; m <= kmax; m++) { - sqk = (m*unitk[0]) * (m*unitk[0]); - if (sqk <= gsqmx) { - kxvecs[kcount] = m; - kyvecs[kcount] = 0; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[1]) * (m*unitk[1]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = m; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[2]) * (m*unitk[2]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } - - // 1 = (k,l,0), 2 = 
(k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++;; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + - (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 
1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector for a triclinic - system -------------------------------------------------------------------------- */ - -void Ewald::coeffs_triclinic() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - double unitk_lamda[3]; - - kcount = 0; - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = -kymax; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - unitk_lamda[0] = 2.0*MY_PI*k; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + - unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; - vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2]; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - 
unitk_lamda[0] = 0.0; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - - // (0,0,m) - - for (m = 1; m <= kmax; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate() -{ - kxvecs = new int[kmax3d]; - kyvecs = new int[kmax3d]; - kzvecs = new int[kmax3d]; - - ug = new double[kmax3d]; - memory->create(eg,kmax3d,3,"ewald:eg"); - memory->create(vg,kmax3d,6,"ewald:vg"); - - sfacrl = new double[kmax3d]; - sfacim = new double[kmax3d]; - sfacrl_all = new 
double[kmax3d]; - sfacim_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate() -{ - delete [] kxvecs; - delete [] kyvecs; - delete [] kzvecs; - - delete [] ug; - memory->destroy(eg); - memory->destroy(vg); - - delete [] sfacrl; - delete [] sfacim; - delete [] sfacrl_all; - delete [] sfacim_all; -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). -------------------------------------------------------------------------- */ - -void Ewald::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - 
if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double Ewald::memory_usage() -{ - double bytes = 3 * kmax3d * sizeof(int); - bytes += (1 + 3 + 6) * kmax3d * sizeof(double); - bytes += 4 * kmax3d * sizeof(double); - bytes += nmax*3 * sizeof(double); - bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the Ewald total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - int i,k; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // partial and total structure factors for groups A and B - - for (k = 0; k < kcount; k++) { - - // group A - - sfacrl_A[k] = 0.0; - sfacim_A[k] = 0.0; - 
sfacrl_A_all[k] = 0.0; - sfacim_A_all[k] = 0; - - // group B - - sfacrl_B[k] = 0.0; - sfacim_B[k] = 0.0; - sfacrl_B_all[k] = 0.0; - sfacim_B_all[k] = 0.0; - } - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - int kx,ky,kz; - double cypz,sypz,exprl,expim; - - // partial structure factors for groups A and B on each processor - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - - // group A - - if (mask[i] & groupbit_A) { - sfacrl_A[k] += q[i]*exprl; - sfacim_A[k] += q[i]*expim; - } - - // group B - - if (mask[i] & groupbit_B) { - sfacrl_B[k] += q[i]*exprl; - sfacim_B[k] += q[i]*expim; - } - } - } - } - - // total structure factor by summing over procs - - MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - const double qscale = force->qqrd2e * scale; - double partial_group; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - for (k = 0; k < kcount; k++) { - partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + - sfacim_A_all[k]*sfacim_B_all[k]; - e2group += ug[k]*partial_group; - } - - e2group *= qscale; - - // total group A <--> group B force - - for (k = 0; k < kcount; k++) { - partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - - sfacrl_A_all[k]*sfacim_B_all[k]; - f2group[0] += 
eg[k][0]*partial_group; - f2group[1] += eg[k][1]*partial_group; - if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; - } - - f2group[0] *= qscale; - f2group[1] *= qscale; - f2group[2] *= qscale; - - // 2d slab correction - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). -------------------------------------------------------------------------- */ - -void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - 
MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate_groups() -{ - // group A - - sfacrl_A = new double[kmax3d]; - sfacim_A = new double[kmax3d]; - sfacrl_A_all = new double[kmax3d]; - sfacim_A_all = new double[kmax3d]; - - // group B - - sfacrl_B = new double[kmax3d]; - sfacim_B = new double[kmax3d]; - sfacrl_B_all = new double[kmax3d]; - sfacim_B_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate_groups() -{ - // group A - - delete [] sfacrl_A; - delete [] sfacim_A; - delete [] sfacrl_A_all; - delete [] sfacim_A_all; - - // group B - - delete [] sfacrl_B; - delete [] sfacim_B; - delete [] sfacrl_B_all; - delete [] sfacim_B_all; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) + group/group energy/force added by Stan Moore (BYU) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "stdlib.h" +#include "stdio.h" +#include "string.h" +#include "math.h" +#include "ewald.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); + + ewaldflag = 1; + group_group_enable = 1; + group_allocate_flag = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + kmax = 0; + kxvecs = kyvecs = kzvecs = NULL; + ug = NULL; + eg = vg = NULL; + sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; + + nmax = 0; + ek = NULL; + cs = sn = NULL; + + kcount = 0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +Ewald::~Ewald() +{ + deallocate(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + 
memory->destroy3d_offset(sn,-kmax_created); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::init() +{ + if (comm->me == 0) { + if (screen) fprintf(screen,"Ewald initialization ...\n"); + if (logfile) fprintf(logfile,"Ewald initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use Ewald with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab Ewald"); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " + "and slab correction"); + } + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double cutoff = *p_cutoff; + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + 
+ // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + bigint natoms = atom->natoms; + + triclinic = domain->triclinic; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab Ewald + // 3d Ewald just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // setup Ewald coefficients so can print stats + + setup(); + + // final RMS accuracy + + double lprx = rms(kxmax_orig,xprd,natoms,q2); + double lpry = rms(kymax_orig,yprd,natoms,q2); + double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // stats + + if (comm->me == 0) { + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(screen," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + if (logfile) { + fprintf(logfile," G vector 
(1/distance) = %g\n",g_ewald); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + } +} + +/* ---------------------------------------------------------------------- + adjust Ewald coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void Ewald::setup() +{ + // volume-dependent factors + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + + // adjustment of z dimension for 2d slab Ewald + // 3d Ewald just uses zprd since slab_volfactor = 1.0 + + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + unitk[0] = 2.0*MY_PI/xprd; + unitk[1] = 2.0*MY_PI/yprd; + unitk[2] = 2.0*MY_PI/zprd_slab; + + int kmax_old = kmax; + + if (kewaldflag == 0) { + + // determine kmax + // function of current box size, accuracy, G_ewald (short-range cutoff) + + bigint natoms = atom->natoms; + double err; + kxmax = 1; + kymax = 1; + kzmax = 1; + + err = rms(kxmax,xprd,natoms,q2); + while (err > accuracy) { + kxmax++; + err = rms(kxmax,xprd,natoms,q2); + } + + err = rms(kymax,yprd,natoms,q2); + while (err > accuracy) { + kymax++; + err = rms(kymax,yprd,natoms,q2); + } + + err = rms(kzmax,zprd_slab,natoms,q2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,zprd_slab,natoms,q2); + } + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + + kxmax_orig = kxmax; + kymax_orig = 
kymax; + kzmax_orig = kzmax; + + // scale lattice vectors for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = kxmax/xprd; + tmp[1] = kymax/yprd; + tmp[2] = kzmax/zprd; + lamda2xT(&tmp[0],&tmp[0]); + kxmax = MAX(1,static_cast<int>(tmp[0])); + kymax = MAX(1,static_cast<int>(tmp[1])); + kzmax = MAX(1,static_cast<int>(tmp[2])); + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + } + + } else { + + kxmax = kx_ewald; + kymax = ky_ewald; + kzmax = kz_ewald; + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + } + + gsqmx *= 1.00001; + + // if size has grown, reallocate k-dependent and nlocal-dependent arrays + + if (kmax > kmax_old) { + deallocate(); + allocate(); + group_allocate_flag = 0; + + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // pre-compute Ewald coefficients + + if (triclinic == 0) + coeffs(); + else + coeffs_triclinic(); +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double Ewald::rms(int km, double prd, bigint natoms, double q2) +{ + double value = 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); + + return value; +} + +/* ---------------------------------------------------------------------- 
+ compute the Ewald long-range force, energy, virial +------------------------------------------------------------------------- */ + +void Ewald::compute(int eflag, int vflag) +{ + int i,j,k; + + // set energy/virial flags + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + if (triclinic == 0) + eik_dot_r(); + else + eik_dot_r_triclinic(); + + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + // perform per-atom calculations if needed + + double **f = atom->f; + double *q = atom->q; + int nlocal = atom->nlocal; + + int kx,ky,kz; + double cypz,sypz,exprl,expim,partial,partial_peratom; + + for (i = 0; i < nlocal; i++) { + ek[i][0] = 0.0; + ek[i][1] = 0.0; + ek[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; + ek[i][0] += partial*eg[k][0]; + ek[i][1] += partial*eg[k][1]; + ek[i][2] += partial*eg[k][2]; + + if (evflag_atom) { + partial_peratom = 
exprl*sfacrl_all[k] + expim*sfacim_all[k]; + if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; + } + } + } + + // convert E-field to force + + const double qscale = force->qqrd2e * scale; + + for (i = 0; i < nlocal; i++) { + f[i][0] += qscale * q[i]*ek[i][0]; + f[i][1] += qscale * q[i]*ek[i][1]; + if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; + } + + // global energy + + if (eflag_global) { + for (k = 0; k < kcount; k++) + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // global virial + + if (vflag_global) { + double uk; + for (k = 0; k < kcount; k++) { + uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; + } + for (j = 0; j < 6; j++) virial[j] *= qscale; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + n = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = 
cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + cstr1 += q[i]*cs[1][ic][i]; + sstr1 += q[i]*sn[1][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + cstr1 += q[i]*cs[m][ic][i]; + sstr1 += q[i]*sn[m][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); + } + 
sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - 
cs[l][1][i]*sn[m][2][i]; + cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + sfacrl[n] = cstr3; + sfacim[n++] = sstr3; + sfacrl[n] = cstr4; + sfacim[n++] = sstr4; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r_triclinic() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + double unitk_lamda[3]; + + double max_kvecs[3]; + max_kvecs[0] = kxmax; + max_kvecs[1] = kymax; + max_kvecs[2] = kzmax; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + if (sqk <= gsqmx) { + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + } + } + } + + for (ic = 0; ic < 3; ic++) { + for (m = 2; m <= max_kvecs[ic]; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + } + } + } + + for (n = 0; n < kcount; n++) { + k = kxvecs[n]; + l = kyvecs[n]; + m = kzvecs[n]; + cstr1 = 0.0; + sstr1 = 0.0; + for 
(i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n] = sstr1; + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector +------------------------------------------------------------------------- */ + +void Ewald::coeffs() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + kcount = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (m = 1; m <= kmax; m++) { + sqk = (m*unitk[0]) * (m*unitk[0]); + if (sqk <= gsqmx) { + kxvecs[kcount] = m; + kyvecs[kcount] = 0; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[1]) * (m*unitk[1]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = m; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[2]) * (m*unitk[2]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + 
eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++;; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + 
vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= 
kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + 
kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector for a triclinic + system +------------------------------------------------------------------------- */ + +void Ewald::coeffs_triclinic() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + double unitk_lamda[3]; + + kcount = 0; + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = -kymax; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + unitk_lamda[0] = 2.0*MY_PI*k; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + 
vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; + vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2]; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + + // (0,0,m) + + for (m = 1; m <= kmax; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void 
Ewald::allocate() +{ + kxvecs = new int[kmax3d]; + kyvecs = new int[kmax3d]; + kzvecs = new int[kmax3d]; + + ug = new double[kmax3d]; + memory->create(eg,kmax3d,3,"ewald:eg"); + memory->create(vg,kmax3d,6,"ewald:vg"); + + sfacrl = new double[kmax3d]; + sfacim = new double[kmax3d]; + sfacrl_all = new double[kmax3d]; + sfacim_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate() +{ + delete [] kxvecs; + delete [] kyvecs; + delete [] kzvecs; + + delete [] ug; + memory->destroy(eg); + memory->destroy(vg); + + delete [] sfacrl; + delete [] sfacim; + delete [] sfacrl_all; + delete [] sfacim_all; +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void Ewald::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double Ewald::memory_usage() +{ + double bytes = 3 * kmax3d * sizeof(int); + bytes += (1 + 3 + 6) * kmax3d * sizeof(double); + bytes += 4 * kmax3d * sizeof(double); + bytes += nmax*3 * sizeof(double); + bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); + return 
bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the Ewald total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + int i,k; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // partial and total structure factors for groups A and B + + for (k = 0; k < kcount; k++) { + + // group A + + sfacrl_A[k] = 0.0; + sfacim_A[k] = 0.0; + sfacrl_A_all[k] = 0.0; + sfacim_A_all[k] = 0; + + // group B + + sfacrl_B[k] = 0.0; + sfacim_B[k] = 0.0; + sfacrl_B_all[k] = 0.0; + sfacim_B_all[k] = 0.0; + } + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + int kx,ky,kz; + double cypz,sypz,exprl,expim; + + // partial structure factors for groups A and B on each processor + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + + // group A + + if (mask[i] & groupbit_A) { + sfacrl_A[k] += q[i]*exprl; + 
sfacim_A[k] += q[i]*expim; + } + + // group B + + if (mask[i] & groupbit_B) { + sfacrl_B[k] += q[i]*exprl; + sfacim_B[k] += q[i]*expim; + } + } + } + } + + // total structure factor by summing over procs + + MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + const double qscale = force->qqrd2e * scale; + double partial_group; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + for (k = 0; k < kcount; k++) { + partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + + sfacim_A_all[k]*sfacim_B_all[k]; + e2group += ug[k]*partial_group; + } + + e2group *= qscale; + + // total group A <--> group B force + + for (k = 0; k < kcount; k++) { + partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - + sfacrl_A_all[k]*sfacim_B_all[k]; + f2group[0] += eg[k][0]*partial_group; + f2group[1] += eg[k][1]*partial_group; + if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; + } + + f2group[0] *= qscale; + f2group[1] *= qscale; + f2group[2] *= qscale; + + // 2d slab correction + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of 
K-vectors +------------------------------------------------------------------------- */ + +void Ewald::allocate_groups() +{ + // group A + + sfacrl_A = new double[kmax3d]; + sfacim_A = new double[kmax3d]; + sfacrl_A_all = new double[kmax3d]; + sfacim_A_all = new double[kmax3d]; + + // group B + + sfacrl_B = new double[kmax3d]; + sfacim_B = new double[kmax3d]; + sfacrl_B_all = new double[kmax3d]; + sfacim_B_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate_groups() +{ + // group A + + delete [] sfacrl_A; + delete [] sfacim_A; + delete [] sfacrl_A_all; + delete [] sfacim_A_all; + + // group B + + delete [] sfacrl_B; + delete [] sfacim_B; + delete [] sfacrl_B_all; + delete [] sfacim_B_all; +} diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp index ba88e40f14..39951b8b0c 100644 --- a/src/KSPACE/ewald_disp.cpp +++ b/src/KSPACE/ewald_disp.cpp @@ -1,1475 +1,1475 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "ewald_disp.h" -#include "math_vector.h" -#include "math_const.h" -#include "math_special.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "memory.h" -#include "error.h" -#include "update.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - -#define SMALL 0.00001 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h - -//#define DEBUG - -/* ---------------------------------------------------------------------- */ - -EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); - - ewaldflag = dispersionflag = dipoleflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - memset(function, 0, EWALD_NORDER*sizeof(int)); - kenergy = kvirial = NULL; - cek_local = cek_global = NULL; - ekr_local = NULL; - hvec = NULL; - kvec = NULL; - B = NULL; - first_output = 0; - energy_self_peratom = NULL; - virial_self_peratom = NULL; - nmax = 0; - q2 = 0; - b2 = 0; - M2 = 0; -} - -/* ---------------------------------------------------------------------- */ - -EwaldDisp::~EwaldDisp() -{ - deallocate(); - deallocate_peratom(); - delete [] ekr_local; - delete [] B; -} - -/* --------------------------------------------------------------------- */ - -void EwaldDisp::init() -{ - nkvec = nkvec_max = nevec = nevec_max = 0; - nfunctions = nsums = sums = 0; - nbox = -1; - bytes = 0.0; - - if (!comm->me) { - if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); - if 
(logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); - } - - scale = 1.0; - mumurd2e = force->qqrd2e; - dielectric = force->dielectric; - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - if (!(ptr||cutoff)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_NFUNCS*sizeof(int)); - for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order - if (ewald_order&(1<all(FLERR, - "Unsupported mixing rule in kspace_style ewald/disp"); - default: - error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); - } - nfunctions += function[k] = 1; - nsums += n[k]; - } - - if (!gewaldflag) g_ewald = 0.0; - pair->init(); // so B is defined - init_coeffs(); - init_coeff_sums(); - - double qsum, qsqsum, bsbsum; - qsum = qsqsum = bsbsum = 0.0; - if (function[0]) { - qsum = sum[0].x; - qsqsum = sum[0].x2; - } - - // turn off coulombic if no charge - - if (function[0] && qsqsum == 0.0) { - function[0] = 0; - nfunctions -= 1; - nsums -= 1; - } - - if (function[1]) bsbsum = sum[1].x2; - if (function[2]) bsbsum = sum[2].x2; - - if (function[3]) M2 = sum[9].x2; - - if (function[3] && strcmp(update->unit_style,"electron") == 0) - error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); - - if (qsqsum == 0.0 && bsbsum == 0.0 && 
M2 == 0.0) - error->all(FLERR,"Cannot use Ewald/disp solver " - "on system with no charge, dipole, or LJ particles"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - if (!function[1] && !function[2]) - dispersionflag = 0; - - if (!function[3]) - dipoleflag = 0; - - pair_check(); - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * force->qqrd2e; - M2 *= mumurd2e; - b2 = bsbsum; //Are these units right? - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (function[0]) { - g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); - else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); - } - else if (function[1] || function[2]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } else if (function[3]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } - } - - if (!comm->me) { - if (screen) fprintf(screen, " G vector = %g\n", g_ewald); - if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); - } - - g_ewald_6 = g_ewald; - deallocate_peratom(); - peratom_allocate_flag = 0; -} 
- -/* ---------------------------------------------------------------------- - adjust EwaldDisp coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void EwaldDisp::setup() -{ - volume = shape_det(domain->h)*slab_volfactor; - memcpy(unit, domain->h_inv, sizeof(shape)); - shape_scalar_mult(unit, 2.0*MY_PI); - unit[2] /= slab_volfactor; - - // int nbox_old = nbox, nkvec_old = nkvec; - - if (accuracy >= 1) { - nbox = 0; - error->all(FLERR,"KSpace accuracy too low"); - } - - bigint natoms = atom->natoms; - double err; - int kxmax = 1; - int kymax = 1; - int kzmax = 1; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - while (err > accuracy) { - kxmax++; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - } - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - while (err > accuracy) { - kymax++; - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - } - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - } - nbox = MAX(kxmax,kymax); - nbox = MAX(nbox,kzmax); - double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; - double gsqymx = unit[1]*unit[1]*kymax*kymax; - double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - gsqmx *= 1.00001; - - reallocate(); - coefficients(); - init_coeffs(); - init_coeff_sums(); - init_self(); - - if (!(first_output||comm->me)) { - first_output = 1; - if (screen) fprintf(screen, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - if (logfile) fprintf(logfile, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - } -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) -{ - double 
value = 0.0; - - // Coulombic - - double g2 = g_ewald*g_ewald; - - value += 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); - - // Lennard-Jones - - double g7 = g2*g2*g2*g_ewald; - - value += 4.0*b2*g7/3.0 * - sqrt(1.0/(MY_PI*natoms)) * - (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * - (MY_PI*km/(g_ewald*prd) + 1)); - - // dipole - - value += 8.0*MY_PI*M2/volume*g_ewald * - sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * - exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); - - return value; -} - -void EwaldDisp::reallocate() -{ - int ix, iy, iz; - int nkvec_max = nkvec; - vector h; - - nkvec = 0; - int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; - int *flag = kflag; - - for (ix=0; ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if (!(ix||iy||iz)) *(flag++) = 0; - else if ((!ix)&&(iy<0)) *(flag++) = 0; - else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry - else { - h[0] = unit[0]*ix; - h[1] = unit[5]*ix+unit[1]*iy; - h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; - if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; - } - - if (nkvec>nkvec_max) { - deallocate(); // free memory - hvec = new hvector[nkvec]; // hvec - bytes += (nkvec-nkvec_max)*sizeof(hvector); - kvec = new kvector[nkvec]; // kvec - bytes += (nkvec-nkvec_max)*sizeof(kvector); - kenergy = new double[nkvec*nfunctions]; // kenergy - bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); - kvirial = new double[6*nkvec*nfunctions]; // kvirial - bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); - cek_local = new complex[nkvec*nsums]; // cek_local - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - cek_global = new complex[nkvec*nsums]; // cek_global - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - nkvec_max = nkvec; - } - - flag = kflag; // create index and - kvector *k = kvec; // wave vectors - hvector *hi = hvec; - for (ix=0; ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if 
(*(flag++)) { - hi->x = unit[0]*ix; - hi->y = unit[5]*ix+unit[1]*iy; - (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; - k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } - - delete [] kflag; -} - - -void EwaldDisp::reallocate_atoms() -{ - if (eflag_atom || vflag_atom) - if (atom->nlocal > nmax) { - deallocate_peratom(); - allocate_peratom(); - nmax = atom->nmax; - } - - if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; - delete [] ekr_local; - ekr_local = new cvector[nevec]; - bytes += (nevec-nevec_max)*sizeof(cvector); - nevec_max = nevec; -} - - -void EwaldDisp::allocate_peratom() -{ - memory->create(energy_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); - memory->create(virial_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); -} - - -void EwaldDisp::deallocate_peratom() // free memory -{ - memory->destroy(energy_self_peratom); - memory->destroy(virial_self_peratom); -} - - -void EwaldDisp::deallocate() // free memory -{ - delete [] hvec; hvec = NULL; - delete [] kvec; kvec = NULL; - delete [] kenergy; kenergy = NULL; - delete [] kvirial; kvirial = NULL; - delete [] cek_local; cek_local = NULL; - delete [] cek_global; cek_global = NULL; -} - - -void EwaldDisp::coefficients() -{ - vector h; - hvector *hi = hvec, *nh; - double eta2 = 0.25/(g_ewald*g_ewald); - double b1, b2, expb2, h1, h2, c1, c2; - double *ke = kenergy, *kv = kvirial; - int func0 = function[0], func12 = function[1]||function[2], - func3 = function[3]; - - for (nh = (hi = hvec)+nkvec; hintypes; - - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - delete [] B; - B = new double[n+1]; - bytes += (n+1)*sizeof(double); - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - double **sigma = (double **) force->pair->extract("sigma",tmp); - double eps_i, sigma_i, sigma_n, *bi = B = new 
double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - - if (!(epsilon&&sigma)) - error->all( - FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; - } - } - } -} - -void EwaldDisp::init_coeff_sums() -{ - if (sums) return; // calculated only once - sums = 1; - - Sum sum_local[EWALD_MAX_NSUMS]; - - memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); - if (function[0]) { // 1/r - double *q = atom->q, *qn = q+atom->nlocal; - for (double *i=q; itype, *ntype = type+atom->nlocal; - for (int *i=type; itype, *ntype = type+atom->nlocal; - for (int *i=type; imu) { // dipole - double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; - for (double *i = mu; i < nmu; i += 4) - sum_local[9].x2 += i[3]*i[3]; - } - MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); -} - - -void EwaldDisp::init_self() -{ - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - - memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy - memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); - - if (function[0]) { // 1/r - virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; - energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; - } - if (function[1]) { // geometric 1/r^6 - virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; - energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; - } - if (function[2]) { // arithmetic 1/r^6 - virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ - sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); - energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; - } - if (function[3]) { // dipole - virial_self[3] = 0; // in surface - energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; - } 
-} - - -void EwaldDisp::init_self_peratom() -{ - if (!(vflag_atom || eflag_atom)) return; - - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - double *energy = energy_self_peratom[0]; - double *virial = virial_self_peratom[0]; - int nlocal = atom->nlocal; - - memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - - if (function[0]) { // 1/r - double *ei = energy; - double *vi = virial; - double ce = qscale*g1/MY_PIS; - double cv = -0.5*MY_PI*qscale/(g2*volume); - double *qi = atom->q, *qn = qi + nlocal; - for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double q = *qi; - *vi = cv*q*sum[0].x; - *ei = ce*q*q-vi[0]; - } - } - if (function[1]) { // geometric 1/r^6 - double *ei = energy+1; - double *vi = virial+1; - double ce = -g3*g3/12.0; - double cv = MY_PI*MY_PIS*g3/(6.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double b = B[*typei]; - *vi = cv*b*sum[1].x; - *ei = ce*b*b+vi[0]; - } - } - if (function[2]) { // arithmetic 1/r^6 - double *bi; - double *ei = energy+2; - double *vi = virial+2; - double ce = -g3*g3/3.0; - double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - bi = B+7*typei[0]+7; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; - - /* PJV 20120225: - should this be this instead? 
above implies an inverse dependence - seems to be the above way in original; i recall having tested - arithmetic mixing in the conception phase, but an extra test would - be prudent (pattern repeats in multiple functions below) - - bi = B+7*typei[0]; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; - - */ - - *ei = ce*bi[0]*bi[6]+vi[0]; - } - } - if (function[3]&&atom->mu) { // dipole - double *ei = energy+3; - double *vi = virial+3; - double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; - double ce = mumurd2e*2.0*g3/3.0/MY_PIS; - for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - *vi = 0; // in surface - *ei = ce*imu[3]*imu[3]-vi[0]; - } - } -} - - -/* ---------------------------------------------------------------------- - compute the EwaldDisp long-range force, energy, virial -------------------------------------------------------------------------- */ - -void EwaldDisp::compute(int eflag, int vflag) -{ - if (!nbox) return; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; - - if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { - allocate_peratom(); - peratom_allocate_flag = 1; - nmax = atom->nmax; - } - - reallocate_atoms(); - init_self_peratom(); - compute_ek(); - compute_force(); - //compute_surface(); // assume conducting metal (tinfoil) boundary conditions - compute_energy(); - compute_energy_peratom(); - compute_virial(); - compute_virial_dipole(); - compute_virial_peratom(); -} - - -void EwaldDisp::compute_ek() -{ - cvector *ekr = ekr_local; - int lbytes = (2*nbox+1)*sizeof(cvector); - hvector *h = NULL; - kvector *k, *nk = kvec+nkvec; - cvector *z = new cvector[2*nbox+1]; - cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; - complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; - vector mui; - double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; 
- double bi = 0.0, ci[7]; - double *mu = atom->mu ? atom->mu[0] : NULL; - int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(cek_local, 0, n*sizeof(complex)); // reset sums - while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] - if (tri) { // triclinic z[1] - C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); - C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); - C_ANGLE(z1.z, x[2]*unit[2]); x += 3; - } - else { // orthogonal z[1] - C_ANGLE(z1.x, *(x++)*unit[0]); - C_ANGLE(z1.y, *(x++)*unit[1]); - C_ANGLE(z1.z, *(x++)*unit[2]); - } - for (; zzx, zz->x, z1.x); // 3D k-vector - C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); - C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); - } - kx = ky = -1; - cek = cek_local; - if (func[0]) qi = *(q++); - if (func[1]) bi = B[*type]; - if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); - if (func[3]) { - memcpy(mui, mu, sizeof(vector)); - mu += 4; - h = hvec; - } - for (k=kvec; ky) { // based on order in - if (kx!=k->x) cx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, cx); - } - C_RMULT(zxyz, z[k->z].z, zxy); - if (func[0]) { - cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; - } - if (func[1]) { - cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; - } - if (func[2]) for (i=0; i<7; ++i) { - cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; - } - if (func[3]) { - register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; - cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; - } - } - ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); - ++type; - } - MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); - - delete [] z; -} - - -void EwaldDisp::compute_force() -{ - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; - complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; 
- double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - if (atom->torque) t = atom->torque[0]; - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = - for (; fy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; - } - if (func[1]) { // geometric 1/r^6 - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im; - } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; - } - } - if (func[3]) { // dipole - register double im = *(ke)*(zc.im*cek->re+ - cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double im2 = *(ke)*(zc.re*cek->re- - cek->im*zc.im); - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - if (func[0]) { // charge-dipole - register double qi = *(q)*c[0]; - im = - *(ke)*(zc.re*cek_coul->re - - cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - im += *(ke)*(zc.re*cek->re - 
cek->im*zc.im)*qi; - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - - im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); - im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - } - ++cek; - ke++; - } - } - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bi = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bi)[0]*c[2]; - f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; - } - } - if (func[3]) { // dipole - f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - t += 3; - } -} - - -void EwaldDisp::compute_surface() -{ - // assume conducting metal (tinfoil) boundary conditions, so this function is - // not called because dielectric at the boundary --> infinity, which makes all - // the terms here zero. 
- - if (!function[3]) return; - if (!atom->mu) return; - - vector sum_local = VECTOR_NULL, sum_total; - memset(sum_local, 0, sizeof(vector)); - double *i, *n, *mu = atom->mu[0]; - - for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { - sum_local[0] += (i++)[0]; - sum_local[1] += (i++)[0]; - sum_local[2] += (i++)[0]; - } - MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); - - virial_self[3] = - mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); - energy_self[3] -= virial_self[3]; - - if (!(vflag_atom || eflag_atom)) return; - - double *ei = energy_self_peratom[0]+3; - double *vi = virial_self_peratom[0]+3; - double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; - - for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { - *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); - *ei -= *vi; - } -} - - -void EwaldDisp::compute_energy() -{ - energy = 0.0; - if (!eflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *ke = kenergy; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - double sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums - for (int k=0; kre*cek->re+cek->im*cek->im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2] += *(ke++)*r; - } - if (func[3]) { // dipole - sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); - if (func[0]) { // charge-dipole - sum[3] 
+= *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - } - ke++; - ++cek; - } - } - for (int k=0; kq; - double *eatomj = eatom; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke = kenergy; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - for (int j = 0; j < atom->nlocal; j++, ++eatomj) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; - sum[i] += im; - } - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; - if (func[0]) { // charge-dipole - register double qj = *(q)*c[0]; - sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; - } - ++cek; - ke++; - } - } - - if (func[0]) { // 1/r - register double qj = *(q++)*c[0]; - *eatomj += sum[0]*qj - energy_self_peratom[j][0]; - } - if (func[1]) { // geometric 1/r^6 - register double bj = B[*type]*c[1]; - *eatomj += sum[1]*bj - 
energy_self_peratom[j][1]; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - *eatomj += 0.5*sum[i]*c2; - } - *eatomj -= energy_self_peratom[j][2]; - } - if (func[3]) { // dipole - *eatomj += sum[9] - energy_self_peratom[j][3]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - } -} - - -#define swap(a, b) { register double t = a; a= b; b = t; } - -void EwaldDisp::compute_virial() -{ - memset(virial, 0, sizeof(shape)); - if (!vflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *kv = kvirial; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); - for (int k=0; kre*cek->re+cek->im*cek->im; - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register double r = cek->re*cek->re+cek->im*cek->im; ++cek; - sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; - sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; - } - if (func[3]) { - register double r = cek->re*cek->re+cek->im*cek->im; - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; 
sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - } - ++cek; - } - } - for (int k=0; kmu ? atom->mu[0] : NULL; - double *vatomj = NULL; - if (vflag_atom && vatom) vatomj = vatom[0]; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(&sum[0], 0, 6*sizeof(double)); - memset(&sum_total[0], 0, 6*sizeof(double)); - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(&sum[0], 0, 6*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - double im = 0.0; - if (func[0]) { // 1/r - ke++; - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - ke++; - ++cek; - } - if (func[2]) { // arithmetic 1/r^6 - ke++; - for (i=2; i<9; ++i) { - ++cek; - } - } - if (func[3]) { // dipole - im = *(ke)*(zc.re*cek->re - cek->im*zc.im); - if (func[0]) { // charge-dipole - im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); - } - sum[0] -= mui[0]*h->x*im; - sum[1] -= mui[1]*h->y*im; - sum[2] -= mui[2]*h->z*im; - sum[3] -= mui[0]*h->y*im; - sum[4] -= mui[0]*h->z*im; - 
sum[5] -= mui[1]*h->z*im; - ++cek; - ke++; - } - } - - if (vflag_global) - for (int n = 0; n < 6; n++) - sum_total[n] -= sum[n]; - - if (vflag_atom) - for (int n = 0; n < 6; n++) - vatomj[n] -= sum[n]; - - z = (cvector *) ((char *) z+lbytes); - ++type; - if (vflag_atom) vatomj += 6; - } - - if (vflag_global) { - MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); - for (int n = 0; n < 6; n++) - virial[n] += sum[n]; - } - -} - -void EwaldDisp::compute_virial_peratom() -{ - if (!vflag_atom) return; - - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector mui = VECTOR_NULL; - complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; - double *kv; - double *q = atom->q; - double *vatomj = vatom ? vatom[0] : NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_MAX_NSUMS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - kv = kvirial; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - if (func[3]) cek_coul = cek; - register double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[0][0] += *(kv++)*r; - sum[0][1] += *(kv++)*r; - sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; - sum[0][4] += *(kv++)*r; - sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register 
double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[1][0] += *(kv++)*r; - sum[1][1] += *(kv++)*r; - sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; - sum[1][4] += *(kv++)*r; - sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r; - for (i=2; i<9; ++i) { - r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[i][0] += *(kv++)*r; - sum[i][1] += *(kv++)*r; - sum[i][2] += *(kv++)*r; - sum[i][3] += *(kv++)*r; - sum[i][4] += *(kv++)*r; - sum[i][5] += *(kv++)*r; - kv -= 6; - } - kv += 6; - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double - r = (cek->re*zc.re - cek->im*zc.im)*muk; - sum[9][0] += *(kv++)*r; - sum[9][1] += *(kv++)*r; - sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; - sum[9][4] += *(kv++)*r; - sum[9][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double qj = *(q)*c[0]; - r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - r += -(cek->re*zc.im + cek->im*zc.re)*qj; - sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; - } - ++cek; - } - } - - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; - } - } - if (func[3]) { // dipole - for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; - } - - for (int k=0; kq; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double qsum = 0.0; - if (function[0]) qsum = sum[0].x; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - if (function[3] && atom->mu) { - 
double **mu = atom->mu; - for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; - } - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - - if (function[3] && atom->mu) - error->all(FLERR,"Cannot (yet) use kspace slab correction with " - "long-range dipoles and non-neutral systems or per-atom energy"); - - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); - - // add on torque corrections - - if (function[3] && atom->mu && atom->torque) { - double **mu = atom->mu; - double **torque = atom->torque; - for (int i = 0; i < nlocal; i++) { - torque[i][0] += ffact * dipole_all * mu[i][1]; - torque[i][1] += -ffact * dipole_all * mu[i][0]; - } - } -} - -/* ---------------------------------------------------------------------- - Newton solver used to find g_ewald for LJ systems - ------------------------------------------------------------------------- */ - -double EwaldDisp::NewtonSolve(double x, double Rc, - 
bigint natoms, double vol, double b2) -{ - double dx,tol; - int maxit; - - maxit = 10000; //Maximum number of iterations - tol = 0.00001; //Convergence tolerance - - //Begin algorithm - - for (int i = 0; i < maxit; i++) { - dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); - x = x - dx; //Update x - if (fabs(dx) < tol) return x; - if (x < 0 || x != x) // solver failed - return -1; - } - return -1; -} - -/* ---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) -{ - double a = Rc*x; - double f = 0.0; - - if (function[1] || function[2]) { // LJ - f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * - (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); - } else { // dipole - double rg2 = a*a; - double rg4 = rg2*rg2; - double rg6 = rg4*rg2; - double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; - double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; - f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * - sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * - exp(-rg2)) - accuracy; - } - - return f; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::derivf(double x, double Rc, - bigint natoms, double vol, double b2) -{ - double h = 0.000001; //Derivative step-size - return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "ewald_disp.h" +#include "math_vector.h" +#include "math_const.h" +#include "math_special.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define SMALL 0.00001 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h + +//#define DEBUG + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); + + ewaldflag = dispersionflag = dipoleflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + memset(function, 0, EWALD_NORDER*sizeof(int)); + kenergy = kvirial = NULL; + cek_local = cek_global = NULL; + ekr_local = NULL; + hvec = NULL; + kvec = NULL; + B = NULL; + first_output = 0; + energy_self_peratom = NULL; + virial_self_peratom = NULL; + nmax = 0; + q2 = 0; + b2 = 0; + M2 = 0; +} + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::~EwaldDisp() +{ + deallocate(); + deallocate_peratom(); + delete [] ekr_local; + delete [] B; +} + +/* --------------------------------------------------------------------- */ + +void EwaldDisp::init() +{ + nkvec = 
nkvec_max = nevec = nevec_max = 0; + nfunctions = nsums = sums = 0; + nbox = -1; + bytes = 0.0; + + if (!comm->me) { + if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); + if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); + } + + scale = 1.0; + mumurd2e = force->qqrd2e; + dielectric = force->dielectric; + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + if (!(ptr||cutoff)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_NFUNCS*sizeof(int)); + for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order + if (ewald_order&(1<all(FLERR, + "Unsupported mixing rule in kspace_style ewald/disp"); + default: + error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); + } + nfunctions += function[k] = 1; + nsums += n[k]; + } + + if (!gewaldflag) g_ewald = 0.0; + pair->init(); // so B is defined + init_coeffs(); + init_coeff_sums(); + + double qsum, qsqsum, bsbsum; + qsum = qsqsum = bsbsum = 0.0; + if (function[0]) { + qsum = sum[0].x; + qsqsum = sum[0].x2; + } + + // turn off coulombic if no charge + + if (function[0] && qsqsum == 0.0) { + function[0] = 0; + nfunctions -= 1; + nsums -= 1; + } + + if (function[1]) bsbsum = sum[1].x2; + if (function[2]) bsbsum = sum[2].x2; + + if (function[3]) M2 = sum[9].x2; + + if (function[3] && strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0) + error->all(FLERR,"Cannot use Ewald/disp solver " + "on system with no charge, dipole, or LJ particles"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + if (!function[1] && !function[2]) + dispersionflag = 0; + + if (!function[3]) + dipoleflag = 0; + + pair_check(); + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + M2 *= mumurd2e; + b2 = bsbsum; //Are these units right? 
+ bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (function[0]) { + g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); + else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); + } + else if (function[1] || function[2]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } else if (function[3]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } + } + + if (!comm->me) { + if (screen) fprintf(screen, " G vector = %g\n", g_ewald); + if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); + } + + g_ewald_6 = g_ewald; + deallocate_peratom(); + peratom_allocate_flag = 0; +} + +/* ---------------------------------------------------------------------- + adjust EwaldDisp coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void EwaldDisp::setup() +{ + volume = shape_det(domain->h)*slab_volfactor; + memcpy(unit, domain->h_inv, sizeof(shape)); + shape_scalar_mult(unit, 2.0*MY_PI); + unit[2] /= slab_volfactor; + + // int nbox_old = nbox, nkvec_old = nkvec; + + if (accuracy >= 1) { + nbox = 0; + error->all(FLERR,"KSpace accuracy too low"); + } + + bigint natoms = atom->natoms; + double err; + int kxmax = 1; + int kymax = 1; + int kzmax = 1; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + while (err > 
accuracy) { + kxmax++; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + } + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + while (err > accuracy) { + kymax++; + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + } + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + } + nbox = MAX(kxmax,kymax); + nbox = MAX(nbox,kzmax); + double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; + double gsqymx = unit[1]*unit[1]*kymax*kymax; + double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + gsqmx *= 1.00001; + + reallocate(); + coefficients(); + init_coeffs(); + init_coeff_sums(); + init_self(); + + if (!(first_output||comm->me)) { + first_output = 1; + if (screen) fprintf(screen, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + if (logfile) fprintf(logfile, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + } +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) +{ + double value = 0.0; + + // Coulombic + + double g2 = g_ewald*g_ewald; + + value += 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); + + // Lennard-Jones + + double g7 = g2*g2*g2*g_ewald; + + value += 4.0*b2*g7/3.0 * + sqrt(1.0/(MY_PI*natoms)) * + (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * + (MY_PI*km/(g_ewald*prd) + 1)); + + // dipole + + value += 8.0*MY_PI*M2/volume*g_ewald * + sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * + exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); + + return value; +} + +void EwaldDisp::reallocate() +{ + int ix, iy, iz; + int nkvec_max = nkvec; + vector h; + + nkvec = 0; + int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; + int *flag = kflag; + + for (ix=0; 
ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (!(ix||iy||iz)) *(flag++) = 0; + else if ((!ix)&&(iy<0)) *(flag++) = 0; + else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry + else { + h[0] = unit[0]*ix; + h[1] = unit[5]*ix+unit[1]*iy; + h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; + if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; + } + + if (nkvec>nkvec_max) { + deallocate(); // free memory + hvec = new hvector[nkvec]; // hvec + bytes += (nkvec-nkvec_max)*sizeof(hvector); + kvec = new kvector[nkvec]; // kvec + bytes += (nkvec-nkvec_max)*sizeof(kvector); + kenergy = new double[nkvec*nfunctions]; // kenergy + bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); + kvirial = new double[6*nkvec*nfunctions]; // kvirial + bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); + cek_local = new complex[nkvec*nsums]; // cek_local + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + cek_global = new complex[nkvec*nsums]; // cek_global + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + nkvec_max = nkvec; + } + + flag = kflag; // create index and + kvector *k = kvec; // wave vectors + hvector *hi = hvec; + for (ix=0; ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (*(flag++)) { + hi->x = unit[0]*ix; + hi->y = unit[5]*ix+unit[1]*iy; + (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; + k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } + + delete [] kflag; +} + + +void EwaldDisp::reallocate_atoms() +{ + if (eflag_atom || vflag_atom) + if (atom->nlocal > nmax) { + deallocate_peratom(); + allocate_peratom(); + nmax = atom->nmax; + } + + if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; + delete [] ekr_local; + ekr_local = new cvector[nevec]; + bytes += (nevec-nevec_max)*sizeof(cvector); + nevec_max = nevec; +} + + +void EwaldDisp::allocate_peratom() +{ + memory->create(energy_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); + 
memory->create(virial_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); +} + + +void EwaldDisp::deallocate_peratom() // free memory +{ + memory->destroy(energy_self_peratom); + memory->destroy(virial_self_peratom); +} + + +void EwaldDisp::deallocate() // free memory +{ + delete [] hvec; hvec = NULL; + delete [] kvec; kvec = NULL; + delete [] kenergy; kenergy = NULL; + delete [] kvirial; kvirial = NULL; + delete [] cek_local; cek_local = NULL; + delete [] cek_global; cek_global = NULL; +} + + +void EwaldDisp::coefficients() +{ + vector h; + hvector *hi = hvec, *nh; + double eta2 = 0.25/(g_ewald*g_ewald); + double b1, b2, expb2, h1, h2, c1, c2; + double *ke = kenergy, *kv = kvirial; + int func0 = function[0], func12 = function[1]||function[2], + func3 = function[3]; + + for (nh = (hi = hvec)+nkvec; hintypes; + + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + delete [] B; + B = new double[n+1]; + bytes += (n+1)*sizeof(double); + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + double **sigma = (double **) force->pair->extract("sigma",tmp); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + + if (!(epsilon&&sigma)) + error->all( + FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; + } + } + } +} + +void EwaldDisp::init_coeff_sums() +{ + if (sums) return; // calculated only once + sums = 1; + + Sum sum_local[EWALD_MAX_NSUMS]; + + memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); + if (function[0]) { // 1/r + double *q = atom->q, *qn = q+atom->nlocal; + for (double *i=q; itype, 
*ntype = type+atom->nlocal; + for (int *i=type; itype, *ntype = type+atom->nlocal; + for (int *i=type; imu) { // dipole + double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; + for (double *i = mu; i < nmu; i += 4) + sum_local[9].x2 += i[3]*i[3]; + } + MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); +} + + +void EwaldDisp::init_self() +{ + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + + memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy + memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); + + if (function[0]) { // 1/r + virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; + energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; + } + if (function[1]) { // geometric 1/r^6 + virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; + energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; + } + if (function[2]) { // arithmetic 1/r^6 + virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ + sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); + energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; + } + if (function[3]) { // dipole + virial_self[3] = 0; // in surface + energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; + } +} + + +void EwaldDisp::init_self_peratom() +{ + if (!(vflag_atom || eflag_atom)) return; + + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + double *energy = energy_self_peratom[0]; + double *virial = virial_self_peratom[0]; + int nlocal = atom->nlocal; + + memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + + if (function[0]) { // 1/r + double *ei = energy; + double *vi = virial; + double ce = qscale*g1/MY_PIS; + double cv = -0.5*MY_PI*qscale/(g2*volume); + double *qi = atom->q, *qn = qi + nlocal; + for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double q = *qi; + *vi 
= cv*q*sum[0].x; + *ei = ce*q*q-vi[0]; + } + } + if (function[1]) { // geometric 1/r^6 + double *ei = energy+1; + double *vi = virial+1; + double ce = -g3*g3/12.0; + double cv = MY_PI*MY_PIS*g3/(6.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double b = B[*typei]; + *vi = cv*b*sum[1].x; + *ei = ce*b*b+vi[0]; + } + } + if (function[2]) { // arithmetic 1/r^6 + double *bi; + double *ei = energy+2; + double *vi = virial+2; + double ce = -g3*g3/3.0; + double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + bi = B+7*typei[0]+7; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; + + /* PJV 20120225: + should this be this instead? above implies an inverse dependence + seems to be the above way in original; i recall having tested + arithmetic mixing in the conception phase, but an extra test would + be prudent (pattern repeats in multiple functions below) + + bi = B+7*typei[0]; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; + + */ + + *ei = ce*bi[0]*bi[6]+vi[0]; + } + } + if (function[3]&&atom->mu) { // dipole + double *ei = energy+3; + double *vi = virial+3; + double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; + double ce = mumurd2e*2.0*g3/3.0/MY_PIS; + for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + *vi = 0; // in surface + *ei = ce*imu[3]*imu[3]-vi[0]; + } + } +} + + +/* ---------------------------------------------------------------------- + compute the EwaldDisp long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldDisp::compute(int eflag, int vflag) +{ + if (!nbox) return; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = 
vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + nmax = atom->nmax; + } + + reallocate_atoms(); + init_self_peratom(); + compute_ek(); + compute_force(); + //compute_surface(); // assume conducting metal (tinfoil) boundary conditions + compute_energy(); + compute_energy_peratom(); + compute_virial(); + compute_virial_dipole(); + compute_virial_peratom(); +} + + +void EwaldDisp::compute_ek() +{ + cvector *ekr = ekr_local; + int lbytes = (2*nbox+1)*sizeof(cvector); + hvector *h = NULL; + kvector *k, *nk = kvec+nkvec; + cvector *z = new cvector[2*nbox+1]; + cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; + complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; + vector mui; + double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; + double bi = 0.0, ci[7]; + double *mu = atom->mu ? atom->mu[0] : NULL; + int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(cek_local, 0, n*sizeof(complex)); // reset sums + while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] + if (tri) { // triclinic z[1] + C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); + C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); + C_ANGLE(z1.z, x[2]*unit[2]); x += 3; + } + else { // orthogonal z[1] + C_ANGLE(z1.x, *(x++)*unit[0]); + C_ANGLE(z1.y, *(x++)*unit[1]); + C_ANGLE(z1.z, *(x++)*unit[2]); + } + for (; zzx, zz->x, z1.x); // 3D k-vector + C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); + C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); + } + kx = ky = -1; + cek = cek_local; + if (func[0]) qi = *(q++); + if (func[1]) bi = B[*type]; + if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); + if (func[3]) { + memcpy(mui, mu, sizeof(vector)); + mu += 4; + h = hvec; + } + for (k=kvec; ky) { // based on order in + if (kx!=k->x) cx = z[kx = k->x].x; // 
reallocate + C_RMULT(zxy, z[ky = k->y].y, cx); + } + C_RMULT(zxyz, z[k->z].z, zxy); + if (func[0]) { + cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; + } + if (func[1]) { + cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; + } + if (func[2]) for (i=0; i<7; ++i) { + cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; + } + if (func[3]) { + register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; + cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; + } + } + ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); + ++type; + } + MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); + + delete [] z; +} + + +void EwaldDisp::compute_force() +{ + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; + complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + if (atom->torque) t = atom->torque[0]; + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = + for (; fy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; + } + if (func[1]) { // geometric 1/r^6 + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[1][0] += h->x*im; 
sum[1][1] += h->y*im; sum[1][2] += h->z*im; + } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; + } + } + if (func[3]) { // dipole + register double im = *(ke)*(zc.im*cek->re+ + cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double im2 = *(ke)*(zc.re*cek->re- + cek->im*zc.im); + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + if (func[0]) { // charge-dipole + register double qi = *(q)*c[0]; + im = - *(ke)*(zc.re*cek_coul->re - + cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi; + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + + im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); + im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + } + ++cek; + ke++; + } + } + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bi = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bi)[0]*c[2]; + f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; + } + } + if (func[3]) { // dipole + f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + t += 3; + } +} + + +void EwaldDisp::compute_surface() +{ + // assume conducting metal (tinfoil) boundary conditions, so 
this function is + // not called because dielectric at the boundary --> infinity, which makes all + // the terms here zero. + + if (!function[3]) return; + if (!atom->mu) return; + + vector sum_local = VECTOR_NULL, sum_total; + memset(sum_local, 0, sizeof(vector)); + double *i, *n, *mu = atom->mu[0]; + + for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { + sum_local[0] += (i++)[0]; + sum_local[1] += (i++)[0]; + sum_local[2] += (i++)[0]; + } + MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); + + virial_self[3] = + mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); + energy_self[3] -= virial_self[3]; + + if (!(vflag_atom || eflag_atom)) return; + + double *ei = energy_self_peratom[0]+3; + double *vi = virial_self_peratom[0]+3; + double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; + + for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { + *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); + *ei -= *vi; + } +} + + +void EwaldDisp::compute_energy() +{ + energy = 0.0; + if (!eflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *ke = kenergy; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + double sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums + for (int k=0; kre*cek->re+cek->im*cek->im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2] += *(ke++)*r; + 
} + if (func[3]) { // dipole + sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); + if (func[0]) { // charge-dipole + sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + } + ke++; + ++cek; + } + } + for (int k=0; kq; + double *eatomj = eatom; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke = kenergy; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + for (int j = 0; j < atom->nlocal; j++, ++eatomj) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; + sum[i] += im; + } + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; + if (func[0]) { // charge-dipole + register double qj = *(q)*c[0]; + sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; + } + ++cek; + ke++; + } + } + + if (func[0]) { // 1/r + register double qj = *(q++)*c[0]; + *eatomj += sum[0]*qj - 
energy_self_peratom[j][0]; + } + if (func[1]) { // geometric 1/r^6 + register double bj = B[*type]*c[1]; + *eatomj += sum[1]*bj - energy_self_peratom[j][1]; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + *eatomj += 0.5*sum[i]*c2; + } + *eatomj -= energy_self_peratom[j][2]; + } + if (func[3]) { // dipole + *eatomj += sum[9] - energy_self_peratom[j][3]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + } +} + + +#define swap(a, b) { register double t = a; a= b; b = t; } + +void EwaldDisp::compute_virial() +{ + memset(virial, 0, sizeof(shape)); + if (!vflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *kv = kvirial; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); + for (int k=0; kre*cek->re+cek->im*cek->im; + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; + sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*cek->re+cek->im*cek->im; ++cek; + sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; + sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; + } 
+ if (func[3]) { + register double r = cek->re*cek->re+cek->im*cek->im; + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + } + ++cek; + } + } + for (int k=0; kmu ? atom->mu[0] : NULL; + double *vatomj = NULL; + if (vflag_atom && vatom) vatomj = vatom[0]; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(&sum[0], 0, 6*sizeof(double)); + memset(&sum_total[0], 0, 6*sizeof(double)); + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(&sum[0], 0, 6*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + double im = 0.0; + if (func[0]) { // 1/r + ke++; + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + ke++; + ++cek; + } + if (func[2]) { // arithmetic 1/r^6 + ke++; + for (i=2; i<9; ++i) { + ++cek; + } + } + if (func[3]) { // dipole + im = *(ke)*(zc.re*cek->re - cek->im*zc.im); + if (func[0]) { // charge-dipole + im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); + } + sum[0] -= 
mui[0]*h->x*im; + sum[1] -= mui[1]*h->y*im; + sum[2] -= mui[2]*h->z*im; + sum[3] -= mui[0]*h->y*im; + sum[4] -= mui[0]*h->z*im; + sum[5] -= mui[1]*h->z*im; + ++cek; + ke++; + } + } + + if (vflag_global) + for (int n = 0; n < 6; n++) + sum_total[n] -= sum[n]; + + if (vflag_atom) + for (int n = 0; n < 6; n++) + vatomj[n] -= sum[n]; + + z = (cvector *) ((char *) z+lbytes); + ++type; + if (vflag_atom) vatomj += 6; + } + + if (vflag_global) { + MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); + for (int n = 0; n < 6; n++) + virial[n] += sum[n]; + } + +} + +void EwaldDisp::compute_virial_peratom() +{ + if (!vflag_atom) return; + + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector mui = VECTOR_NULL; + complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *kv; + double *q = atom->q; + double *vatomj = vatom ? vatom[0] : NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_MAX_NSUMS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + kv = kvirial; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + if (func[3]) cek_coul = cek; + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[0][0] += *(kv++)*r; + sum[0][1] += *(kv++)*r; + sum[0][2] += *(kv++)*r; + 
sum[0][3] += *(kv++)*r; + sum[0][4] += *(kv++)*r; + sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[1][0] += *(kv++)*r; + sum[1][1] += *(kv++)*r; + sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; + sum[1][4] += *(kv++)*r; + sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r; + for (i=2; i<9; ++i) { + r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[i][0] += *(kv++)*r; + sum[i][1] += *(kv++)*r; + sum[i][2] += *(kv++)*r; + sum[i][3] += *(kv++)*r; + sum[i][4] += *(kv++)*r; + sum[i][5] += *(kv++)*r; + kv -= 6; + } + kv += 6; + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double + r = (cek->re*zc.re - cek->im*zc.im)*muk; + sum[9][0] += *(kv++)*r; + sum[9][1] += *(kv++)*r; + sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; + sum[9][4] += *(kv++)*r; + sum[9][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double qj = *(q)*c[0]; + r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + r += -(cek->re*zc.im + cek->im*zc.re)*qj; + sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; + } + ++cek; + } + } + + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; + } + } + if (func[3]) { // dipole + for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; + } + + for (int k=0; kq; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double qsum = 0.0; + if (function[0]) qsum = 
sum[0].x; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + if (function[3] && atom->mu) { + double **mu = atom->mu; + for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; + } + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + + if (function[3] && atom->mu) + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range dipoles and non-neutral systems or per-atom energy"); + + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); + + // add on torque corrections + + if (function[3] && atom->mu && atom->torque) { + double **mu = atom->mu; + double **torque = atom->torque; + for (int i = 0; i < nlocal; i++) { + torque[i][0] += ffact * dipole_all * mu[i][1]; + torque[i][1] += -ffact * dipole_all * mu[i][0]; + } + } +} + +/* ---------------------------------------------------------------------- + Newton solver used to find g_ewald for LJ systems + 
------------------------------------------------------------------------- */ + +double EwaldDisp::NewtonSolve(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double dx,tol; + int maxit; + + maxit = 10000; //Maximum number of iterations + tol = 0.00001; //Convergence tolerance + + //Begin algorithm + + for (int i = 0; i < maxit; i++) { + dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); + x = x - dx; //Update x + if (fabs(dx) < tol) return x; + if (x < 0 || x != x) // solver failed + return -1; + } + return -1; +} + +/* ---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) +{ + double a = Rc*x; + double f = 0.0; + + if (function[1] || function[2]) { // LJ + f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * + (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); + } else { // dipole + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)) - accuracy; + } + + return f; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::derivf(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double h = 0.000001; //Derivative step-size + return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; +} diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 9c5db42ad8..c3b54559a0 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1,3501 +1,3501 @@ -/* ---------------------------------------------------------------------- 
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) - analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -#include "math_const.h" -#include "math_special.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{REVERSE_RHO}; -enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg 
< 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - pppmflag = 1; - group_group_enable = 1; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = - sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - cg = NULL; - cg_peratom = NULL; - - nmax = 0; - part2grid = NULL; - - peratom_allocate_flag = 0; - group_allocate_flag = 0; - - // define acons coefficients for estimation of kspace errors - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - memory->create(acons,8,7,"pppm:acons"); - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - 
acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPM::~PPPM() -{ - delete [] factors; - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(part2grid); - memory->destroy(acons); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPM::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->triclinic && differentiation_flag == 1) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " - "and kspace_modify diff ad"); - if (domain->triclinic && slabflag) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " - "slab correction"); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); - error->all(FLERR,str); - } - - // extract short-range Coulombic cutoff 
from pair style - - triclinic = domain->triclinic; - scale = 1.0; - - pair_check(); - - int itmp = 0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL || - force->angle->setflag == NULL || force->bond->setflag == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - 
MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - q2 = qsqsum * force->qqrd2e; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil does not extend beyond neighbor proc - // or overlap is allowed, then done - // else reduce order and try again - - int (*procneigh)[2] = comm->procneigh; - - CommGrid *cgtmp = NULL; - int iteration = 0; - - while (order >= minorder) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond nearest neighbor processor"); - - if (stagger_flag && !differentiation_flag) compute_gf_denom(); - set_grid_global(); - set_grid_local(); - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - iteration++; - } - - if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); - if (!overlap_allowed && cgtmp->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor 
processor"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double estimated_accuracy = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", - ngrid_max,nfft_both_max); - } - } - - // allocate K-space dependent memory - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - cg->ghost_notify(); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); -} - -/* 
---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPM::setup() -{ - if (triclinic) { - setup_triclinic(); - return; - } - - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (MY_2PI/xprd); - double unitky = (MY_2PI/yprd); - double unitkz = (MY_2PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - if 
(differentiation_flag == 1) compute_gf_ad(); - else compute_gf_ik(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::setup_triclinic() -{ - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - prd = domain->prd; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // use lamda (0-1) coordinates - - delxinv = nx_pppm; - delyinv = ny_pppm; - delzinv = nz_pppm; - delvolinv = delxinv*delyinv*delzinv/volume; - - // fkx,fky,fkz for my FFT grid pts - - double per_i,per_j,per_k; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - per_k = k - nz_pppm*(2*k/nz_pppm); - for (j = nylo_fft; j <= nyhi_fft; j++) { - per_j = j - ny_pppm*(2*j/ny_pppm); - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per_i = i - nx_pppm*(2*i/nx_pppm); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*per_i; - unitk_lamda[1] = 2.0*MY_PI*per_j; - unitk_lamda[2] = 2.0*MY_PI*per_k; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - fkx[n] = unitk_lamda[0]; - fky[n] = unitk_lamda[1]; - fkz[n] = unitk_lamda[2]; - n++; - } - } - } - - // virial coefficients - - double sqk,vterm; - - for (n = 0; n < nfft; n++) { - sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; - vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; - vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; - vg[n][3] = vterm*fkx[n]*fky[n]; - vg[n][4] = vterm*fkx[n]*fkz[n]; - vg[n][5] = vterm*fky[n]*fkz[n]; - 
} - } - - compute_gf_ik_triclinic(); -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPM::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // reset portion of global grid that each proc owns - - set_grid_local(); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPM::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - 
domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); - else cg->forward_comm(this,FORWARD_IK); - - // extra per-atom energy/virial communication - - if (evflag_atom) { - if (differentiation_flag == 1 && vflag_atom) - cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); - else if (differentiation_flag == 0) - cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); - } - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - 
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - // notal accounts for TIP4P tallying eatom/vatom for ghost atoms - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - int ntotal = nlocal; - if (tip4pflag) ntotal += atom->nghost; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; - } - - if (vflag_atom) { - for (i = 0; i < ntotal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - if (triclinic == 0) { - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - } else { - memory->create(fkx,nfft_both,"pppm:fkx"); - memory->create(fky,nfft_both,"pppm:fky"); - memory->create(fkz,nfft_both,"pppm:fkz"); - } - - if (differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - 
nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - } - - // summation coeffs - - order_allocated = order; - if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, - "pppm:drho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = 
comm->procneigh; - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag == 1) { - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - } else { - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - } - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - if (triclinic == 0) { - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - } else { - memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - } - - memory->destroy(gf_b); - if (stagger_flag) gf_b = NULL; - memory->destroy2d_offset(rho1d,-order_allocated/2); - memory->destroy2d_offset(drho1d,-order_allocated/2); - memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); - 
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); - - delete fft1; - delete fft2; - delete remap; - delete cg; -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate_peratom() -{ - peratom_allocate_flag = 1; - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = comm->procneigh; - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory 
that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag != 1) - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - delete cg_peratom; -} - -/* ---------------------------------------------------------------------- - set global size of PPPM grid = nx,ny,nz_pppm - used for charge accumulation, FFTs, and electric field interpolation -------------------------------------------------------------------------- */ - -void PPPM::set_grid_global() -{ - // use xprd,yprd,zprd (even if triclinic, and then scale later) - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if 
(!gridflag) { - - if (differentiation_flag == 1 || stagger_flag) { - - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast (xprd/h_x); - ny_pppm = static_cast (yprd/h_y); - nz_pppm = static_cast (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double df_kspace = compute_df_kspace(); - - count++; - - // break loop if the accuracy has been reached or - // too many loops have been performed - - if (df_kspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size"); - h *= 0.95; - h_x = h_y = h_z = h; - } - - } else { - - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast (xprd/h_x) + 1; - ny_pppm = static_cast (yprd/h_y) + 1; - nz_pppm = static_cast (zprd_slab/h_z) + 1; - - err = estimate_ik_error(h_x,xprd,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_x,xprd,natoms); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = estimate_ik_error(h_y,yprd,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_y,yprd,natoms); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = estimate_ik_error(h_z,zprd_slab,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_z,zprd_slab,natoms); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // scale grid for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = nx_pppm/xprd; - tmp[1] = ny_pppm/yprd; - tmp[2] = nz_pppm/zprd; - lamda2xT(&tmp[0],&tmp[0]); - nx_pppm = 
static_cast(tmp[0]) + 1; - ny_pppm = static_cast(tmp[1]) + 1; - nz_pppm = static_cast(tmp[2]) + 1; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - if (triclinic == 0) { - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - } else { - double tmp[3]; - tmp[0] = nx_pppm; - tmp[1] = ny_pppm; - tmp[2] = nz_pppm; - x2lamdaT(&tmp[0],&tmp[0]); - h_x = 1.0/tmp[0]; - h_y = 1.0/tmp[1]; - h_z = 1.0/tmp[2]; - } - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPM::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute estimated kspace force error -------------------------------------------------------------------------- */ - -double PPPM::compute_df_kspace() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - double df_kspace = 0.0; - if (differentiation_flag == 1 || stagger_flag) { - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - } else { - double lprx = estimate_ik_error(h_x,xprd,natoms); - double lpry = estimate_ik_error(h_y,yprd,natoms); - double lprz = estimate_ik_error(h_z,zprd_slab,natoms); - df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - } - return df_kspace; -} - -/* 
---------------------------------------------------------------------- - compute qopt -------------------------------------------------------------------------- */ - -double PPPM::compute_qopt() -{ - double qopt = 0.0; - double *prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u1, u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - - int k,l,m,nx,ny,nz; - const int twoorder = 2*order; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - const int mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - const int lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - const int kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -2; nx <= 2; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - qx *= qx; - - for (ny = -2; ny <= 2; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - qy *= qy; - - for (nz = -2; nz <= 2; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - qz *= qz; - - dot2 = qx+qy+qz; - u1 = sx*sy*sz; - u2 = wx*wy*wz; - sum1 += u1*u1/dot2*MY_4PI*MY_4PI; - sum2 += u1 * u2 * MY_4PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - 
return qopt_all; -} - -/* ---------------------------------------------------------------------- - estimate kspace force error for ik method -------------------------------------------------------------------------- */ - -double PPPM::estimate_ik_error(double h, double prd, bigint natoms) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); - - return value; -} - -/* ---------------------------------------------------------------------- - adjust the g_ewald parameter to near its optimal value - using a Newton-Raphson solver -------------------------------------------------------------------------- */ - -void PPPM::adjust_gewald() -{ - double dx; - - for (int i = 0; i < LARGE; i++) { - dx = newton_raphson_f() / derivf(); - g_ewald -= dx; - if (fabs(newton_raphson_f()) < SMALL) return; - } - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); -} - -/* ---------------------------------------------------------------------- - Calculate f(x) using Newton-Raphson solver - ------------------------------------------------------------------------- */ - -double PPPM::newton_raphson_f() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double df_kspace = compute_df_kspace(); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPM::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = newton_raphson_f(); - g_ewald_old = 
g_ewald; - g_ewald += h; - f2 = newton_raphson_f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimate of the accuracy -------------------------------------------------------------------------- */ - -double PPPM::final_accuracy() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - double df_kspace = compute_df_kspace(); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); - double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); - double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + - df_table*df_table); - - return estimated_accuracy; -} - -/* ---------------------------------------------------------------------- - set local subset of PPPM/FFT grid that I own - n xyz lo/hi in = 3d brick that I own (inclusive) - n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) - n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) -------------------------------------------------------------------------- */ - -void PPPM::set_grid_local() -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * 
nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else kspacebbox(cuthalf,&dist[0]); - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi 
+ nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - if (stagger_flag) { - nxhi_out++; - nyhi_out++; - nzhi_out++; - } - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - // also insure no other procs use ghost cells beyond +z limit - - if (slabflag == 1) { - if (comm->myloc[2] == comm->procgrid[2]-1) - nzhi_in = nzhi_out = nz_pppm - 1; - nzhi_out = MIN(nzhi_out,nz_pppm-1); - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clumps of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid pts owned by this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT grids owned by this proc, 
without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - pre-compute modified (Hockney-Eastwood) Coulomb Green's function -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int 
nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - pre-compute modified (Hockney-Eastwood) Coulomb Green's function - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik_triclinic() -{ - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - double tmp[3]; - tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * 
pow(-log(EPS_HOC),0.25); - tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); - tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); - lamda2xT(&tmp[0],&tmp[0]); - const int nbx = static_cast (tmp[0]); - const int nby = static_cast (tmp[1]); - const int nbz = static_cast (tmp[2]); - - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(MY_PI*mper/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(MY_PI*lper/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(MY_PI*kper/nx_pppm)); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*kper; - unitk_lamda[1] = 2.0*MY_PI*lper; - unitk_lamda[2] = 2.0*MY_PI*mper; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - - sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - argx = MY_PI*kper/nx_pppm + MY_PI*nx; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - argy = MY_PI*lper/ny_pppm + MY_PI*ny; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - argz = MY_PI*mper/nz_pppm + MY_PI*nz; - wz = powsinxx(argz,twoorder); - - double b[3]; - b[0] = 2.0*MY_PI*nx_pppm*nx; - b[1] = 2.0*MY_PI*ny_pppm*ny; - b[2] = 2.0*MY_PI*nz_pppm*nz; - x2lamdaT(&b[0],&b[0]); - - qx = unitk_lamda[0]+b[0]; - sx = exp(-0.25*square(qx/g_ewald)); - - qy = unitk_lamda[1]+b[1]; - sy = exp(-0.25*square(qy/g_ewald)); - - qz = unitk_lamda[2]+b[2]; - sz = exp(-0.25*square(qz/g_ewald)); - - dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - 
-/* ---------------------------------------------------------------------- - compute optimized Green's function for energy calculation -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ad() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz,sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - int k,l,m,n,kper,lper,mper; - - const int twoorder = 2*order; - - for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = square(sin(0.5*qy*yprd/ny_pppm)); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = square(sin(0.5*qx*xprd/nx_pppm)); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - sqk = qx*qx + qy*qy + qz*qz; - - if (sqk != 0.0) { - numerator = MY_4PI/sqk; - denominator = gf_denom(snx,sny,snz); - greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } else { - 
greensfn[n] = 0.0; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } - } - } - } - - // compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme -------------------------------------------------------------------------- */ - -void PPPM::compute_sf_precoeff() -{ - int i,k,l,m,n; - int nx,ny,nz,kper,lper,mper; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = 0; i < 5; i++) { - - qx0 = MY_2PI*(kper+nx_pppm*(i-2)); - qx1 = MY_2PI*(kper+nx_pppm*(i-1)); - qx2 = MY_2PI*(kper+nx_pppm*(i )); - wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); - wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); - wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); - - qy0 = MY_2PI*(lper+ny_pppm*(i-2)); - qy1 = MY_2PI*(lper+ny_pppm*(i-1)); - qy2 = 
MY_2PI*(lper+ny_pppm*(i )); - wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); - wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); - wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); - - qz0 = MY_2PI*(mper+nz_pppm*(i-2)); - qz1 = MY_2PI*(mper+nz_pppm*(i-1)); - qz2 = MY_2PI*(mper+nz_pppm*(i )); - - wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); - wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); - wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); - } - - for (nx = 0; nx < 5; nx++) { - for (ny = 0; ny < 5; ny++) { - for (nz = 0; nz < 5; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_precoeff1[n] = sum1; - sf_precoeff2[n] = sum2; - sf_precoeff3[n] = sum3; - sf_precoeff4[n] = sum4; - sf_precoeff5[n] = sum5; - sf_precoeff6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPM::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // 
check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPM::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - remap density from 3d brick decomposition to FFT decomposition 
-------------------------------------------------------------------------- */ - -void PPPM::brick2fft() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPM::poisson() -{ - if (differentiation_flag == 1) poisson_ad(); - else poisson_ik(); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv 
* greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // triclinic system - - if (triclinic) { - poisson_ik_triclinic(); - return; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik_triclinic() -{ - int 
i,j,k,n; - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fky[i]*work1[n+1]; - work2[n+1] = -fky[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkz[i]*work1[n+1]; - work2[n+1] = -fkz[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ad -------------------------------------------------------------------------- */ - -void PPPM::poisson_ad() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); 
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (vflag_atom) poisson_peratom(); - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom && differentiation_flag != 1) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; 
- work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPM::fieldforce() -{ - if (differentiation_flag == 1) fieldforce_ad(); - else fieldforce_ik(); -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ik -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ad -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - 
- prd = domain->prd; - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - compute_drho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force and substract self forces - - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += q[i]*v0; - vatom[i][1] += q[i]*v1; - vatom[i][2] += q[i]*v2; - vatom[i][3] += q[i]*v3; - vatom[i][4] += q[i]*v4; - vatom[i][5] += q[i]*v5; - } - } -} - -/* ---------------------------------------------------------------------- - pack own values to buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ 
- int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } -} - -/* ---------------------------------------------------------------------- - 
unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[i]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - 
v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - 
*py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-2; l >= 0; l--) { - r1 = drho_coeff[l][k] + r1*dx; - r2 = drho_coeff[l][k] + r2*dy; - r3 = drho_coeff[l][k] + r3*dz; - } - drho1d[0][k] = r1; - drho1d[1][k] = r2; - drho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void 
PPPM::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - for (l = 1; l < order; l++) - drho_coeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1) { - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPM::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - if (differentiation_flag == 1) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - } else { - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - } - if (triclinic) bytes += 3 * nfft_both * sizeof(double); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) - bytes += 6 * nbrick * sizeof(FFT_SCALAR); - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - bytes += cg->memory_usage(); - - return bytes; -} - -/* 
---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - if (differentiation_flag) - error->all(FLERR,"Cannot (yet) use kspace_modify " - "diff ad with compute group/group"); - - if (!group_allocate_flag) allocate_groups(); - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,AA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - 
density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(AA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - - double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - f2group[0] = qscale*volume*f2group_all[0]; - f2group[1] = qscale*volume*f2group_all[1]; - if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::allocate_groups() -{ - group_allocate_flag = 1; - - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::deallocate_groups() -{ - group_allocate_flag = 0; - - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups(int AA_flag) -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (AA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - // triclinic system - - if (triclinic) { - poisson_groups_triclinic(); - return; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - 
for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - for a triclinic system - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups_triclinic() -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - double partial_group; - - // force, x direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[i] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[i] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) + analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{REVERSE_RHO}; +enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm 
command"); + + pppmflag = 1; + group_group_enable = 1; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = + sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + cg = NULL; + cg_peratom = NULL; + + nmax = 0; + part2grid = NULL; + + peratom_allocate_flag = 0; + group_allocate_flag = 0; + + // define acons coefficients for estimation of kspace errors + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + memory->create(acons,8,7,"pppm:acons"); + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 
745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPM::~PPPM() +{ + delete [] factors; + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(part2grid); + memory->destroy(acons); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPM::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->triclinic && differentiation_flag == 1) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " + "and kspace_modify diff ad"); + if (domain->triclinic && slabflag) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " + "slab correction"); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; 
+ scale = 1.0; + + pair_check(); + + int itmp = 0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL || + force->angle->setflag == NULL || force->bond->setflag == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + 
MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + q2 = qsqsum * force->qqrd2e; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil does not extend beyond neighbor proc + // or overlap is allowed, then done + // else reduce order and try again + + int (*procneigh)[2] = comm->procneigh; + + CommGrid *cgtmp = NULL; + int iteration = 0; + + while (order >= minorder) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond nearest neighbor processor"); + + if (stagger_flag && !differentiation_flag) compute_gf_denom(); + set_grid_global(); + set_grid_local(); + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + iteration++; + } + + if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); + if (!overlap_allowed && cgtmp->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if 
(!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double estimated_accuracy = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + } + + // allocate K-space dependent memory + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + cg->ghost_notify(); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially 
and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPM::setup() +{ + if (triclinic) { + setup_triclinic(); + return; + } + + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (MY_2PI/xprd); + double unitky = (MY_2PI/yprd); + double unitkz = (MY_2PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + if (differentiation_flag == 1) compute_gf_ad(); + else compute_gf_ik(); +} + +/* 
---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::setup_triclinic() +{ + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + prd = domain->prd; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // use lamda (0-1) coordinates + + delxinv = nx_pppm; + delyinv = ny_pppm; + delzinv = nz_pppm; + delvolinv = delxinv*delyinv*delzinv/volume; + + // fkx,fky,fkz for my FFT grid pts + + double per_i,per_j,per_k; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + per_k = k - nz_pppm*(2*k/nz_pppm); + for (j = nylo_fft; j <= nyhi_fft; j++) { + per_j = j - ny_pppm*(2*j/ny_pppm); + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per_i = i - nx_pppm*(2*i/nx_pppm); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*per_i; + unitk_lamda[1] = 2.0*MY_PI*per_j; + unitk_lamda[2] = 2.0*MY_PI*per_k; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + fkx[n] = unitk_lamda[0]; + fky[n] = unitk_lamda[1]; + fkz[n] = unitk_lamda[2]; + n++; + } + } + } + + // virial coefficients + + double sqk,vterm; + + for (n = 0; n < nfft; n++) { + sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; + vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; + vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; + vg[n][3] = vterm*fkx[n]*fky[n]; + vg[n][4] = vterm*fkx[n]*fkz[n]; + vg[n][5] = vterm*fky[n]*fkz[n]; + } + } + + compute_gf_ik_triclinic(); +} + +/* 
---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPM::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // reset portion of global grid that each proc owns + + set_grid_local(); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPM::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend 
size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); + else cg->forward_comm(this,FORWARD_IK); + + // extra per-atom energy/virial communication + + if (evflag_atom) { + if (differentiation_flag == 1 && vflag_atom) + cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); + else if (differentiation_flag == 0) + cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); + } + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 
0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + // notal accounts for TIP4P tallying eatom/vatom for ghost atoms + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + int ntotal = nlocal; + if (tip4pflag) ntotal += atom->nghost; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; + } + + if (vflag_atom) { + for (i = 0; i < ntotal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + if (triclinic == 0) { + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + } else { + memory->create(fkx,nfft_both,"pppm:fkx"); + memory->create(fky,nfft_both,"pppm:fky"); + memory->create(fkz,nfft_both,"pppm:fkz"); + } + + if (differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + 
memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + } + + // summation coeffs + + order_allocated = order; + if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, + "pppm:drho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if 
(differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag == 1) { + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + } else { + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + } + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + if (triclinic == 0) { + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + } else { + memory->destroy(fkx); + memory->destroy(fky); + memory->destroy(fkz); + } + + memory->destroy(gf_b); + if (stagger_flag) gf_b = NULL; + memory->destroy2d_offset(rho1d,-order_allocated/2); + memory->destroy2d_offset(drho1d,-order_allocated/2); + memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); + 
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); + + delete fft1; + delete fft2; + delete remap; + delete cg; +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate_peratom() +{ + peratom_allocate_flag = 1; + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory 
that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag != 1) + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + delete cg_peratom; +} + +/* ---------------------------------------------------------------------- + set global size of PPPM grid = nx,ny,nz_pppm + used for charge accumulation, FFTs, and electric field interpolation +------------------------------------------------------------------------- */ + +void PPPM::set_grid_global() +{ + // use xprd,yprd,zprd (even if triclinic, and then scale later) + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if 
(!gridflag) { + + if (differentiation_flag == 1 || stagger_flag) { + + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double df_kspace = compute_df_kspace(); + + count++; + + // break loop if the accuracy has been reached or + // too many loops have been performed + + if (df_kspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size"); + h *= 0.95; + h_x = h_y = h_z = h; + } + + } else { + + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = estimate_ik_error(h_x,xprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_x,xprd,natoms); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = estimate_ik_error(h_y,yprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_y,yprd,natoms); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = estimate_ik_error(h_z,zprd_slab,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_z,zprd_slab,natoms); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // scale grid for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = nx_pppm/xprd; + tmp[1] = ny_pppm/yprd; + tmp[2] = nz_pppm/zprd; + lamda2xT(&tmp[0],&tmp[0]); + nx_pppm = 
static_cast(tmp[0]) + 1; + ny_pppm = static_cast(tmp[1]) + 1; + nz_pppm = static_cast(tmp[2]) + 1; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + if (triclinic == 0) { + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + } else { + double tmp[3]; + tmp[0] = nx_pppm; + tmp[1] = ny_pppm; + tmp[2] = nz_pppm; + x2lamdaT(&tmp[0],&tmp[0]); + h_x = 1.0/tmp[0]; + h_y = 1.0/tmp[1]; + h_z = 1.0/tmp[2]; + } + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPM::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute estimated kspace force error +------------------------------------------------------------------------- */ + +double PPPM::compute_df_kspace() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + double df_kspace = 0.0; + if (differentiation_flag == 1 || stagger_flag) { + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + } else { + double lprx = estimate_ik_error(h_x,xprd,natoms); + double lpry = estimate_ik_error(h_y,yprd,natoms); + double lprz = estimate_ik_error(h_z,zprd_slab,natoms); + df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + } + return df_kspace; +} + +/* 
---------------------------------------------------------------------- + compute qopt +------------------------------------------------------------------------- */ + +double PPPM::compute_qopt() +{ + double qopt = 0.0; + double *prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u1, u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + + int k,l,m,nx,ny,nz; + const int twoorder = 2*order; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + const int mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + const int lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + const int kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -2; nx <= 2; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + qx *= qx; + + for (ny = -2; ny <= 2; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + qy *= qy; + + for (nz = -2; nz <= 2; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + qz *= qz; + + dot2 = qx+qy+qz; + u1 = sx*sy*sz; + u2 = wx*wy*wz; + sum1 += u1*u1/dot2*MY_4PI*MY_4PI; + sum2 += u1 * u2 * MY_4PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + 
return qopt_all; +} + +/* ---------------------------------------------------------------------- + estimate kspace force error for ik method +------------------------------------------------------------------------- */ + +double PPPM::estimate_ik_error(double h, double prd, bigint natoms) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); + + return value; +} + +/* ---------------------------------------------------------------------- + adjust the g_ewald parameter to near its optimal value + using a Newton-Raphson solver +------------------------------------------------------------------------- */ + +void PPPM::adjust_gewald() +{ + double dx; + + for (int i = 0; i < LARGE; i++) { + dx = newton_raphson_f() / derivf(); + g_ewald -= dx; + if (fabs(newton_raphson_f()) < SMALL) return; + } + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); +} + +/* ---------------------------------------------------------------------- + Calculate f(x) using Newton-Raphson solver + ------------------------------------------------------------------------- */ + +double PPPM::newton_raphson_f() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double df_kspace = compute_df_kspace(); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPM::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = newton_raphson_f(); + g_ewald_old = 
g_ewald; + g_ewald += h; + f2 = newton_raphson_f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimate of the accuracy +------------------------------------------------------------------------- */ + +double PPPM::final_accuracy() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + double df_kspace = compute_df_kspace(); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); + double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); + double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + + df_table*df_table); + + return estimated_accuracy; +} + +/* ---------------------------------------------------------------------- + set local subset of PPPM/FFT grid that I own + n xyz lo/hi in = 3d brick that I own (inclusive) + n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) + n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) +------------------------------------------------------------------------- */ + +void PPPM::set_grid_local() +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * 
nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else kspacebbox(cuthalf,&dist[0]); + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi 
+ nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + if (stagger_flag) { + nxhi_out++; + nyhi_out++; + nzhi_out++; + } + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + // also insure no other procs use ghost cells beyond +z limit + + if (slabflag == 1) { + if (comm->myloc[2] == comm->procgrid[2]-1) + nzhi_in = nzhi_out = nz_pppm - 1; + nzhi_out = MIN(nzhi_out,nz_pppm-1); + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clumps of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid pts owned by this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT grids owned by this proc, 
without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int 
nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik_triclinic() +{ + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + double tmp[3]; + tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * 
pow(-log(EPS_HOC),0.25); + tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); + tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); + lamda2xT(&tmp[0],&tmp[0]); + const int nbx = static_cast (tmp[0]); + const int nby = static_cast (tmp[1]); + const int nbz = static_cast (tmp[2]); + + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(MY_PI*mper/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(MY_PI*lper/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(MY_PI*kper/nx_pppm)); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*kper; + unitk_lamda[1] = 2.0*MY_PI*lper; + unitk_lamda[2] = 2.0*MY_PI*mper; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + + sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + argx = MY_PI*kper/nx_pppm + MY_PI*nx; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + argy = MY_PI*lper/ny_pppm + MY_PI*ny; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + argz = MY_PI*mper/nz_pppm + MY_PI*nz; + wz = powsinxx(argz,twoorder); + + double b[3]; + b[0] = 2.0*MY_PI*nx_pppm*nx; + b[1] = 2.0*MY_PI*ny_pppm*ny; + b[2] = 2.0*MY_PI*nz_pppm*nz; + x2lamdaT(&b[0],&b[0]); + + qx = unitk_lamda[0]+b[0]; + sx = exp(-0.25*square(qx/g_ewald)); + + qy = unitk_lamda[1]+b[1]; + sy = exp(-0.25*square(qy/g_ewald)); + + qz = unitk_lamda[2]+b[2]; + sz = exp(-0.25*square(qz/g_ewald)); + + dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + 
+/* ---------------------------------------------------------------------- + compute optimized Green's function for energy calculation +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ad() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz,sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + int k,l,m,n,kper,lper,mper; + + const int twoorder = 2*order; + + for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = square(sin(0.5*qy*yprd/ny_pppm)); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = square(sin(0.5*qx*xprd/nx_pppm)); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + sqk = qx*qx + qy*qy + qz*qz; + + if (sqk != 0.0) { + numerator = MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } else { + 
greensfn[n] = 0.0; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } + } + } + } + + // compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme +------------------------------------------------------------------------- */ + +void PPPM::compute_sf_precoeff() +{ + int i,k,l,m,n; + int nx,ny,nz,kper,lper,mper; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = 0; i < 5; i++) { + + qx0 = MY_2PI*(kper+nx_pppm*(i-2)); + qx1 = MY_2PI*(kper+nx_pppm*(i-1)); + qx2 = MY_2PI*(kper+nx_pppm*(i )); + wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); + wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); + wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); + + qy0 = MY_2PI*(lper+ny_pppm*(i-2)); + qy1 = MY_2PI*(lper+ny_pppm*(i-1)); + qy2 = 
MY_2PI*(lper+ny_pppm*(i )); + wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); + wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); + wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); + + qz0 = MY_2PI*(mper+nz_pppm*(i-2)); + qz1 = MY_2PI*(mper+nz_pppm*(i-1)); + qz2 = MY_2PI*(mper+nz_pppm*(i )); + + wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); + wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); + wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); + } + + for (nx = 0; nx < 5; nx++) { + for (ny = 0; ny < 5; ny++) { + for (nz = 0; nz < 5; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_precoeff1[n] = sum1; + sf_precoeff2[n] = sum2; + sf_precoeff3[n] = sum3; + sf_precoeff4[n] = sum4; + sf_precoeff5[n] = sum5; + sf_precoeff6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPM::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // 
check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPM::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + remap density from 3d brick decomposition to FFT decomposition 
+------------------------------------------------------------------------- */ + +void PPPM::brick2fft() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPM::poisson() +{ + if (differentiation_flag == 1) poisson_ad(); + else poisson_ik(); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv 
* greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // triclinic system + + if (triclinic) { + poisson_ik_triclinic(); + return; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik_triclinic() +{ + int 
i,j,k,n; + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fky[i]*work1[n+1]; + work2[n+1] = -fky[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkz[i]*work1[n+1]; + work2[n+1] = -fkz[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad +------------------------------------------------------------------------- */ + +void PPPM::poisson_ad() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); 
+ for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (vflag_atom) poisson_peratom(); + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom && differentiation_flag != 1) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; 
+ work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPM::fieldforce() +{ + if (differentiation_flag == 1) fieldforce_ad(); + else fieldforce_ik(); +} + +/* 
---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ad +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + 
+ prd = domain->prd; + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + compute_drho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force and substract self forces + + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* 
---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += q[i]*v0; + vatom[i][1] += q[i]*v1; + vatom[i][2] += q[i]*v2; + vatom[i][3] += q[i]*v3; + vatom[i][4] += q[i]*v4; + vatom[i][5] += q[i]*v5; + } + } +} + +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ 
+ int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } +} + +/* ---------------------------------------------------------------------- + 
unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[i]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + 
v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + // innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + 
*py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-2; l >= 0; l--) { + r1 = drho_coeff[l][k] + r1*dx; + r2 = drho_coeff[l][k] + r2*dy; + r3 = drho_coeff[l][k] + r3*dz; + } + drho1d[0][k] = r1; + drho1d[1][k] = r2; + drho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void 
PPPM::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k <= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + for (l = 1; l < order; l++) + drho_coeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1) { + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPM::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + if (differentiation_flag == 1) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + } else { + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + } + if (triclinic) bytes += 3 * nfft_both * sizeof(double); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) + bytes += 6 * nbrick * sizeof(FFT_SCALAR); + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + bytes += cg->memory_usage(); + + return bytes; +} + +/* 
---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + if (differentiation_flag) + error->all(FLERR,"Cannot (yet) use kspace_modify " + "diff ad with compute group/group"); + + if (!group_allocate_flag) allocate_groups(); + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,AA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + 
density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(AA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + + double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + f2group[0] = qscale*volume*f2group_all[0]; + f2group[1] = qscale*volume*f2group_all[1]; + if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::allocate_groups() +{ + group_allocate_flag = 1; + + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::deallocate_groups() +{ + group_allocate_flag = 0; + + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups(int AA_flag) +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (AA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + // triclinic system + + if (triclinic) { + poisson_groups_triclinic(); + return; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + 
for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + for a triclinic system + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups_triclinic() +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + double partial_group; + + // force, x direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[i] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[i] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 426dbf0e7b..a15cf38515 100755 --- a/src/KSPACE/pppm_disp.cpp +++ 
b/src/KSPACE/pppm_disp.cpp @@ -1,8209 +1,8209 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Rolf Isele-Holder (Aachen University) - Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_disp.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; -enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; -enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, - FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, - FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, - FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; - - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 
1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); - - triclinic_support = 0; - pppmflag = dispersionflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - csumflag = 0; - B = NULL; - cii = NULL; - csumi = NULL; - peratom_allocate_flag = 0; - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = - v4_brick = v5_brick = NULL; - - density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = - v4_brick_g = v5_brick_g = NULL; - - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = - v4_brick_a0 = v5_brick_a0 = NULL; - - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = - v4_brick_a1 = v5_brick_a1 = NULL; - - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = - v4_brick_a2 = v5_brick_a2 = NULL; - - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = - v4_brick_a3 = v5_brick_a3 = NULL; - - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = 
v2_brick_a4 = v3_brick_a4 = - v4_brick_a4 = v5_brick_a4 = NULL; - - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = - v4_brick_a5 = v5_brick_a5 = NULL; - - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = - v4_brick_a6 = v5_brick_a6 = NULL; - - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = - v4_brick_none = v5_brick_none = NULL; - - greensfn = NULL; - greensfn_6 = NULL; - work1 = work2 = NULL; - work1_6 = work2_6 = NULL; - vg = NULL; - vg2 = NULL; - vg_6 = NULL; - vg2_6 = NULL; - fkx = fky = fkz = NULL; - fkx2 = fky2 = fkz2 = NULL; - fkx_6 = fky_6 = fkz_6 = NULL; - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = - sf_precoeff5 = sf_precoeff6 = NULL; - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = - sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - gf_b = NULL; - gf_b_6 = NULL; - rho1d = rho_coeff = NULL; - drho1d = drho_coeff = NULL; - rho1d_6 = rho_coeff_6 = NULL; - drho1d_6 = drho_coeff_6 = NULL; - fft1 = fft2 = NULL; - fft1_6 = fft2_6 = NULL; - remap = NULL; - remap_6 = NULL; - - nmax = 0; - part2grid = NULL; - part2grid_6 = NULL; - - cg = NULL; - cg_peratom = NULL; - cg_6 = NULL; - cg_peratom_6 = NULL; - - memset(function, 0, EWALD_FUNCS*sizeof(int)); -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMDisp::~PPPMDisp() -{ - delete [] factors; - delete [] B; - B = NULL; - delete [] cii; - cii = NULL; - delete [] csumi; - csumi = NULL; - deallocate(); - deallocate_peratom(); - 
memory->destroy(part2grid); - memory->destroy(part2grid_6); - part2grid = part2grid_6 = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMDisp::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); - } - - if (order > MAXORDER || order_6 > MAXORDER) { - char str[128]; - sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // set scale - - scale = 1.0; - - triclinic = domain->triclinic; - - // check whether cutoff and pair style are set - - pair_check(); - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; - if (!(ptr||*p_cutoff||*p_cutoff_lj)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - cutoff_lj = *p_cutoff_lj; - - double tmp2; - MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); - - // check out which types of potentials will have to be calculated - - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_FUNCS*sizeof(int)); - for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order - if (ewald_order&(1<pair_style); - error->all(FLERR,str); - } - function[k] = 1; - } - - - // warn, if function[0] is not set but charge attribute is set! - if (!function[0] && atom->q_flag && me == 0) { - char str[128]; - sprintf(str, "Charges are set, but coulombic solver is not used"); - error->warning(FLERR, str); - } - - // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral - - if (function[0]) { - if (!atom->q_flag) - error->all(FLERR,"Kspace style with selected options " - "requires atom attribute q"); - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver with selected options " - "on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - } - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - int itmp; - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = 
*p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - - // initialize the pair style to get the coefficients - neighrequest_flag = 0; - pair->init(); - neighrequest_flag = 1; - init_coeffs(); - - //if g_ewald and g_ewald_6 have not been specified, set some initial value - // to avoid problems when calculating the energies! - - if (!gewaldflag) g_ewald = 1; - if (!gewaldflag_6) g_ewald_6 = 1; - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - int (*procneigh)[2] = comm->procneigh; - - int iteration = 0; - if (function[0]) { - CommGrid *cgtmp = NULL; - while (order >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp Coulomb order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - // set grid for dispersion interaction and coulomb interactions - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMDisp Coulomb grid is too large"); - - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, - nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); 
- - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp, world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out, - nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - } - - if (order < minorder) - error->all(FLERR, - "Coulomb PPPMDisp order has been reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double acc = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," Coulomb stencil order = %d\n",order); - fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(screen," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - if (logfile) { - fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," Coulomb stencil order = %d\n",order); - fprintf(logfile, - " Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - } - } - - iteration = 0; - if (function[1] + function[2] + function[3]) { - CommGrid *cgtmp = NULL; - while (order_6 >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp dispersion order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - set_grid_6(); - - if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) - error->all(FLERR,"PPPMDisp Dispersion grid is too large"); - - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, - nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, - nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - order_6--; - } - - if (order_6 < minorder) - error->all(FLERR,"Dispersion PPPMDisp order has been " - "reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald_6 - - if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) - adjust_gewald_6(); - - // calculate the final accuracy - - double acc, acc_real, acc_kspace; - final_accuracy_6(acc, acc_real, acc_kspace); - - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); - fprintf(screen," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(screen," Dispersion stencil order = %d\n",order_6); - fprintf(screen," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(screen," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(screen," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(screen," Dispersion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); - fprintf(logfile," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(logfile," Dispersion stencil order = %d\n",order_6); - fprintf(logfile," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(logfile," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(logfile," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(logfile," Disperion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - } - } - - // allocate K-space dependent memory - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - cg->ghost_notify(); - cg->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - 
nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - cg_6->ghost_notify(); - cg_6->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); - } - -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMDisp::setup() -{ - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // compute fkx,fky,fkz for my FFT grid pts - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - //compute the virial coefficients and green functions - if (function[0]){ - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double per; - int i, j, k, n; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - j = (nx_pppm - i) % nx_pppm; - per = j - nx_pppm*(2*j/nx_pppm); - fkx2[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - j = (ny_pppm - i) % ny_pppm; - per = j - ny_pppm*(2*j/ny_pppm); - fky2[i] 
= unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - j = (nz_pppm - i) % nz_pppm; - per = j - nz_pppm*(2*j/nz_pppm); - fkz2[i] = unitkz*per; - } - - double sqk,vterm; - double gew2inv = 1/(g_ewald*g_ewald); - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); - vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); - vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); - } - n++; - } - } - } - compute_gf(); - if (differentiation_flag == 1) compute_sf_coeff(); - } - - if (function[1] + function[2] + function[3]) { - delxinv_6 = nx_pppm_6/xprd; - delyinv_6 = ny_pppm_6/yprd; - delzinv_6 = nz_pppm_6/zprd_slab; - delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; - - double per; - int i, j, k, n; - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - per = i - nx_pppm_6*(2*i/nx_pppm_6); - fkx_6[i] = unitkx*per; - j = (nx_pppm_6 - i) % nx_pppm_6; - per = j - nx_pppm_6*(2*j/nx_pppm_6); - fkx2_6[i] = unitkx*per; - } - for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { - per = i - ny_pppm_6*(2*i/ny_pppm_6); - fky_6[i] = unitky*per; - j = (ny_pppm_6 - i) % ny_pppm_6; - per = j - ny_pppm_6*(2*j/ny_pppm_6); - fky2_6[i] = unitky*per; - } - for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { - per = i - nz_pppm_6*(2*i/nz_pppm_6); - fkz_6[i] = unitkz*per; - j = (nz_pppm_6 - i) % nz_pppm_6; - per = j - nz_pppm_6*(2*j/nz_pppm_6); - 
fkz2_6[i] = unitkz*per; - } - double sqk,vterm; - long double erft, expt,nom, denom; - long double b, bs, bt; - double rtpi = sqrt(MY_PI); - double gewinv = 1/g_ewald_6; - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; - if (sqk == 0.0) { - vg_6[n][0] = 0.0; - vg_6[n][1] = 0.0; - vg_6[n][2] = 0.0; - vg_6[n][3] = 0.0; - vg_6[n][4] = 0.0; - vg_6[n][5] = 0.0; - } else { - b = 0.5*sqrt(sqk)*gewinv; - bs = b*b; - bt = bs*b; - erft = 2*bt*rtpi*erfc(b); - expt = exp(-bs); - nom = erft - 2*bs*expt; - denom = nom + expt; - if (denom == 0) vterm = 3.0/sqk; - else vterm = 3.0*nom/(sqk*denom); - vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; - vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; - vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; - vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; - vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; - vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; - vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); - vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); - vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); - } - n++; - } - } - } - compute_gf_6(); - if (differentiation_flag == 1) compute_sf_coeff_6(); - } -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPMDisp::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // reset portion of global grid that each proc owns - - if (function[0]) - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, 
- nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); - - if (function[1] + function[2] + function[3]) - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate_peratom(), compute() will allocate when needed - - allocate(); - - if (function[0]) { - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_6->ghost_notify(); - if (overlap_allowed == 0 && cg_6->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg_6->setup(); - } - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); - } - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMDisp::compute(int eflag, int vflag) -{ - - int i; - // convert atoms from box to lamda coords - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - if (function[0]) { - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_peratom_6->ghost_notify(); - cg_peratom_6->setup(); - } - peratom_allocate_flag = 1; - } - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - - if (function[0]) memory->destroy(part2grid); - if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); - nmax = atom->nmax; - if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); - if (function[1] + function[2] + function[3]) - memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); - } - - - energy = 0.0; - energy_1 = 0.0; - energy_6 = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; - - // find grid points for all my particles - // distribute partcles' charges/dispersion coefficients on the grid - // communication between processors and remapping two fft - // Solution of poissons equation in k-space and backtransformation - // communication between processors - // calculation of forces - - if (function[0]) { - - //perfrom calculations for coulomb interactions only - - particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, - nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); - - make_rho_c(); - - cg->reverse_comm(this,REVERSE_RHO); - - brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - density_brick, density_fft, work1,remap); - - if (differentiation_flag == 1) { - - poisson_ad(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this,FORWARD_AD); - - fieldforce_c_ad(); - - if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); - - } else { - poisson_ik(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - fkx, fky, fkz,fkx2, fky2, fkz2, - vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this, FORWARD_IK); - - fieldforce_c_ik(); - - if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); - } - if (evflag_atom) fieldforce_c_peratom(); - } - - if (function[1]) { - //perfrom calculations for geometric mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_g(); - - - cg_6->reverse_comm(this, REVERSE_RHO_G); - - brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - density_brick_g, density_fft_g, work1_6,remap_6); - - if (differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_AD_G); - - fieldforce_g_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); - - } else { - poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_IK_G); - - fieldforce_g_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); - } - if (evflag_atom) fieldforce_g_peratom(); - } - - if (function[2]) { - //perform calculations for arithmetic mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_a(); - - cg_6->reverse_comm(this, REVERSE_RHO_A); - - brick2fft_a(); - - if ( differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ad(density_fft_a0, density_fft_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ad(density_fft_a1, density_fft_a5, - 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ad(density_fft_a2, density_fft_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_AD_A); - - fieldforce_a_ad(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); - - } else { - - poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ik(density_fft_a0, density_fft_a6, - vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, - vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ik(density_fft_a1, density_fft_a5, - vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, - vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, - u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ik(density_fft_a2, density_fft_a4, - vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, - vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_IK_A); - - fieldforce_a_ik(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); - } - if (evflag_atom) fieldforce_a_peratom(); - } - - if (function[3]) { - //perfrom calculations if no mixing rule applies - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - - make_rho_none(); - - cg_6->reverse_comm(this, REVERSE_RHO_NONE); - - brick2fft_none(); - - if (differentiation_flag == 1) { - - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); - - fieldforce_none_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); - - } else { - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); - - fieldforce_none_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); - } - if (evflag_atom) fieldforce_none_peratom(); - } - - // sum energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_1 = energy_all; - MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_6 = energy_all; - - energy_1 *= 0.5*volume; - energy_6 *= 0.5*volume; - - energy_1 -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + - 1.0/12.0*pow(g_ewald_6,6)*csum; - energy_1 *= qscale; - } - - // sum virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; - if 
(function[1]+function[2]+function[3]){ - double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; - virial[0] -= a; - virial[1] -= a; - virial[2] -= a; - } - } - - if (eflag_atom) { - if (function[0]) { - double *q = atom->q; - for (i = 0; i < atom->nlocal; i++) { - eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction - } - } - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + - 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; - } - } - } - - if (vflag_atom) { - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction - } - } - } - - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - if (function[0]) energy += energy_1; - if (function[1] + function[2] + function[3]) energy += energy_6; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - initialize coefficients needed for the dispersion density on the grids -------------------------------------------------------------------------- */ - -void PPPMDisp::init_coeffs() // local pair coeffs -{ - int tmp; - int n = atom->ntypes; - int converged; - delete [] B; - if (function[3] + function[2]) { // no mixing rule or arithmetic - if (function[2] && me == 0) { - if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); - if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); - } - // get dispersion coefficients - double **b = (double **) force->pair->extract("B",tmp); - // allocate data for eigenvalue decomposition - double **A; - double 
**Q; - memory->create(A,n,n,"pppm/disp:A"); - memory->create(Q,n,n,"pppm/disp:Q"); - // fill coefficients to matrix a - for (int i = 1; i <= n; i++) - for (int j = 1; j <= n; j++) - A[i-1][j-1] = b[i][j]; - // transform q to a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Q[i][j] = 0.0; - for (int i = 0; i < n; i++) - Q[i][i] = 1.0; - // perfrom eigenvalue decomposition with QR algorithm - converged = qr_alg(A,Q,n); - if (function[3] && !converged) { - error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); - } - // determine number of used eigenvalues - // based on maximum allowed number or cutoff criterion - // sort eigenvalues according to their size with bubble sort - double t; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n-1-i; j++) { - if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { - t = A[j][j]; - A[j][j] = A[j+1][j+1]; - A[j+1][j+1] = t; - for (int k = 0; k < n; k++) { - t = Q[k][j]; - Q[k][j] = Q[k][j+1]; - Q[k][j+1] = t; - } - } - } - } - - // check which eigenvalue is the first that is smaller - // than a specified tolerance - // check how many are maximum allowed by the user - double amax = fabs(A[0][0]); - double acrit = amax*splittol; - double bmax = 0; - double err = 0; - nsplit = 0; - for (int i = 0; i < n; i++) { - if (fabs(A[i][i]) > acrit) nsplit++; - else { - bmax = fabs(A[i][i]); - break; - } - } - - err = bmax/amax; - if (err > 1.0e-4) { - char str[128]; - sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); - error->warning(FLERR, str); - } - // set B - B = new double[nsplit*n+nsplit]; - for (int i = 0; i< nsplit; i++) { - B[i] = A[i][i]; - for (int j = 0; j < n; j++) { - B[nsplit*(j+1) + i] = Q[j][i]; - } - } - - nsplit_alloc = nsplit; - if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; - // check if the function should preferably be [1] or [2] or [3] - if (nsplit == 1) { - delete [] B; - function[3] = 0; - function[2] = 0; - function[1] = 1; - if (me == 0) { - if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); - if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); - } - } - if (function[2] && nsplit <= 6) { - if (me == 0) { - if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); - } - function[3] = 1; - function[2] = 0; - } - if (function[2] && (nsplit > 6)) { - if (me == 0) { - if (screen) fprintf(screen," Using 7 structure factors\n"); - if (logfile) fprintf(logfile," Using 7 structure factors\n"); - } - delete [] B; - } - if (function[3]) { - if (me == 0) { - if (screen) fprintf(screen," Using %d structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); - } - if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); - } - - memory->destroy(A); - memory->destroy(Q); - } - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - B = new double[n+1]; - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - //cannot use epsilon, because this has not been set yet - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - //cannot use sigma, because this has not been set yet - double **sigma = (double **) force->pair->extract("sigma",tmp); - if (!(epsilon&&sigma)) - error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); - double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]*0.25; - sigma_n *= sigma_i; - } - } - } -} - -/* 
---------------------------------------------------------------------- - Eigenvalue decomposition of a real, symmetric matrix with the QR - method (includes transpformation to Tridiagonal Matrix + Wilkinson - shift) -------------------------------------------------------------------------- */ - -int PPPMDisp::qr_alg(double **A, double **Q, int n) -{ - int converged = 0; - double an1, an, bn1, d, mue; - // allocate some memory for the required operations - double **A0,**Qi,**C,**D,**E; - // make a copy of A for convergence check - memory->create(A0,n,n,"pppm/disp:A0"); - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - A0[i][j] = A[i][j]; - - // allocate an auxiliary matrix Qi - memory->create(Qi,n,n,"pppm/disp:Qi"); - - // alllocate an auxillary matrices for the matrix multiplication - memory->create(C,n,n,"pppm/disp:C"); - memory->create(D,n,n,"pppm/disp:D"); - memory->create(E,n,n,"pppm/disp:E"); - - // transform Matrix A to Tridiagonal form - hessenberg(A,Q,n); - - // start loop for the matrix factorization - int count = 0; - int countmax = 100000; - while (1) { - // make a Wilkinson shift - an1 = A[n-2][n-2]; - an = A[n-1][n-1]; - bn1 = A[n-2][n-1]; - d = (an1-an)/2; - mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); - for (int i = 0; i < n; i++) - A[i][i] -= mue; - - // perform a QR factorization for a tridiagonal matrix A - qr_tri(Qi,A,n); - - // update the matrices - mmult(A,Qi,C,n); - mmult(Q,Qi,C,n); - - // backward Wilkinson shift - for (int i = 0; i < n; i++) - A[i][i] += mue; - - // check the convergence - converged = check_convergence(A,Q,A0,C,D,E,n); - if (converged) break; - count = count + 1; - if (count == countmax) break; - } - - // free allocated memory - memory->destroy(Qi); - memory->destroy(A0); - memory->destroy(C); - memory->destroy(D); - memory->destroy(E); - - return converged; -} - -/* ---------------------------------------------------------------------- - Transform a Matrix to Hessenberg form (for symmetric Matrices, the - 
result will be a tridiagonal matrix) -------------------------------------------------------------------------- */ - -void PPPMDisp::hessenberg(double **A, double **Q, int n) -{ - double r,a,b,c,s,x1,x2; - for (int i = 0; i < n-1; i++) { - for (int j = i+2; j < n; j++) { - // compute coeffs for the rotation matrix - a = A[i+1][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A with multiplication from the left - for (int k = 0; k < n; k++) { - x1 = A[i+1][k]; - x2 = A[j][k]; - A[i+1][k] = c*x1 + s*x2; - A[j][k] = -s*x1 + c*x2; - } - // update the entries of A and Q with a multiplication from the right - for (int k = 0; k < n; k++) { - x1 = A[k][i+1]; - x2 = A[k][j]; - A[k][i+1] = c*x1 + s*x2; - A[k][j] = -s*x1 + c*x2; - x1 = Q[k][i+1]; - x2 = Q[k][j]; - Q[k][i+1] = c*x1 + s*x2; - Q[k][j] = -s*x1 + c*x2; - } - } - } -} - -/* ---------------------------------------------------------------------- - QR factorization for a tridiagonal matrix; Result of the factorization - is stored in A and Qi -------------------------------------------------------------------------- */ - -void PPPMDisp::qr_tri(double** Qi,double** A,int n) -{ - double r,a,b,c,s,x1,x2; - int j,k,k0,kmax; - // make Qi a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Qi[i][j] = 0.0; - for (int i = 0; i < n; i++) - Qi[i][i] = 1.0; - // loop over main diagonal and first of diagonal of A - for (int i = 0; i < n-1; i++) { - j = i+1; - // coefficients of the rotation matrix - a = A[i][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A and Q - k0 = (i-1>0)?i-1:0; //min(i-1,0); - kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); - double epsabs = eps*Bmax; - - // reconstruct the original matrix - // store the diagonal elements in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = 0.0; - for (int i = 0; i < n; i++) - D[i][i] = A[i][i]; - // store matrix Q in E - for (int i 
= 0; i < n; i++) - for (int j = 0; j < n; j++) - E[i][j] = Q[i][j]; - // E = Q*A - mmult(E,D,C,n); - // store transpose of Q in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = Q[j][i]; - // E = Q*A*Q.t - mmult(E,D,C,n); - - //compare the original matrix and the final matrix - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - diff = A0[i][j] - E[i][j]; - epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); - } - } - if (epsmax > epsabs) converged = 0; - return converged; -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - memory->create(work1,2*nfft_both,"pppm/disp:work1"); - memory->create(work2,2*nfft_both,"pppm/disp:work2"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); - - memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); - memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); - memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); - - - memory->create(gf_b,order,"pppm/disp:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); - - memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); - memory->create(vg,nfft_both,6,"pppm/disp:vg"); - memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); - } - memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[1]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); - memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); - memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); - } - memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,3,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - 
- if (function[2]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); - memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); - memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); - memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); - memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); - memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); - memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); - - memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); - memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); - memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); - memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); - memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); - memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); - memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); - - - if ( differentiation_flag == 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - - memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); - memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); - memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); - - memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); - memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); - memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); - - memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); - memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); - memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); - - memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); - memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); - memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); - - memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); - 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); - memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); - - memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); - memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); - memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); - - memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); - memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); - memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); - } - - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,7,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - 
procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,21,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[3]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); - if ( differentiation_flag == 1) { - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); - memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); - memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); - } - memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate_peratom() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v0_brick"); - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v5_brick"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - - if (function[1]) { - - if ( differentiation_flag != 1 ) - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); - memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); - memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); - memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); - memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); - memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[2]) { - - if ( differentiation_flag != 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - } - - memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); - memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); - memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); - memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); - memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); - memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); - - memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); - memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); - memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); - memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); - memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); - memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); - - memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); - memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); - memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); - memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); - memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); - memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); - - memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); - memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); - memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); - memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); - memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); - memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); - - memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); - memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); - memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); - memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); - memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); - memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); - - memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); - memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); - memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); - memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); - memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); - memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); - - memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); - memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); - memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); - memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); - memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); - memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,42,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,49,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[3]) { - - if ( differentiation_flag != 1 ) - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); - memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); - memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); - memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); - 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); - memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_fft); - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - - memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_g); - density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - - memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a0); - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - - memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a1); - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - - memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a2); - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - - memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a3); - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - - memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a4); - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - - memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a5); - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - - memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a6); - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - - memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_none); - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - memory->destroy(sf_precoeff1_6); - memory->destroy(sf_precoeff2_6); - 
memory->destroy(sf_precoeff3_6); - memory->destroy(sf_precoeff4_6); - memory->destroy(sf_precoeff5_6); - memory->destroy(sf_precoeff6_6); - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - memory->destroy(greensfn); - memory->destroy(greensfn_6); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(work1_6); - memory->destroy(work2_6); - memory->destroy(vg); - memory->destroy(vg2); - memory->destroy(vg_6); - memory->destroy(vg2_6); - greensfn = greensfn_6 = NULL; - work1 = work2 = work1_6 = work2_6 = NULL; - vg = vg2 = vg_6 = vg2_6 = NULL; - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - fkx = fky = fkz = NULL; - - memory->destroy1d_offset(fkx2,nxlo_fft); - memory->destroy1d_offset(fky2,nylo_fft); - memory->destroy1d_offset(fkz2,nzlo_fft); - fkx2 = fky2 = fkz2 = NULL; - - memory->destroy1d_offset(fkx_6,nxlo_fft_6); - memory->destroy1d_offset(fky_6,nylo_fft_6); - memory->destroy1d_offset(fkz_6,nzlo_fft_6); - fkx_6 = fky_6 = fkz_6 = NULL; - - memory->destroy1d_offset(fkx2_6,nxlo_fft_6); - memory->destroy1d_offset(fky2_6,nylo_fft_6); - memory->destroy1d_offset(fkz2_6,nzlo_fft_6); - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - memory->destroy2d_offset(drho1d,-order/2); - memory->destroy2d_offset(drho_coeff, (1-order)/2); - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - memory->destroy(gf_b_6); - memory->destroy2d_offset(rho1d_6,-order_6/2); - memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); - memory->destroy2d_offset(drho1d_6,-order_6/2); - memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); - gf_b_6 = NULL; - rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; - - delete fft1; - delete fft2; - delete remap; - delete cg; - fft1 = fft2 = NULL; - remap = NULL; - cg = 
NULL; - - delete fft1_6; - delete fft2_6; - delete remap_6; - delete cg_6; - fft1_6 = fft2_6 = NULL; - remap_6 = NULL; - cg_6 = NULL; -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - - memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; - - memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; - - memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; - - memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; - - memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; - - memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; - - memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; - - memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); - memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; - - memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; - - delete cg_peratom; - delete cg_peratom_6; - cg_peratom = cg_peratom_6 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald - for Coulomb interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid() -{ - double q2 = qsqsum * force->qqrd2e; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h, h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) - error->all(FLERR,"KSpace accuracy too large to estimate G vector"); - g_ewald = 
sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if (!gridflag) { - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast (xprd/h_x); - ny_pppm = static_cast (yprd/h_y); - nz_pppm = static_cast (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double qopt = compute_qopt(); - - double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (dfkspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); - h *= 0.95; - h_x = h_y = h_z = h; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; -} - -/* ---------------------------------------------------------------------- - set the FFT parameters -------------------------------------------------------------------------- */ - -void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, - int& nxlo_f,int& nylo_f,int& nzlo_f, - int& nxhi_f,int& nyhi_f,int& nzhi_f, - int& nxlo_i,int& nylo_i,int& nzlo_i, - int& nxhi_i,int& nyhi_i,int& nzhi_i, - int& nxlo_o,int& nylo_o,int& nzlo_o, - int& nxhi_o,int& 
nyhi_o,int& nzhi_o, - int& nlow, int& nupp, - int& ng, int& nf, int& nfb, - double& sft,double& sftone, int& ord) -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); - nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; - - nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); - nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; - - nzlo_i = static_cast - (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); - nzhi_i = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; - - - // nlow,nupp = stencil size for mapping particles to PPPM grid - - nlow = -(ord-1)/2; - nupp = ord/2; - - // sft values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (ord % 2) sft = OFFSET + 0.5; - else sft = OFFSET; - if (ord % 2) sftone = 0.0; - else sftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd 
= prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nxlo_o = nlo + nlow; - nxhi_o = nhi + nupp; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nylo_o = nlo + nlow; - nyhi_o = nhi + nupp; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nzlo_o = nlo + nlow; - nzhi_o = nhi + nupp; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) - - if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_i = nz_p - 1; - nzhi_o = nz_p - 1; - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int 
npey_fft,npez_fft; - if (nz_p >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_f = 0; - nxhi_f = nx_p - 1; - nylo_f = me_y*ny_p/npey_fft; - nyhi_f = (me_y+1)*ny_p/npey_fft - 1; - nzlo_f = me_z*nz_p/npez_fft; - nzhi_f = (me_z+1)*nz_p/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * - (nzhi_o-nzlo_o+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * - (nzhi_f-nzlo_f+1); - int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * - (nzhi_i-nzlo_i+1); - nfb = MAX(nf,nfft_brick); - -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMDisp::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ -void PPPMDisp::adjust_gewald() -{ - - // Use Newton solver to find g_ewald - - double dx; - - // Begin algorithm - - for (int i = 0; i < LARGE; i++) { - dx = f() / derivf(); - g_ewald -= dx; //Update g_ewald - if (fabs(f()) < SMALL) return; - } - - // Failed to converge - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); - -} - -/* 
---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double PPPMDisp::f() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f(); - g_ewald_old = g_ewald; - g_ewald += h; - f2 = f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimator for the accuracy -------------------------------------------------------------------------- */ - -double PPPMDisp::final_accuracy() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); - return acc; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimator 
for the Dispersion accuracy -------------------------------------------------------------------------- */ - -void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) -{ - double df_rspace, df_kspace; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - acc_real = lj_rspace_error(); - - double qopt = compute_qopt_6(); - - acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); - return; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Coulomb interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_ad(); - } else { - qopt = compute_qopt_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Dispersion interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_6_ad(); - } else { - qopt = compute_qopt_6_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_ik() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab 
= zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3,dot1,dot2; - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) 
prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - double numerator; - - int nbx = 2; - int nby = 2; - int nbz = 2; - double form = 1.0; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706; - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += sx*sy*sz * u2*4.0*MY_PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Dispersion interaction 
-------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ik() -{ - double qopt = 0.0; - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3; - double dot1,dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1.0/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Dispersion interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4; - double dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - 
wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid and g_ewald_6 - for Dispersion interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid_6() -{ - // Calculate csum - if (!csumflag) calc_csum(); - if (!gewaldflag_6) set_init_g6(); - if (!gridflag_6) set_n_pppm_6(); - while (!factorable(nx_pppm_6)) nx_pppm_6++; - while (!factorable(ny_pppm_6)) ny_pppm_6++; - while (!factorable(nz_pppm_6)) nz_pppm_6++; - -} - -/* ---------------------------------------------------------------------- - Calculate the sum of the squared dispersion coefficients and other - related quantities required for the calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::calc_csum() -{ - csumij = 0.0; - csum = 0.0; - - int ntypes = atom->ntypes; - int i,j,k; - - delete [] cii; - cii = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) cii[i] = 0.0; - delete [] csumi; - csumi = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; - int *neach = new int[ntypes+1]; - for (i = 0; i<=ntypes; i++) neach[i] = 0; - - //the following variables are needed to distinguish between 
arithmetic - // and geometric mixing - - double mix1; // scales 20/16 to 4 - int mix2; // shifts the value to the sigma^3 value - int mix3; // shifts the value to the right atom type - if (function[1]) { - for (i = 1; i <= ntypes; i++) - cii[i] = B[i]*B[i]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += B[tmp]*B[tmp]; - } - } - if (function[2]) { - for (i = 1; i <= ntypes; i++) - cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; - } - } - if (function[3]) { - for (i = 1; i <= ntypes; i++) - for (j = 0; j < nsplit; j++) - cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - for (j = 0; j < nsplit; j++) - csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; - } - } - - - double tmp2; - MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); - csum = tmp2; - csumflag = 1; - - int *neach_all = new int[ntypes+1]; - MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); - - // copmute csumij and csumi - double d1, d2; - if (function[1]){ - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - csumi[i] += neach_all[j]*B[i]*B[j]; - d1 = neach_all[i]*B[i]; - d2 = neach_all[j]*B[j]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; - } - } - } - if (function[2]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; k<=6; k++) { - csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - d1 = neach_all[i]*B[7*i + k]; - d2 = neach_all[j]*B[7*(j+1)-k-1]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - } - } - } - } - if (function[3]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; kall(FLERR, str); - -} - -/* ---------------------------------------------------------------------- - Calculate f(x) for Dispersion 
interaction - ------------------------------------------------------------------------- */ - -double PPPMDisp::f_6() -{ - double df_rspace, df_kspace; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = lj_rspace_error(); - - double qopt = compute_qopt_6(); - df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf_6() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f_6(); - g_ewald_old = g_ewald_6; - g_ewald_6 += h; - f2 = f_6(); - g_ewald_6 = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - - -/* ---------------------------------------------------------------------- - calculate an initial value for g_ewald_6 - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_init_g6() -{ - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - // make initial g_ewald estimate - // based on desired error and real space cutoff - - // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj - // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 - // else, repeat multiply g_ewald_6 by 2 until df_real > 0 - // perform bisection for the last two values of - double df_real; - double g_ewald_old; - double gmin, gmax; - - // check if there is a user defined accuracy - double acc_rspace = accuracy; - if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; - - g_ewald_6 = 
1.0/cutoff_lj; - df_real = lj_rspace_error() - acc_rspace; - int counter = 0; - if (df_real > 0) { - while (df_real > 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 2; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (df_real < 0) { - while (df_real < 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 0.5; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - - gmin = MIN(g_ewald_6, g_ewald_old); - gmax = MAX(g_ewald_6, g_ewald_old); - g_ewald_6 = gmin + 0.5*(gmax-gmin); - counter = 0; - while (gmax-gmin > SMALL && counter < LARGE) { - counter++; - df_real = lj_rspace_error() -acc_rspace; - if (df_real < 0) gmax = g_ewald_6; - else gmin = g_ewald_6; - g_ewald_6 = gmin + 0.5*(gmax-gmin); - } - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - -} - -/* ---------------------------------------------------------------------- - calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_n_pppm_6() -{ - bigint natoms = atom->natoms; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - double h, h_x,h_y,h_z; - - double acc_kspace = accuracy; - if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; - - // initial value for the grid spacing - h = h_x = h_y = h_z = 4.0/g_ewald_6; - // decrease grid spacing untill required precision is obtained - int count = 0; - while(1) { - - // set grid dimension - nx_pppm_6 = static_cast (xprd/h_x); - ny_pppm_6 = static_cast (yprd/h_y); - nz_pppm_6 = static_cast (zprd_slab/h_z); - - if (nx_pppm_6 <= 1) nx_pppm_6 = 2; - if (ny_pppm_6 <= 1) ny_pppm_6 = 2; - if (nz_pppm_6 <= 1) nz_pppm_6 = 2; - - 
//set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm_6 >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft_6 = 0; - nxhi_fft_6 = nx_pppm_6 - 1; - nylo_fft_6 = me_y*ny_pppm_6/npey_fft; - nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; - nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; - nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; - - double qopt = compute_qopt_6(); - - double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (df_kspace <= acc_kspace) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); - h *= 0.95; - h_x = h_y = h_z = h; - } -} - -/* ---------------------------------------------------------------------- - calculate the real space error for dispersion interactions - ---------------------------------------------------------------------- */ - -double PPPMDisp::lj_rspace_error() -{ - bigint natoms = atom->natoms; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - double deltaf; - double rgs = (cutoff_lj*g_ewald_6); - rgs *= rgs; - double rgs_inv = 1.0/rgs; - deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* - exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); - return deltaf; -} - - -/* ---------------------------------------------------------------------- - Compyute the modified (hockney-eastwood) coulomb green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf() -{ - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; 
- volume = xprd * yprd * zprd_slab; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = sin(0.5*qz*zprd_slab/nz_pppm); - snz2 = snz*snz; - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - wz *= wz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = sin(0.5*qy*yprd/ny_pppm); - sny2 = sny*sny; - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - wy *= wy; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = sin(0.5*qx*xprd/nx_pppm); - snx2 = snx*snx; - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - numerator = 4.0*MY_PI/sqk; - denominator = gf_denom(snx2,sny2,snz2, gf_b, order); - greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - double *sf_pre1, double *sf_pre2, double *sf_pre3, - double *sf_pre4, double *sf_pre5, double *sf_pre6) -{ - - 
int i,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - int nb = 2; - - n = 0; - for (m = nzlo_ft; m <= nzhi_ft; m++) { - mper = m - nzp*(2*m/nzp); - - for (l = nylo_ft; l <= nyhi_ft; l++) { - lper = l - nyp*(2*l/nyp); - - for (k = nxlo_ft; k <= nxhi_ft; k++) { - kper = k - nxp*(2*k/nxp); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = -nb; i <= nb; i++) { - - qx0 = unitkx*(kper+nxp*i); - qx1 = unitkx*(kper+nxp*(i+1)); - qx2 = unitkx*(kper+nxp*(i+2)); - wx0[i+2] = 1.0; - wx1[i+2] = 1.0; - wx2[i+2] = 1.0; - argx = 0.5*qx0*xprd/nxp; - if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx1*xprd/nxp; - if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx2*xprd/nxp; - if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); - - qy0 = unitky*(lper+nyp*i); - qy1 = unitky*(lper+nyp*(i+1)); - qy2 = unitky*(lper+nyp*(i+2)); - wy0[i+2] = 1.0; - wy1[i+2] = 1.0; - wy2[i+2] = 1.0; - argy = 0.5*qy0*yprd/nyp; - if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy1*yprd/nyp; - if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy2*yprd/nyp; - if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); - - qz0 = unitkz*(mper+nzp*i); - qz1 = unitkz*(mper+nzp*(i+1)); - qz2 = unitkz*(mper+nzp*(i+2)); - wz0[i+2] = 1.0; - wz1[i+2] = 1.0; - wz2[i+2] = 1.0; - argz = 
0.5*qz0*zprd_slab/nzp; - if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz1*zprd_slab/nzp; - if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz2*zprd_slab/nzp; - if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); - } - - for (nx = 0; nx <= 4; nx++) { - for (ny = 0; ny <= 4; ny++) { - for (nz = 0; nz <= 4; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_pre1[n] = sum1; - sf_pre2[n] = sum2; - sf_pre3[n] = sum3; - sf_pre4[n] = sum4; - sf_pre5[n] = sum5; - sf_pre6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - Compute the modified (hockney-eastwood) dispersion green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_6() -{ - double *prd; - int k,l,m,n; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double sqk; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz; - double qx,qy,qz; - double rtsqk, term; - double numerator,denominator; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = sqrt(MY_PI); - - numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); - qz = unitkz*mper; - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); - snz2 = snz*snz; - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - wz *= wz; - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - qy = unitky*lper; - sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); - sny2 = sny*sny; - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - wy *= wy; - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - qx = unitkx*kper; - snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); - snx2 = snx*snx; - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); - rtsqk = sqrt(sqk); - term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + - 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); - greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; - } else greensfn_6[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ -void PPPMDisp::compute_sf_coeff() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - for (l = nylo_fft; l <= nyhi_fft; l++) { - for (k = nxlo_fft; k <= nxhi_fft; k++) { - sf_coeff[0] 
+= sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - ++n; - } - } - } - - // Compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Dispersion interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_coeff_6() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; - sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; - sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; - sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; - sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; - sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n]; - ++n; - } - } - } - - - // perform multiplication with prefactors - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - 
prex *= nx_pppm_6/xprd; - prey *= ny_pppm_6/yprd; - prez *= nz_pppm_6/zprd_slab; - sf_coeff_6[0] *= prex; - sf_coeff_6[1] *= prex*2; - sf_coeff_6[2] *= prey; - sf_coeff_6[3] *= prey*2; - sf_coeff_6[4] *= prez; - sf_coeff_6[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; - -} - -/* ---------------------------------------------------------------------- - denominator for Hockney-Eastwood Green's function - of x,y,z = sin(kx*deltax/2), etc - - inf n-1 - S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l - j=-inf l=0 - - = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) - gf_b = denominator expansion coeffs -------------------------------------------------------------------------- */ - -double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) -{ - double sx,sy,sz; - sz = sy = sx = 0.0; - for (int l = ord-1; l >= 0; l--) { - sx = g_b[l] + sx*x; - sy = g_b[l] + sy*y; - sz = g_b[l] + sz*z; - } - double s = sx*sy*sz; - return s*s; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_denom(double* gf, int ord) -{ - int k,l,m; - - for (l = 1; l < ord; l++) gf[l] = 0.0; - gf[0] = 1.0; - - for (m = 1; m < ord; m++) { - for (l = m; l > 0; l--) - gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); - gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*ord; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < ord; l++) gf[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for coulomb interaction or 
dispersion interaction with geometric - mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, - LAMMPS_NS::Remap* rmp) -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_i; iz <= nzhi_i; iz++) - for (iy = nylo_i; iy <= nyhi_i; iy++) - for (ix = nxlo_i; ix <= nxhi_i; ix++) - dfft[n++] = dbrick[iz][iy][ix]; - - rmp->perform(dfft,dfft,work); -} - - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with arithmetic mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_a() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) - for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) - for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { - density_fft_a0[n] = density_brick_a0[iz][iy][ix]; - density_fft_a1[n] = density_brick_a1[iz][iy][ix]; - density_fft_a2[n] = density_brick_a2[iz][iy][ix]; - density_fft_a3[n] = density_brick_a3[iz][iy][ix]; - density_fft_a4[n] = density_brick_a4[iz][iy][ix]; - density_fft_a5[n] = density_brick_a5[iz][iy][ix]; - density_fft_a6[n++] = density_brick_a6[iz][iy][ix]; - } - - remap_6->perform(density_fft_a0,density_fft_a0,work1_6); - remap_6->perform(density_fft_a1,density_fft_a1,work1_6); - remap_6->perform(density_fft_a2,density_fft_a2,work1_6); - 
remap_6->perform(density_fft_a3,density_fft_a3,work1_6); - remap_6->perform(density_fft_a4,density_fft_a4,work1_6); - remap_6->perform(density_fft_a5,density_fft_a5,work1_6); - remap_6->perform(density_fft_a6,density_fft_a6,work1_6); - -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with special case -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_none() -{ - int k,n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMDisp::particle_map(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; - - p2g[i][0] = nx; - p2g[i][1] = ny; - p2g[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 
3d brick - - if (nx+nlow < nxlo || nx+nup > nxhi || - ny+nlow < nylo || ny+nup > nyhi || - nz+nlow < nzlo || nz+nup > nzhi) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); -} - - -void PPPMDisp::particle_map_c(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - particle_map(delx, dely, delz, sft, p2g, nup, nlow, - nxlo, nylo, nzlo, nxhi, nyhi, nzhi); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_c() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - 
density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- geometric mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_g() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6 * B[type]; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) 
- in global grid --- arithmetic mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_a() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - - memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - density_brick_a0[mz][my][mx] += w*B[7*type]; - density_brick_a1[mz][my][mx] += 
w*B[7*type+1]; - density_brick_a2[mz][my][mx] += w*B[7*type+2]; - density_brick_a3[mz][my][mx] += w*B[7*type+3]; - density_brick_a4[mz][my][mx] += w*B[7*type+4]; - density_brick_a5[mz][my][mx] += w*B[7*type+5]; - density_brick_a6[mz][my][mx] += w*B[7*type+6]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- case when mixing rules don't apply -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_none() -{ - int k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - for (k = 0; k < nsplit_alloc; k++) - memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) - 
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik differentiation -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* kx, double* ky, double* kz, - double* kx2, double* ky2, double* kz2, - FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg 
-ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x & y direction gradient - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; - wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vx_brick[k][j][i] = wk2[n++]; - vy_brick[k][j][i] = wk2[n++]; - } - - if (!eflag_atom) { - // z direction gradient only - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = kz[k]*wk1[n+1]; - wk2[n+1] = -kz[k]*wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n]; - n += 2; - } - - } - - else { - // z direction gradient & per-atom energy - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; - wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n++]; - u_pa[k][j][i] = wk2[n++];; - } - } - - if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ad differentiation 
-------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = wk1[n]; - wk2[n+1] = wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - u_pa[k][j][i] = wk2[n++]; - n++; - } - - - if (vflag_atom) poisson_peratom(wk1, 
wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); - -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, - double** vcoeff, double** vcoeff2, int nft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) -{ - //v0 & v1 term - int n, i, j, k; - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; - wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v0_pa[k][j][i] = wk2[n++]; - v1_pa[k][j][i] = wk2[n++]; - } - - //v2 & v3 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; - wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v2_pa[k][j][i] = wk2[n++]; - v3_pa[k][j][i] = wk2[n++]; - } - - //v4 & v5 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; - wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v4_pa[k][j][i] = wk2[n++]; - v5_pa[k][j][i] = wk2[n++]; - } - -} - -/* 
---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify 
the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = work2_6[n++]; - vxbrick_2[k][j][i] = work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = work2_6[n++]; - vybrick_2[k][j][i] = work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; 
j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = work2_6[n++]; - vzbrick_2[k][j][i] = work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - u_pa_2[k][j][i] = work2_6[n++]; - } - } - - if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - - - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; 
- } - - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } 
- - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = B[n1]*work2_6[n++]; - vybrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; - u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; - } - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, 
FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - u_pa_2[k][j][i] = work2_6[n++]; - } - - if (vflag_atom) 
poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, - FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two 
transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = B[n1]*work2_6[n++]; - u_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = work2_6[n++]; - v0_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - 
fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = work2_6[n++]; - v1_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = work2_6[n++]; - v2_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = work2_6[n++]; - v3_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = work2_6[n++]; - v4_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = work2_6[n++]; - v5_pa_2[k][j][i] = work2_6[n++]; - } -} - -/* 
---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_peratom(int n1, int n2, - FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = 
work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = 
atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - 
for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - // convert E-field to force and substract self forces - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = 
global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - // convert E-field to force - - const double qfactor = 0.5*force->qqrd2e * scale * q[i]; - - if (eflag_atom) eatom[i] += u_pa*qfactor; - if (vflag_atom) { - vatom[i][0] += v0*qfactor; - vatom[i][1] += v1*qfactor; - vatom[i][2] += v2*qfactor; - vatom[i][3] += v3*qfactor; - vatom[i][4] += v4*qfactor; - vatom[i][5] += v5*qfactor; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving 
stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx -= x0*vdx_brick_g[mz][my][mx]; - eky -= x0*vdy_brick_g[mz][my][mx]; - ekz -= x0*vdz_brick_g[mz][my][mx]; - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - f[i][0] += lj*ekx; - f[i][1] += lj*eky; - if (slabflag != 2) f[i][2] += lj*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt 
- // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 2*lj*lj; - f[i][0] += ekx*lj - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 2*lj*lj; - f[i][1] += eky*lj - sf; - - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 2*lj*lj; - if (slabflag != 2) f[i][2] += ekz*lj - sf; - - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - 
FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick_g[mz][my][mx]; - v1 += x0*v1_brick_g[mz][my][mx]; - v2 += x0*v2_brick_g[mz][my][mx]; - v3 += x0*v3_brick_g[mz][my][mx]; - v4 += x0*v4_brick_g[mz][my][mx]; - v5 += x0*v5_brick_g[mz][my][mx]; - } - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]*0.5; - - if (eflag_atom) eatom[i] += u_pa*lj; - if (vflag_atom) { - vatom[i][0] += v0*lj; - vatom[i][1] += v1*lj; - vatom[i][2] += v2*lj; - vatom[i][3] += v3*lj; - vatom[i][4] += v4*lj; - vatom[i][5] += v5*lj; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_ik() -{ - int 
i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx0 -= x0*vdx_brick_a0[mz][my][mx]; - eky0 -= x0*vdy_brick_a0[mz][my][mx]; - ekz0 -= x0*vdz_brick_a0[mz][my][mx]; - ekx1 -= x0*vdx_brick_a1[mz][my][mx]; - eky1 -= x0*vdy_brick_a1[mz][my][mx]; - ekz1 -= x0*vdz_brick_a1[mz][my][mx]; - ekx2 -= x0*vdx_brick_a2[mz][my][mx]; - eky2 -= x0*vdy_brick_a2[mz][my][mx]; - ekz2 -= x0*vdz_brick_a2[mz][my][mx]; - ekx3 -= x0*vdx_brick_a3[mz][my][mx]; - eky3 -= x0*vdy_brick_a3[mz][my][mx]; - ekz3 -= x0*vdz_brick_a3[mz][my][mx]; - ekx4 -= x0*vdx_brick_a4[mz][my][mx]; - eky4 -= x0*vdy_brick_a4[mz][my][mx]; - 
ekz4 -= x0*vdz_brick_a4[mz][my][mx]; - ekx5 -= x0*vdx_brick_a5[mz][my][mx]; - eky5 -= x0*vdy_brick_a5[mz][my][mx]; - ekz5 -= x0*vdz_brick_a5[mz][my][mx]; - ekx6 -= x0*vdx_brick_a6[mz][my][mx]; - eky6 -= x0*vdy_brick_a6[mz][my][mx]; - ekz6 -= x0*vdz_brick_a6[mz][my][mx]; - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = 
atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - ekx0 += x0*u_brick_a0[mz][my][mx]; - eky0 += y0*u_brick_a0[mz][my][mx]; - ekz0 += z0*u_brick_a0[mz][my][mx]; - - ekx1 += x0*u_brick_a1[mz][my][mx]; - eky1 += y0*u_brick_a1[mz][my][mx]; - ekz1 += z0*u_brick_a1[mz][my][mx]; - - ekx2 += x0*u_brick_a2[mz][my][mx]; - eky2 += y0*u_brick_a2[mz][my][mx]; - ekz2 += z0*u_brick_a2[mz][my][mx]; - - ekx3 += x0*u_brick_a3[mz][my][mx]; - eky3 += y0*u_brick_a3[mz][my][mx]; - ekz3 += z0*u_brick_a3[mz][my][mx]; - - ekx4 += x0*u_brick_a4[mz][my][mx]; - eky4 += y0*u_brick_a4[mz][my][mx]; - ekz4 += z0*u_brick_a4[mz][my][mx]; - - ekx5 += x0*u_brick_a5[mz][my][mx]; - eky5 += y0*u_brick_a5[mz][my][mx]; - ekz5 += z0*u_brick_a5[mz][my][mx]; - - ekx6 += x0*u_brick_a6[mz][my][mx]; - eky6 += y0*u_brick_a6[mz][my][mx]; - ekz6 += z0*u_brick_a6[mz][my][mx]; - } - } - } - - ekx0 *= hx_inv; - eky0 *= hy_inv; - ekz0 *= hz_inv; - - ekx1 *= hx_inv; - eky1 *= hy_inv; - ekz1 *= hz_inv; - - ekx2 *= hx_inv; - eky2 *= 
hy_inv; - ekz2 *= hz_inv; - - ekx3 *= hx_inv; - eky3 *= hy_inv; - ekz3 *= hz_inv; - - ekx4 *= hx_inv; - eky4 *= hy_inv; - ekz4 *= hz_inv; - - ekx5 *= hx_inv; - eky5 *= hy_inv; - ekz5 *= hz_inv; - - ekx6 *= hx_inv; - eky6 *= hy_inv; - ekz6 *= hz_inv; - - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; - FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; - FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; - FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; - FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; - FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; - FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; - - // loop over my charges, interpolate electric field from nearby 
grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; - u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; - u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; - u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; - u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; - u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; - u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - u_pa0 += x0*u_brick_a0[mz][my][mx]; - u_pa1 += x0*u_brick_a1[mz][my][mx]; - u_pa2 += x0*u_brick_a2[mz][my][mx]; - u_pa3 += x0*u_brick_a3[mz][my][mx]; - u_pa4 += x0*u_brick_a4[mz][my][mx]; - u_pa5 += x0*u_brick_a5[mz][my][mx]; - u_pa6 += x0*u_brick_a6[mz][my][mx]; - } - if (vflag_atom) { - v00 += x0*v0_brick_a0[mz][my][mx]; - v10 += x0*v1_brick_a0[mz][my][mx]; - v20 += x0*v2_brick_a0[mz][my][mx]; - v30 += x0*v3_brick_a0[mz][my][mx]; - v40 += x0*v4_brick_a0[mz][my][mx]; - v50 += x0*v5_brick_a0[mz][my][mx]; - v01 += x0*v0_brick_a1[mz][my][mx]; - v11 += x0*v1_brick_a1[mz][my][mx]; - v21 += x0*v2_brick_a1[mz][my][mx]; - v31 += x0*v3_brick_a1[mz][my][mx]; - v41 += 
x0*v4_brick_a1[mz][my][mx]; - v51 += x0*v5_brick_a1[mz][my][mx]; - v02 += x0*v0_brick_a2[mz][my][mx]; - v12 += x0*v1_brick_a2[mz][my][mx]; - v22 += x0*v2_brick_a2[mz][my][mx]; - v32 += x0*v3_brick_a2[mz][my][mx]; - v42 += x0*v4_brick_a2[mz][my][mx]; - v52 += x0*v5_brick_a2[mz][my][mx]; - v03 += x0*v0_brick_a3[mz][my][mx]; - v13 += x0*v1_brick_a3[mz][my][mx]; - v23 += x0*v2_brick_a3[mz][my][mx]; - v33 += x0*v3_brick_a3[mz][my][mx]; - v43 += x0*v4_brick_a3[mz][my][mx]; - v53 += x0*v5_brick_a3[mz][my][mx]; - v04 += x0*v0_brick_a4[mz][my][mx]; - v14 += x0*v1_brick_a4[mz][my][mx]; - v24 += x0*v2_brick_a4[mz][my][mx]; - v34 += x0*v3_brick_a4[mz][my][mx]; - v44 += x0*v4_brick_a4[mz][my][mx]; - v54 += x0*v5_brick_a4[mz][my][mx]; - v05 += x0*v0_brick_a5[mz][my][mx]; - v15 += x0*v1_brick_a5[mz][my][mx]; - v25 += x0*v2_brick_a5[mz][my][mx]; - v35 += x0*v3_brick_a5[mz][my][mx]; - v45 += x0*v4_brick_a5[mz][my][mx]; - v55 += x0*v5_brick_a5[mz][my][mx]; - v06 += x0*v0_brick_a6[mz][my][mx]; - v16 += x0*v1_brick_a6[mz][my][mx]; - v26 += x0*v2_brick_a6[mz][my][mx]; - v36 += x0*v3_brick_a6[mz][my][mx]; - v46 += x0*v4_brick_a6[mz][my][mx]; - v56 += x0*v5_brick_a6[mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]*0.5; - lj1 = B[7*type+5]*0.5; - lj2 = B[7*type+4]*0.5; - lj3 = B[7*type+3]*0.5; - lj4 = B[7*type+2]*0.5; - lj5 = B[7*type+1]*0.5; - lj6 = B[7*type]*0.5; - - - if (eflag_atom) - eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + - u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; - if (vflag_atom) { - vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + - v04*lj4 + v05*lj5 + v06*lj6; - vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + - v14*lj4 + v15*lj5 + v16*lj6; - vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + - v24*lj4 + v25*lj5 + v26*lj6; - vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + - v34*lj4 + v35*lj5 + v36*lj6; - vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + - v44*lj4 + v45*lj5 + v46*lj6; 
- vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + - v54*lj4 + v55*lj5 + v56*lj6; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ik() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) { - ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; - eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; - ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; - f[i][0] += lj*ekx[k]; - f[i][1] 
+=lj*eky[k]; - if (slabflag != 2) f[i][2] +=lj*ekz[k]; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ad() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - - - double s1,s2,s3; - double sf1,sf2,sf3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = 
m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - for (k = 0; k < nsplit; k++) { - ekx[k] += x0*u_brick_none[k][mz][my][mx]; - eky[k] += y0*u_brick_none[k][mz][my][mx]; - ekz[k] += z0*u_brick_none[k][mz][my][mx]; - } - } - } - } - - for (k = 0; k < nsplit; k++) { - ekx[k] *= hx_inv; - eky[k] *= hy_inv; - ekz[k] *= hz_inv; - } - - // convert D-field to force - type = atom->type[i]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); - - sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); - - sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); - - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; - - sf = sf1*B[k]*2*lj*lj; - f[i][0] += lj*ekx[k] - sf; - - - sf = sf2*B[k]*2*lj*lj; - f[i][1] += lj*eky[k] - sf; - - sf = sf3*B[k]*2*lj*lj; - if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_peratom() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; - - u_pa = new FFT_SCALAR[nsplit]; - v0 = new FFT_SCALAR[nsplit]; - v1 = new FFT_SCALAR[nsplit]; - v2 = new FFT_SCALAR[nsplit]; - v3 = new FFT_SCALAR[nsplit]; - v4 = new FFT_SCALAR[nsplit]; - v5 = new FFT_SCALAR[nsplit]; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // 
(dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - for (k = 0; k < nsplit; k++) - u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - for (k = 0; k < nsplit; k++) - u_pa[k] += x0*u_brick_none[k][mz][my][mx]; - } - if (vflag_atom) { - for (k = 0; k < nsplit; k++) { - v0[k] += x0*v0_brick_none[k][mz][my][mx]; - v1[k] += x0*v1_brick_none[k][mz][my][mx]; - v2[k] += x0*v2_brick_none[k][mz][my][mx]; - v3[k] += x0*v3_brick_none[k][mz][my][mx]; - v4[k] += x0*v4_brick_none[k][mz][my][mx]; - v5[k] += x0*v5_brick_none[k][mz][my][mx]; - } - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]*0.5; - - if (eflag_atom) { - eatom[i] += u_pa[k]*lj; - } - if (vflag_atom) { - vatom[i][0] += v0[k]*lj; - vatom[i][1] += v1[k]*lj; - vatom[i][2] += v2[k]*lj; - vatom[i][3] += v3[k]*lj; - vatom[i][4] += v4[k]*lj; - vatom[i][5] += v5[k]*lj; - } - } - } - - delete [] u_pa; - delete [] v0; - delete [] v1; - delete [] v2; - delete [] v3; - delete [] v4; - delete [] v5; -} - -/* ---------------------------------------------------------------------- - pack values to buf to send to another proc 
-------------------------------------------------------------------------- */ - -void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - 
buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, arithmetic mixing - - case FORWARD_IK_A: { - FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc0[list[i]]; - buf[n++] = ysrc0[list[i]]; - 
buf[n++] = zsrc0[list[i]]; - - buf[n++] = xsrc1[list[i]]; - buf[n++] = ysrc1[list[i]]; - buf[n++] = zsrc1[list[i]]; - - buf[n++] = xsrc2[list[i]]; - buf[n++] = ysrc2[list[i]]; - buf[n++] = zsrc2[list[i]]; - - buf[n++] = xsrc3[list[i]]; - buf[n++] = ysrc3[list[i]]; - buf[n++] = zsrc3[list[i]]; - - buf[n++] = xsrc4[list[i]]; - buf[n++] = ysrc4[list[i]]; - buf[n++] = zsrc4[list[i]]; - - buf[n++] = xsrc5[list[i]]; - buf[n++] = ysrc5[list[i]]; - buf[n++] = zsrc5[list[i]]; - - buf[n++] = xsrc6[list[i]]; - buf[n++] = ysrc6[list[i]]; - buf[n++] = zsrc6[list[i]]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - buf[n++] = esrc0[list[i]]; - buf[n++] = esrc1[list[i]]; - buf[n++] = esrc2[list[i]]; - buf[n++] = esrc3[list[i]]; - buf[n++] = esrc4[list[i]]; - buf[n++] = esrc5[list[i]]; - buf[n++] = esrc6[list[i]]; - } - if (vflag_atom) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = 
v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = 
v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - break; - } - - // Dispersion interactions, no mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[n++] = src[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, arithmetic mixing - - case 
FORWARD_IK_A: { - FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - xdest0[list[i]] = buf[n++]; - ydest0[list[i]] = buf[n++]; - zdest0[list[i]] = buf[n++]; - - xdest1[list[i]] = buf[n++]; - ydest1[list[i]] = buf[n++]; - zdest1[list[i]] = buf[n++]; - - xdest2[list[i]] = buf[n++]; - ydest2[list[i]] = buf[n++]; - zdest2[list[i]] = buf[n++]; - - xdest3[list[i]] = buf[n++]; - ydest3[list[i]] = buf[n++]; - zdest3[list[i]] = buf[n++]; - - 
xdest4[list[i]] = buf[n++]; - ydest4[list[i]] = buf[n++]; - zdest4[list[i]] = buf[n++]; - - xdest5[list[i]] = buf[n++]; - ydest5[list[i]] = buf[n++]; - zdest5[list[i]] = buf[n++]; - - xdest6[list[i]] = buf[n++]; - ydest6[list[i]] = buf[n++]; - zdest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - dest0[list[i]] = buf[n++]; - dest1[list[i]] = buf[n++]; - dest2[list[i]] = buf[n++]; - dest3[list[i]] = buf[n++]; - dest4[list[i]] = buf[n++]; - dest5[list[i]] = buf[n++]; - dest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - esrc0[list[i]] = buf[n++]; - esrc1[list[i]] = buf[n++]; - esrc2[list[i]] = buf[n++]; - esrc3[list[i]] = buf[n++]; - esrc4[list[i]] = buf[n++]; - esrc5[list[i]] = buf[n++]; - esrc6[list[i]] = buf[n++]; - } - if (vflag_atom) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - 
v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - 
v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == 
REVERSE_RHO_A) { - FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src[list[i]]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == REVERSE_RHO_A) { - FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - dest0[list[i]] += buf[n++]; - dest1[list[i]] += buf[n++]; - dest2[list[i]] += buf[n++]; - dest3[list[i]] += buf[n++]; - dest4[list[i]] += buf[n++]; - dest5[list[i]] += buf[n++]; - dest6[list[i]] += buf[n++]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* 
---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **rho_c, FFT_SCALAR **r1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-1; l >= 0; l--) { - r1 = rho_c[l][k] + r1*dx; - r2 = rho_c[l][k] + r2*dy; - r3 = rho_c[l][k] + r3*dz; - } - r1d[0][k] = r1; - r1d[1][k] = r2; - r1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-2; l >= 0; l--) { - r1 = drho_c[l][k] + r1*dx; - r2 = drho_c[l][k] + r2*dy; - r3 = drho_c[l][k] + r3*dz; - } - dr1d[0][k] = r1; - dr1d[1][k] = r2; - dr1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, - int ord) -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); - - for (k = -ord; k <= ord; k++) - for (l = 0; l < ord; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < ord; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-ord)/2; - for (k = -(ord-1); k < ord; k += 2) { - for (l = 0; l < ord; l++) - coeff[l][m] = a[l][k]; - for (l = 1; l < ord; l++) - dcoeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-ord); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMDisp::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy_1 += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_1d(int n, double &time1d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1){ - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->timing1d(work1_6,nfft_both_6,1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - if (differentiation_flag != 1){ - fft2_6->timing1d(work1_6,nfft_both_6,-1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d += (time2 - time1)*mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_3d(int n, double &time3d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + 
function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->compute(work1_6,work1_6,1); - fft2_6->compute(work1_6,work1_6,-1); - if (differentiation_flag != 1) { - fft2_6->compute(work1_6,work1_6,-1); - fft2_6->compute(work1_6,work1_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d += (time2 - time1) * mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMDisp::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int mixing = 1; - int diff = 3; //depends on differentiation - int per = 7; //depends on per atom calculations - if (differentiation_flag) { - diff = 1; - per = 6; - } - if (!evflag_atom) per = 0; - if (function[2]) mixing = 7; - if (function[3]) mixing = nsplit_alloc; - - if (function[0]) { - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory - bytes += 6 * nfft_both * sizeof(double); // vg - bytes += nfft_both * sizeof(double); // greensfn - bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg->memory_usage(); - } - - if (function[1] + function[2] + function[3]) { - int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * - (nzhi_out_6-nzlo_out_6+1); - bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks - bytes += 6 * nfft_both_6 * sizeof(double); // vg - bytes += nfft_both_6 * sizeof(double); // greensfn - bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg_6->memory_usage(); - } - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + 
http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Rolf Isele-Holder (Aachen University) + Paul Crozier (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_disp.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; +enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; +enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, + FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, + FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, + FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; + + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : 
KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); + + triclinic_support = 0; + pppmflag = dispersionflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + csumflag = 0; + B = NULL; + cii = NULL; + csumi = NULL; + peratom_allocate_flag = 0; + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = + v4_brick = v5_brick = NULL; + + density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = + v4_brick_g = v5_brick_g = NULL; + + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = + v4_brick_a0 = v5_brick_a0 = NULL; + + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = + v4_brick_a1 = v5_brick_a1 = NULL; + + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = + v4_brick_a2 = v5_brick_a2 = NULL; + + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = + v4_brick_a3 = v5_brick_a3 = NULL; + + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = + v4_brick_a4 = v5_brick_a4 = NULL; + + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + u_brick_a5 = v0_brick_a5 = 
v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = + v4_brick_a5 = v5_brick_a5 = NULL; + + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = + v4_brick_a6 = v5_brick_a6 = NULL; + + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = + v4_brick_none = v5_brick_none = NULL; + + greensfn = NULL; + greensfn_6 = NULL; + work1 = work2 = NULL; + work1_6 = work2_6 = NULL; + vg = NULL; + vg2 = NULL; + vg_6 = NULL; + vg2_6 = NULL; + fkx = fky = fkz = NULL; + fkx2 = fky2 = fkz2 = NULL; + fkx_6 = fky_6 = fkz_6 = NULL; + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = + sf_precoeff5 = sf_precoeff6 = NULL; + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = + sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + gf_b = NULL; + gf_b_6 = NULL; + rho1d = rho_coeff = NULL; + drho1d = drho_coeff = NULL; + rho1d_6 = rho_coeff_6 = NULL; + drho1d_6 = drho_coeff_6 = NULL; + fft1 = fft2 = NULL; + fft1_6 = fft2_6 = NULL; + remap = NULL; + remap_6 = NULL; + + nmax = 0; + part2grid = NULL; + part2grid_6 = NULL; + + cg = NULL; + cg_peratom = NULL; + cg_6 = NULL; + cg_peratom_6 = NULL; + + memset(function, 0, EWALD_FUNCS*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMDisp::~PPPMDisp() +{ + delete [] factors; + delete [] B; + B = NULL; + delete [] cii; + cii = NULL; + delete [] csumi; + csumi = NULL; + deallocate(); + deallocate_peratom(); + memory->destroy(part2grid); + memory->destroy(part2grid_6); + part2grid = part2grid_6 = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run 
+------------------------------------------------------------------------- */ + +void PPPMDisp::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); + } + + if (order > MAXORDER || order_6 > MAXORDER) { + char str[128]; + sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // set scale + + scale = 1.0; + + triclinic = domain->triclinic; + + // check whether cutoff and pair style are set + + pair_check(); + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; + if (!(ptr||*p_cutoff||*p_cutoff_lj)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + cutoff_lj = *p_cutoff_lj; + + double tmp2; + MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); + + // check out which types of potentials will have to be calculated + + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_FUNCS*sizeof(int)); + for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order + if (ewald_order&(1<pair_style); + error->all(FLERR,str); + } + function[k] = 1; + } + + + // warn, if function[0] is not set but charge attribute is set! + if (!function[0] && atom->q_flag && me == 0) { + char str[128]; + sprintf(str, "Charges are set, but coulombic solver is not used"); + error->warning(FLERR, str); + } + + // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral + + if (function[0]) { + if (!atom->q_flag) + error->all(FLERR,"Kspace style with selected options " + "requires atom attribute q"); + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver with selected options " + "on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + } + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + int itmp; + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = 
*p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + + // initialize the pair style to get the coefficients + neighrequest_flag = 0; + pair->init(); + neighrequest_flag = 1; + init_coeffs(); + + //if g_ewald and g_ewald_6 have not been specified, set some initial value + // to avoid problems when calculating the energies! + + if (!gewaldflag) g_ewald = 1; + if (!gewaldflag_6) g_ewald_6 = 1; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + int (*procneigh)[2] = comm->procneigh; + + int iteration = 0; + if (function[0]) { + CommGrid *cgtmp = NULL; + while (order >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp Coulomb order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + // set grid for dispersion interaction and coulomb interactions + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMDisp Coulomb grid is too large"); + + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, + nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); 
+ + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp, world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out, + nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + } + + if (order < minorder) + error->all(FLERR, + "Coulomb PPPMDisp order has been reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if (!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double acc = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," Coulomb stencil order = %d\n",order); + fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(screen," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + if (logfile) { + fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," Coulomb stencil order = %d\n",order); + fprintf(logfile, + " Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + } + } + + iteration = 0; + if (function[1] + function[2] + function[3]) { + CommGrid *cgtmp = NULL; + while (order_6 >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp dispersion order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + set_grid_6(); + + if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) + error->all(FLERR,"PPPMDisp Dispersion grid is too large"); + + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, + nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, + nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + order_6--; + } + + if (order_6 < minorder) + error->all(FLERR,"Dispersion PPPMDisp order has been " + "reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald_6 + + if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) + adjust_gewald_6(); + + // calculate the final accuracy + + double acc, acc_real, acc_kspace; + final_accuracy_6(acc, acc_real, acc_kspace); + + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); + fprintf(screen," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(screen," Dispersion stencil order = %d\n",order_6); + fprintf(screen," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(screen," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(screen," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(screen," Dispersion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); + fprintf(logfile," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(logfile," Dispersion stencil order = %d\n",order_6); + fprintf(logfile," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(logfile," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(logfile," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(logfile," Disperion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + } + } + + // allocate K-space dependent memory + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + cg->ghost_notify(); + cg->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + 
nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + cg_6->ghost_notify(); + cg_6->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); + } + +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMDisp::setup() +{ + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // compute fkx,fky,fkz for my FFT grid pts + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + //compute the virial coefficients and green functions + if (function[0]){ + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double per; + int i, j, k, n; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + j = (nx_pppm - i) % nx_pppm; + per = j - nx_pppm*(2*j/nx_pppm); + fkx2[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + j = (ny_pppm - i) % ny_pppm; + per = j - ny_pppm*(2*j/ny_pppm); + fky2[i] 
= unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + j = (nz_pppm - i) % nz_pppm; + per = j - nz_pppm*(2*j/nz_pppm); + fkz2[i] = unitkz*per; + } + + double sqk,vterm; + double gew2inv = 1/(g_ewald*g_ewald); + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); + vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); + vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); + } + n++; + } + } + } + compute_gf(); + if (differentiation_flag == 1) compute_sf_coeff(); + } + + if (function[1] + function[2] + function[3]) { + delxinv_6 = nx_pppm_6/xprd; + delyinv_6 = ny_pppm_6/yprd; + delzinv_6 = nz_pppm_6/zprd_slab; + delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; + + double per; + int i, j, k, n; + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + per = i - nx_pppm_6*(2*i/nx_pppm_6); + fkx_6[i] = unitkx*per; + j = (nx_pppm_6 - i) % nx_pppm_6; + per = j - nx_pppm_6*(2*j/nx_pppm_6); + fkx2_6[i] = unitkx*per; + } + for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { + per = i - ny_pppm_6*(2*i/ny_pppm_6); + fky_6[i] = unitky*per; + j = (ny_pppm_6 - i) % ny_pppm_6; + per = j - ny_pppm_6*(2*j/ny_pppm_6); + fky2_6[i] = unitky*per; + } + for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { + per = i - nz_pppm_6*(2*i/nz_pppm_6); + fkz_6[i] = unitkz*per; + j = (nz_pppm_6 - i) % nz_pppm_6; + per = j - nz_pppm_6*(2*j/nz_pppm_6); + 
fkz2_6[i] = unitkz*per; + } + double sqk,vterm; + long double erft, expt,nom, denom; + long double b, bs, bt; + double rtpi = sqrt(MY_PI); + double gewinv = 1/g_ewald_6; + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; + if (sqk == 0.0) { + vg_6[n][0] = 0.0; + vg_6[n][1] = 0.0; + vg_6[n][2] = 0.0; + vg_6[n][3] = 0.0; + vg_6[n][4] = 0.0; + vg_6[n][5] = 0.0; + } else { + b = 0.5*sqrt(sqk)*gewinv; + bs = b*b; + bt = bs*b; + erft = 2*bt*rtpi*erfc(b); + expt = exp(-bs); + nom = erft - 2*bs*expt; + denom = nom + expt; + if (denom == 0) vterm = 3.0/sqk; + else vterm = 3.0*nom/(sqk*denom); + vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; + vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; + vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; + vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; + vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; + vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; + vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); + vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); + vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); + } + n++; + } + } + } + compute_gf_6(); + if (differentiation_flag == 1) compute_sf_coeff_6(); + } +} + +/* ---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPMDisp::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // reset portion of global grid that each proc owns + + if (function[0]) + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, 
+ nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); + + if (function[1] + function[2] + function[3]) + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate_peratom(), compute() will allocate when needed + + allocate(); + + if (function[0]) { + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_6->ghost_notify(); + if (overlap_allowed == 0 && cg_6->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg_6->setup(); + } + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); + } + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDisp::compute(int eflag, int vflag) +{ + + int i; + // convert atoms from box to lamda coords + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + if (function[0]) { + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_peratom_6->ghost_notify(); + cg_peratom_6->setup(); + } + peratom_allocate_flag = 1; + } + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + + if (function[0]) memory->destroy(part2grid); + if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); + nmax = atom->nmax; + if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); + if (function[1] + function[2] + function[3]) + memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); + } + + + energy = 0.0; + energy_1 = 0.0; + energy_6 = 0.0; + if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; + + // find grid points for all my particles + // distribute partcles' charges/dispersion coefficients on the grid + // communication between processors and remapping two fft + // Solution of poissons equation in k-space and backtransformation + // communication between processors + // calculation of forces + + if (function[0]) { + + //perfrom calculations for coulomb interactions only + + particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, + nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); + + make_rho_c(); + + cg->reverse_comm(this,REVERSE_RHO); + + brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + density_brick, density_fft, work1,remap); + + if (differentiation_flag == 1) { + + poisson_ad(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this,FORWARD_AD); + + fieldforce_c_ad(); + + if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); + + } else { + poisson_ik(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + fkx, fky, fkz,fkx2, fky2, fkz2, + vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this, FORWARD_IK); + + fieldforce_c_ik(); + + if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); + } + if (evflag_atom) fieldforce_c_peratom(); + } + + if (function[1]) { + //perfrom calculations for geometric mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_g(); + + + cg_6->reverse_comm(this, REVERSE_RHO_G); + + brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + density_brick_g, density_fft_g, work1_6,remap_6); + + if (differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_AD_G); + + fieldforce_g_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); + + } else { + poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_IK_G); + + fieldforce_g_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); + } + if (evflag_atom) fieldforce_g_peratom(); + } + + if (function[2]) { + //perform calculations for arithmetic mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_a(); + + cg_6->reverse_comm(this, REVERSE_RHO_A); + + brick2fft_a(); + + if ( differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ad(density_fft_a0, density_fft_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ad(density_fft_a1, density_fft_a5, + 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ad(density_fft_a2, density_fft_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_AD_A); + + fieldforce_a_ad(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); + + } else { + + poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ik(density_fft_a0, density_fft_a6, + vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, + vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ik(density_fft_a1, density_fft_a5, + vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, + vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, + u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ik(density_fft_a2, density_fft_a4, + vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, + vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_IK_A); + + fieldforce_a_ik(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); + } + if (evflag_atom) fieldforce_a_peratom(); + } + + if (function[3]) { + //perfrom calculations if no mixing rule applies + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + + make_rho_none(); + + cg_6->reverse_comm(this, REVERSE_RHO_NONE); + + brick2fft_none(); + + if (differentiation_flag == 1) { + + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); + + fieldforce_none_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); + + } else { + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); + + fieldforce_none_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); + } + if (evflag_atom) fieldforce_none_peratom(); + } + + // sum energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_1 = energy_all; + MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_6 = energy_all; + + energy_1 *= 0.5*volume; + energy_6 *= 0.5*volume; + + energy_1 -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + + 1.0/12.0*pow(g_ewald_6,6)*csum; + energy_1 *= qscale; + } + + // sum virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; + if 
(function[1]+function[2]+function[3]){ + double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; + virial[0] -= a; + virial[1] -= a; + virial[2] -= a; + } + } + + if (eflag_atom) { + if (function[0]) { + double *q = atom->q; + for (i = 0; i < atom->nlocal; i++) { + eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction + } + } + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + + 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; + } + } + } + + if (vflag_atom) { + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction + } + } + } + + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + if (function[0]) energy += energy_1; + if (function[1] + function[2] + function[3]) energy += energy_6; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + initialize coefficients needed for the dispersion density on the grids +------------------------------------------------------------------------- */ + +void PPPMDisp::init_coeffs() // local pair coeffs +{ + int tmp; + int n = atom->ntypes; + int converged; + delete [] B; + if (function[3] + function[2]) { // no mixing rule or arithmetic + if (function[2] && me == 0) { + if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); + if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); + } + // get dispersion coefficients + double **b = (double **) force->pair->extract("B",tmp); + // allocate data for eigenvalue decomposition + double **A; + double 
**Q; + memory->create(A,n,n,"pppm/disp:A"); + memory->create(Q,n,n,"pppm/disp:Q"); + // fill coefficients to matrix a + for (int i = 1; i <= n; i++) + for (int j = 1; j <= n; j++) + A[i-1][j-1] = b[i][j]; + // transform q to a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Q[i][j] = 0.0; + for (int i = 0; i < n; i++) + Q[i][i] = 1.0; + // perfrom eigenvalue decomposition with QR algorithm + converged = qr_alg(A,Q,n); + if (function[3] && !converged) { + error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); + } + // determine number of used eigenvalues + // based on maximum allowed number or cutoff criterion + // sort eigenvalues according to their size with bubble sort + double t; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n-1-i; j++) { + if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { + t = A[j][j]; + A[j][j] = A[j+1][j+1]; + A[j+1][j+1] = t; + for (int k = 0; k < n; k++) { + t = Q[k][j]; + Q[k][j] = Q[k][j+1]; + Q[k][j+1] = t; + } + } + } + } + + // check which eigenvalue is the first that is smaller + // than a specified tolerance + // check how many are maximum allowed by the user + double amax = fabs(A[0][0]); + double acrit = amax*splittol; + double bmax = 0; + double err = 0; + nsplit = 0; + for (int i = 0; i < n; i++) { + if (fabs(A[i][i]) > acrit) nsplit++; + else { + bmax = fabs(A[i][i]); + break; + } + } + + err = bmax/amax; + if (err > 1.0e-4) { + char str[128]; + sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); + error->warning(FLERR, str); + } + // set B + B = new double[nsplit*n+nsplit]; + for (int i = 0; i< nsplit; i++) { + B[i] = A[i][i]; + for (int j = 0; j < n; j++) { + B[nsplit*(j+1) + i] = Q[j][i]; + } + } + + nsplit_alloc = nsplit; + if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; + // check if the function should preferably be [1] or [2] or [3] + if (nsplit == 1) { + delete [] B; + function[3] = 0; + function[2] = 0; + function[1] = 1; + if (me == 0) { + if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); + if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); + } + } + if (function[2] && nsplit <= 6) { + if (me == 0) { + if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); + } + function[3] = 1; + function[2] = 0; + } + if (function[2] && (nsplit > 6)) { + if (me == 0) { + if (screen) fprintf(screen," Using 7 structure factors\n"); + if (logfile) fprintf(logfile," Using 7 structure factors\n"); + } + delete [] B; + } + if (function[3]) { + if (me == 0) { + if (screen) fprintf(screen," Using %d structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); + } + if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); + } + + memory->destroy(A); + memory->destroy(Q); + } + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + B = new double[n+1]; + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + //cannot use epsilon, because this has not been set yet + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + //cannot use sigma, because this has not been set yet + double **sigma = (double **) force->pair->extract("sigma",tmp); + if (!(epsilon&&sigma)) + error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]*0.25; + sigma_n *= sigma_i; + } + } + } +} + +/* 
---------------------------------------------------------------------- + Eigenvalue decomposition of a real, symmetric matrix with the QR + method (includes transpformation to Tridiagonal Matrix + Wilkinson + shift) +------------------------------------------------------------------------- */ + +int PPPMDisp::qr_alg(double **A, double **Q, int n) +{ + int converged = 0; + double an1, an, bn1, d, mue; + // allocate some memory for the required operations + double **A0,**Qi,**C,**D,**E; + // make a copy of A for convergence check + memory->create(A0,n,n,"pppm/disp:A0"); + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + A0[i][j] = A[i][j]; + + // allocate an auxiliary matrix Qi + memory->create(Qi,n,n,"pppm/disp:Qi"); + + // alllocate an auxillary matrices for the matrix multiplication + memory->create(C,n,n,"pppm/disp:C"); + memory->create(D,n,n,"pppm/disp:D"); + memory->create(E,n,n,"pppm/disp:E"); + + // transform Matrix A to Tridiagonal form + hessenberg(A,Q,n); + + // start loop for the matrix factorization + int count = 0; + int countmax = 100000; + while (1) { + // make a Wilkinson shift + an1 = A[n-2][n-2]; + an = A[n-1][n-1]; + bn1 = A[n-2][n-1]; + d = (an1-an)/2; + mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); + for (int i = 0; i < n; i++) + A[i][i] -= mue; + + // perform a QR factorization for a tridiagonal matrix A + qr_tri(Qi,A,n); + + // update the matrices + mmult(A,Qi,C,n); + mmult(Q,Qi,C,n); + + // backward Wilkinson shift + for (int i = 0; i < n; i++) + A[i][i] += mue; + + // check the convergence + converged = check_convergence(A,Q,A0,C,D,E,n); + if (converged) break; + count = count + 1; + if (count == countmax) break; + } + + // free allocated memory + memory->destroy(Qi); + memory->destroy(A0); + memory->destroy(C); + memory->destroy(D); + memory->destroy(E); + + return converged; +} + +/* ---------------------------------------------------------------------- + Transform a Matrix to Hessenberg form (for symmetric Matrices, the + 
result will be a tridiagonal matrix) +------------------------------------------------------------------------- */ + +void PPPMDisp::hessenberg(double **A, double **Q, int n) +{ + double r,a,b,c,s,x1,x2; + for (int i = 0; i < n-1; i++) { + for (int j = i+2; j < n; j++) { + // compute coeffs for the rotation matrix + a = A[i+1][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A with multiplication from the left + for (int k = 0; k < n; k++) { + x1 = A[i+1][k]; + x2 = A[j][k]; + A[i+1][k] = c*x1 + s*x2; + A[j][k] = -s*x1 + c*x2; + } + // update the entries of A and Q with a multiplication from the right + for (int k = 0; k < n; k++) { + x1 = A[k][i+1]; + x2 = A[k][j]; + A[k][i+1] = c*x1 + s*x2; + A[k][j] = -s*x1 + c*x2; + x1 = Q[k][i+1]; + x2 = Q[k][j]; + Q[k][i+1] = c*x1 + s*x2; + Q[k][j] = -s*x1 + c*x2; + } + } + } +} + +/* ---------------------------------------------------------------------- + QR factorization for a tridiagonal matrix; Result of the factorization + is stored in A and Qi +------------------------------------------------------------------------- */ + +void PPPMDisp::qr_tri(double** Qi,double** A,int n) +{ + double r,a,b,c,s,x1,x2; + int j,k,k0,kmax; + // make Qi a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Qi[i][j] = 0.0; + for (int i = 0; i < n; i++) + Qi[i][i] = 1.0; + // loop over main diagonal and first of diagonal of A + for (int i = 0; i < n-1; i++) { + j = i+1; + // coefficients of the rotation matrix + a = A[i][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A and Q + k0 = (i-1>0)?i-1:0; //min(i-1,0); + kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); + double epsabs = eps*Bmax; + + // reconstruct the original matrix + // store the diagonal elements in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = 0.0; + for (int i = 0; i < n; i++) + D[i][i] = A[i][i]; + // store matrix Q in E + for (int i 
= 0; i < n; i++) + for (int j = 0; j < n; j++) + E[i][j] = Q[i][j]; + // E = Q*A + mmult(E,D,C,n); + // store transpose of Q in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = Q[j][i]; + // E = Q*A*Q.t + mmult(E,D,C,n); + + //compare the original matrix and the final matrix + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + diff = A0[i][j] - E[i][j]; + epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); + } + } + if (epsmax > epsabs) converged = 0; + return converged; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + memory->create(work1,2*nfft_both,"pppm/disp:work1"); + memory->create(work2,2*nfft_both,"pppm/disp:work2"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); + + memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); + memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); + memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); + + + memory->create(gf_b,order,"pppm/disp:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); + + memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); + memory->create(vg,nfft_both,6,"pppm/disp:vg"); + memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); + + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); + } + memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[1]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); + memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); + memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); + } + memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,3,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + 
+ if (function[2]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); + memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); + memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); + memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); + memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); + memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); + memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); + + memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); + memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); + memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); + memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); + memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); + memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); + memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); + + + if ( differentiation_flag == 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + + memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); + memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); + memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); + + memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); + memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); + memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); + + memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); + memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); + memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); + + memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); + memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); + memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); + + memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); + 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); + memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); + + memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); + memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); + memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); + + memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); + memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); + memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); + } + + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,7,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + 
procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,21,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[3]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); + if ( differentiation_flag == 1) { + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); + memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); + memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); + } + memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate_peratom() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v5_brick"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + + if (function[1]) { + + if ( differentiation_flag != 1 ) + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); + memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); + memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); + memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); + memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); + memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[2]) { + + if ( differentiation_flag != 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + } + + memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); + memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); + memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); + memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); + memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); + memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); + + memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); + memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); + memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); + memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); + memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); + memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); + + memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); + memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); + memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); + memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); + memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); + memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); + + memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); + memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); + memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); + memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); + memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); + memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); + + memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); + memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); + memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); + memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); + memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); + memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); + + memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); + memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); + memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); + memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); + memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); + memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); + + memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); + memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); + memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
+ nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); + memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); + memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); + memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,42,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,49,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[3]) { + + if ( differentiation_flag != 1 ) + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); + memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); + memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); + memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); + 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); + memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_fft); + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + + memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_g); + density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + + memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a0); + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + + memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a1); + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + + memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a2); + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + + memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a3); + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + + memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a4); + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + + memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a5); + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + + memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a6); + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + + memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_none); + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + memory->destroy(sf_precoeff1_6); + memory->destroy(sf_precoeff2_6); + 
memory->destroy(sf_precoeff3_6); + memory->destroy(sf_precoeff4_6); + memory->destroy(sf_precoeff5_6); + memory->destroy(sf_precoeff6_6); + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + memory->destroy(greensfn); + memory->destroy(greensfn_6); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(work1_6); + memory->destroy(work2_6); + memory->destroy(vg); + memory->destroy(vg2); + memory->destroy(vg_6); + memory->destroy(vg2_6); + greensfn = greensfn_6 = NULL; + work1 = work2 = work1_6 = work2_6 = NULL; + vg = vg2 = vg_6 = vg2_6 = NULL; + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + fkx = fky = fkz = NULL; + + memory->destroy1d_offset(fkx2,nxlo_fft); + memory->destroy1d_offset(fky2,nylo_fft); + memory->destroy1d_offset(fkz2,nzlo_fft); + fkx2 = fky2 = fkz2 = NULL; + + memory->destroy1d_offset(fkx_6,nxlo_fft_6); + memory->destroy1d_offset(fky_6,nylo_fft_6); + memory->destroy1d_offset(fkz_6,nzlo_fft_6); + fkx_6 = fky_6 = fkz_6 = NULL; + + memory->destroy1d_offset(fkx2_6,nxlo_fft_6); + memory->destroy1d_offset(fky2_6,nylo_fft_6); + memory->destroy1d_offset(fkz2_6,nzlo_fft_6); + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + memory->destroy2d_offset(drho1d,-order/2); + memory->destroy2d_offset(drho_coeff, (1-order)/2); + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + memory->destroy(gf_b_6); + memory->destroy2d_offset(rho1d_6,-order_6/2); + memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); + memory->destroy2d_offset(drho1d_6,-order_6/2); + memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); + gf_b_6 = NULL; + rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; + + delete fft1; + delete fft2; + delete remap; + delete cg; + fft1 = fft2 = NULL; + remap = NULL; + cg = 
NULL; + + delete fft1_6; + delete fft2_6; + delete remap_6; + delete cg_6; + fft1_6 = fft2_6 = NULL; + remap_6 = NULL; + cg_6 = NULL; +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + + memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; + + memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; + + memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; + + memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; + + memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; + + memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; + + memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; + + memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); + memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; + + memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; + + delete cg_peratom; + delete cg_peratom_6; + cg_peratom = cg_peratom_6 = NULL; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) and g_ewald + for Coulomb interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid() +{ + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h, h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) + error->all(FLERR,"KSpace accuracy too large to estimate G vector"); + g_ewald = 
sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if (!gridflag) { + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double qopt = compute_qopt(); + + double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (dfkspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); + h *= 0.95; + h_x = h_y = h_z = h; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; +} + +/* ---------------------------------------------------------------------- + set the FFT parameters +------------------------------------------------------------------------- */ + +void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, + int& nxlo_f,int& nylo_f,int& nzlo_f, + int& nxhi_f,int& nyhi_f,int& nzhi_f, + int& nxlo_i,int& nylo_i,int& nzlo_i, + int& nxhi_i,int& nyhi_i,int& nzhi_i, + int& nxlo_o,int& nylo_o,int& nzlo_o, + int& nxhi_o,int& 
nyhi_o,int& nzhi_o, + int& nlow, int& nupp, + int& ng, int& nf, int& nfb, + double& sft,double& sftone, int& ord) +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); + nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; + + nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); + nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; + + nzlo_i = static_cast + (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); + nzhi_i = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; + + + // nlow,nupp = stencil size for mapping particles to PPPM grid + + nlow = -(ord-1)/2; + nupp = ord/2; + + // sft values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (ord % 2) sft = OFFSET + 0.5; + else sft = OFFSET; + if (ord % 2) sftone = 0.0; + else sftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd 
= prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nxlo_o = nlo + nlow; + nxhi_o = nhi + nupp; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nylo_o = nlo + nlow; + nyhi_o = nhi + nupp; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nzlo_o = nlo + nlow; + nzhi_o = nhi + nupp; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) + + if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_i = nz_p - 1; + nzhi_o = nz_p - 1; + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int 
npey_fft,npez_fft; + if (nz_p >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_f = 0; + nxhi_f = nx_p - 1; + nylo_f = me_y*ny_p/npey_fft; + nyhi_f = (me_y+1)*ny_p/npey_fft - 1; + nzlo_f = me_z*nz_p/npez_fft; + nzhi_f = (me_z+1)*nz_p/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * + (nzhi_o-nzlo_o+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * + (nzhi_f-nzlo_f+1); + int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * + (nzhi_i-nzlo_i+1); + nfb = MAX(nf,nfft_brick); + +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPMDisp::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ +void PPPMDisp::adjust_gewald() +{ + + // Use Newton solver to find g_ewald + + double dx; + + // Begin algorithm + + for (int i = 0; i < LARGE; i++) { + dx = f() / derivf(); + g_ewald -= dx; //Update g_ewald + if (fabs(f()) < SMALL) return; + } + + // Failed to converge + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); + +} + +/* 
---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double PPPMDisp::f() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f(); + g_ewald_old = g_ewald; + g_ewald += h; + f2 = f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator for the accuracy +------------------------------------------------------------------------- */ + +double PPPMDisp::final_accuracy() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); + return acc; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator 
for the Dispersion accuracy +------------------------------------------------------------------------- */ + +void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) +{ + double df_rspace, df_kspace; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + acc_real = lj_rspace_error(); + + double qopt = compute_qopt_6(); + + acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); + return; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Coulomb interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_ad(); + } else { + qopt = compute_qopt_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Dispersion interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_6_ad(); + } else { + qopt = compute_qopt_6_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ik() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab 
= zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3,dot1,dot2; + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) 
prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + double numerator; + + int nbx = 2; + int nby = 2; + int nbz = 2; + double form = 1.0; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706; + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += sx*sy*sz * u2*4.0*MY_PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Dispersion interaction 
+------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ik() +{ + double qopt = 0.0; + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3; + double dot1,dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1.0/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Dispersion interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4; + double dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + 
wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid and g_ewald_6 + for Dispersion interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid_6() +{ + // Calculate csum + if (!csumflag) calc_csum(); + if (!gewaldflag_6) set_init_g6(); + if (!gridflag_6) set_n_pppm_6(); + while (!factorable(nx_pppm_6)) nx_pppm_6++; + while (!factorable(ny_pppm_6)) ny_pppm_6++; + while (!factorable(nz_pppm_6)) nz_pppm_6++; + +} + +/* ---------------------------------------------------------------------- + Calculate the sum of the squared dispersion coefficients and other + related quantities required for the calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::calc_csum() +{ + csumij = 0.0; + csum = 0.0; + + int ntypes = atom->ntypes; + int i,j,k; + + delete [] cii; + cii = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) cii[i] = 0.0; + delete [] csumi; + csumi = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; + int *neach = new int[ntypes+1]; + for (i = 0; i<=ntypes; i++) neach[i] = 0; + + //the following variables are needed to distinguish between 
arithmetic + // and geometric mixing + + double mix1; // scales 20/16 to 4 + int mix2; // shifts the value to the sigma^3 value + int mix3; // shifts the value to the right atom type + if (function[1]) { + for (i = 1; i <= ntypes; i++) + cii[i] = B[i]*B[i]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += B[tmp]*B[tmp]; + } + } + if (function[2]) { + for (i = 1; i <= ntypes; i++) + cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; + } + } + if (function[3]) { + for (i = 1; i <= ntypes; i++) + for (j = 0; j < nsplit; j++) + cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + for (j = 0; j < nsplit; j++) + csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; + } + } + + + double tmp2; + MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); + csum = tmp2; + csumflag = 1; + + int *neach_all = new int[ntypes+1]; + MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); + + // copmute csumij and csumi + double d1, d2; + if (function[1]){ + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + csumi[i] += neach_all[j]*B[i]*B[j]; + d1 = neach_all[i]*B[i]; + d2 = neach_all[j]*B[j]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; + } + } + } + if (function[2]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; k<=6; k++) { + csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + d1 = neach_all[i]*B[7*i + k]; + d2 = neach_all[j]*B[7*(j+1)-k-1]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + } + } + } + } + if (function[3]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; kall(FLERR, str); + +} + +/* ---------------------------------------------------------------------- + Calculate f(x) for Dispersion 
interaction + ------------------------------------------------------------------------- */ + +double PPPMDisp::f_6() +{ + double df_rspace, df_kspace; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = lj_rspace_error(); + + double qopt = compute_qopt_6(); + df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf_6() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f_6(); + g_ewald_old = g_ewald_6; + g_ewald_6 += h; + f2 = f_6(); + g_ewald_6 = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + + +/* ---------------------------------------------------------------------- + calculate an initial value for g_ewald_6 + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_init_g6() +{ + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + // make initial g_ewald estimate + // based on desired error and real space cutoff + + // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj + // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 + // else, repeat multiply g_ewald_6 by 2 until df_real > 0 + // perform bisection for the last two values of + double df_real; + double g_ewald_old; + double gmin, gmax; + + // check if there is a user defined accuracy + double acc_rspace = accuracy; + if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; + + g_ewald_6 = 
1.0/cutoff_lj; + df_real = lj_rspace_error() - acc_rspace; + int counter = 0; + if (df_real > 0) { + while (df_real > 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 2; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (df_real < 0) { + while (df_real < 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 0.5; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + + gmin = MIN(g_ewald_6, g_ewald_old); + gmax = MAX(g_ewald_6, g_ewald_old); + g_ewald_6 = gmin + 0.5*(gmax-gmin); + counter = 0; + while (gmax-gmin > SMALL && counter < LARGE) { + counter++; + df_real = lj_rspace_error() -acc_rspace; + if (df_real < 0) gmax = g_ewald_6; + else gmin = g_ewald_6; + g_ewald_6 = gmin + 0.5*(gmax-gmin); + } + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + +} + +/* ---------------------------------------------------------------------- + calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_n_pppm_6() +{ + bigint natoms = atom->natoms; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + double h, h_x,h_y,h_z; + + double acc_kspace = accuracy; + if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; + + // initial value for the grid spacing + h = h_x = h_y = h_z = 4.0/g_ewald_6; + // decrease grid spacing untill required precision is obtained + int count = 0; + while(1) { + + // set grid dimension + nx_pppm_6 = static_cast (xprd/h_x); + ny_pppm_6 = static_cast (yprd/h_y); + nz_pppm_6 = static_cast (zprd_slab/h_z); + + if (nx_pppm_6 <= 1) nx_pppm_6 = 2; + if (ny_pppm_6 <= 1) ny_pppm_6 = 2; + if (nz_pppm_6 <= 1) nz_pppm_6 = 2; + + 
//set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm_6 >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft_6 = 0; + nxhi_fft_6 = nx_pppm_6 - 1; + nylo_fft_6 = me_y*ny_pppm_6/npey_fft; + nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; + nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; + nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; + + double qopt = compute_qopt_6(); + + double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (df_kspace <= acc_kspace) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); + h *= 0.95; + h_x = h_y = h_z = h; + } +} + +/* ---------------------------------------------------------------------- + calculate the real space error for dispersion interactions + ---------------------------------------------------------------------- */ + +double PPPMDisp::lj_rspace_error() +{ + bigint natoms = atom->natoms; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + double deltaf; + double rgs = (cutoff_lj*g_ewald_6); + rgs *= rgs; + double rgs_inv = 1.0/rgs; + deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* + exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); + return deltaf; +} + + +/* ---------------------------------------------------------------------- + Compyute the modified (hockney-eastwood) coulomb green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf() +{ + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; 
+ volume = xprd * yprd * zprd_slab; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = sin(0.5*qz*zprd_slab/nz_pppm); + snz2 = snz*snz; + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + wz *= wz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = sin(0.5*qy*yprd/ny_pppm); + sny2 = sny*sny; + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + wy *= wy; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = sin(0.5*qx*xprd/nx_pppm); + snx2 = snx*snx; + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + numerator = 4.0*MY_PI/sqk; + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + double *sf_pre1, double *sf_pre2, double *sf_pre3, + double *sf_pre4, double *sf_pre5, double *sf_pre6) +{ + + 
int i,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + int nb = 2; + + n = 0; + for (m = nzlo_ft; m <= nzhi_ft; m++) { + mper = m - nzp*(2*m/nzp); + + for (l = nylo_ft; l <= nyhi_ft; l++) { + lper = l - nyp*(2*l/nyp); + + for (k = nxlo_ft; k <= nxhi_ft; k++) { + kper = k - nxp*(2*k/nxp); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = -nb; i <= nb; i++) { + + qx0 = unitkx*(kper+nxp*i); + qx1 = unitkx*(kper+nxp*(i+1)); + qx2 = unitkx*(kper+nxp*(i+2)); + wx0[i+2] = 1.0; + wx1[i+2] = 1.0; + wx2[i+2] = 1.0; + argx = 0.5*qx0*xprd/nxp; + if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx1*xprd/nxp; + if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx2*xprd/nxp; + if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); + + qy0 = unitky*(lper+nyp*i); + qy1 = unitky*(lper+nyp*(i+1)); + qy2 = unitky*(lper+nyp*(i+2)); + wy0[i+2] = 1.0; + wy1[i+2] = 1.0; + wy2[i+2] = 1.0; + argy = 0.5*qy0*yprd/nyp; + if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy1*yprd/nyp; + if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy2*yprd/nyp; + if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); + + qz0 = unitkz*(mper+nzp*i); + qz1 = unitkz*(mper+nzp*(i+1)); + qz2 = unitkz*(mper+nzp*(i+2)); + wz0[i+2] = 1.0; + wz1[i+2] = 1.0; + wz2[i+2] = 1.0; + argz = 
0.5*qz0*zprd_slab/nzp; + if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz1*zprd_slab/nzp; + if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz2*zprd_slab/nzp; + if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); + } + + for (nx = 0; nx <= 4; nx++) { + for (ny = 0; ny <= 4; ny++) { + for (nz = 0; nz <= 4; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_pre1[n] = sum1; + sf_pre2[n] = sum2; + sf_pre3[n] = sum3; + sf_pre4[n] = sum4; + sf_pre5[n] = sum5; + sf_pre6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + Compute the modified (hockney-eastwood) dispersion green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_6() +{ + double *prd; + int k,l,m,n; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double sqk; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz; + double qx,qy,qz; + double rtsqk, term; + double numerator,denominator; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); + qz = unitkz*mper; + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); + snz2 = snz*snz; + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + wz *= wz; + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + qy = unitky*lper; + sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); + sny2 = sny*sny; + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + wy *= wy; + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + qx = unitkx*kper; + snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); + snx2 = snx*snx; + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + rtsqk = sqrt(sqk); + term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); + greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; + } else greensfn_6[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ +void PPPMDisp::compute_sf_coeff() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + for (l = nylo_fft; l <= nyhi_fft; l++) { + for (k = nxlo_fft; k <= nxhi_fft; k++) { + sf_coeff[0] 
+= sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + ++n; + } + } + } + + // Compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Dispersion interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_coeff_6() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; + sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; + sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; + sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; + sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; + sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n]; + ++n; + } + } + } + + + // perform multiplication with prefactors + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + 
prex *= nx_pppm_6/xprd; + prey *= ny_pppm_6/yprd; + prez *= nz_pppm_6/zprd_slab; + sf_coeff_6[0] *= prex; + sf_coeff_6[1] *= prex*2; + sf_coeff_6[2] *= prey; + sf_coeff_6[3] *= prey*2; + sf_coeff_6[4] *= prez; + sf_coeff_6[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; + +} + +/* ---------------------------------------------------------------------- + denominator for Hockney-Eastwood Green's function + of x,y,z = sin(kx*deltax/2), etc + + inf n-1 + S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l + j=-inf l=0 + + = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) + gf_b = denominator expansion coeffs +------------------------------------------------------------------------- */ + +double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) +{ + double sx,sy,sz; + sz = sy = sx = 0.0; + for (int l = ord-1; l >= 0; l--) { + sx = g_b[l] + sx*x; + sy = g_b[l] + sy*y; + sz = g_b[l] + sz*z; + } + double s = sx*sy*sz; + return s*s; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_denom(double* gf, int ord) +{ + int k,l,m; + + for (l = 1; l < ord; l++) gf[l] = 0.0; + gf[0] = 1.0; + + for (m = 1; m < ord; m++) { + for (l = m; l > 0; l--) + gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); + gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*ord; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < ord; l++) gf[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for coulomb interaction or 
dispersion interaction with geometric + mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, + LAMMPS_NS::Remap* rmp) +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_i; iz <= nzhi_i; iz++) + for (iy = nylo_i; iy <= nyhi_i; iy++) + for (ix = nxlo_i; ix <= nxhi_i; ix++) + dfft[n++] = dbrick[iz][iy][ix]; + + rmp->perform(dfft,dfft,work); +} + + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_a() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) + for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) + for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { + density_fft_a0[n] = density_brick_a0[iz][iy][ix]; + density_fft_a1[n] = density_brick_a1[iz][iy][ix]; + density_fft_a2[n] = density_brick_a2[iz][iy][ix]; + density_fft_a3[n] = density_brick_a3[iz][iy][ix]; + density_fft_a4[n] = density_brick_a4[iz][iy][ix]; + density_fft_a5[n] = density_brick_a5[iz][iy][ix]; + density_fft_a6[n++] = density_brick_a6[iz][iy][ix]; + } + + remap_6->perform(density_fft_a0,density_fft_a0,work1_6); + remap_6->perform(density_fft_a1,density_fft_a1,work1_6); + remap_6->perform(density_fft_a2,density_fft_a2,work1_6); + 
remap_6->perform(density_fft_a3,density_fft_a3,work1_6); + remap_6->perform(density_fft_a4,density_fft_a4,work1_6); + remap_6->perform(density_fft_a5,density_fft_a5,work1_6); + remap_6->perform(density_fft_a6,density_fft_a6,work1_6); + +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with special case +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_none() +{ + int k,n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDisp::particle_map(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; + + p2g[i][0] = nx; + p2g[i][1] = ny; + p2g[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 
3d brick + + if (nx+nlow < nxlo || nx+nup > nxhi || + ny+nlow < nylo || ny+nup > nyhi || + nz+nlow < nzlo || nz+nup > nzhi) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); +} + + +void PPPMDisp::particle_map_c(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + particle_map(delx, dely, delz, sft, p2g, nup, nlow, + nxlo, nylo, nzlo, nxhi, nyhi, nzhi); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_c() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + 
density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- geometric mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_g() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6 * B[type]; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) 
+ in global grid --- arithmetic mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_a() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + + memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + density_brick_a0[mz][my][mx] += w*B[7*type]; + density_brick_a1[mz][my][mx] += 
w*B[7*type+1]; + density_brick_a2[mz][my][mx] += w*B[7*type+2]; + density_brick_a3[mz][my][mx] += w*B[7*type+3]; + density_brick_a4[mz][my][mx] += w*B[7*type+4]; + density_brick_a5[mz][my][mx] += w*B[7*type+5]; + density_brick_a6[mz][my][mx] += w*B[7*type+6]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- case when mixing rules don't apply +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_none() +{ + int k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + for (k = 0; k < nsplit_alloc; k++) + memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) + 
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik differentiation +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* kx, double* ky, double* kz, + double* kx2, double* ky2, double* kz2, + FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg 
-ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x & y direction gradient + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; + wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vx_brick[k][j][i] = wk2[n++]; + vy_brick[k][j][i] = wk2[n++]; + } + + if (!eflag_atom) { + // z direction gradient only + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = kz[k]*wk1[n+1]; + wk2[n+1] = -kz[k]*wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n]; + n += 2; + } + + } + + else { + // z direction gradient & per-atom energy + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; + wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n++]; + u_pa[k][j][i] = wk2[n++];; + } + } + + if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad differentiation 
+------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = wk1[n]; + wk2[n+1] = wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + u_pa[k][j][i] = wk2[n++]; + n++; + } + + + if (vflag_atom) poisson_peratom(wk1, 
wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); + +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, + double** vcoeff, double** vcoeff2, int nft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) +{ + //v0 & v1 term + int n, i, j, k; + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; + wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v0_pa[k][j][i] = wk2[n++]; + v1_pa[k][j][i] = wk2[n++]; + } + + //v2 & v3 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; + wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v2_pa[k][j][i] = wk2[n++]; + v3_pa[k][j][i] = wk2[n++]; + } + + //v4 & v5 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; + wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v4_pa[k][j][i] = wk2[n++]; + v5_pa[k][j][i] = wk2[n++]; + } + +} + +/* 
---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify 
the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = work2_6[n++]; + vxbrick_2[k][j][i] = work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = work2_6[n++]; + vybrick_2[k][j][i] = work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; 
j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = work2_6[n++]; + vzbrick_2[k][j][i] = work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + } + + if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + + + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; 
+ } + + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } 
+ + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = B[n1]*work2_6[n++]; + vybrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; + u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; + } + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, 
FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + + if (vflag_atom) 
poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, + FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two 
transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = B[n1]*work2_6[n++]; + u_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = work2_6[n++]; + v0_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + 
fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = work2_6[n++]; + v1_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = work2_6[n++]; + v2_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = work2_6[n++]; + v3_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = work2_6[n++]; + v4_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = work2_6[n++]; + v5_pa_2[k][j][i] = work2_6[n++]; + } +} + +/* 
---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_peratom(int n1, int n2, + FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = 
work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = 
atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + 
for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + // convert E-field to force and substract self forces + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = 
global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + // convert E-field to force + + const double qfactor = 0.5*force->qqrd2e * scale * q[i]; + + if (eflag_atom) eatom[i] += u_pa*qfactor; + if (vflag_atom) { + vatom[i][0] += v0*qfactor; + vatom[i][1] += v1*qfactor; + vatom[i][2] += v2*qfactor; + vatom[i][3] += v3*qfactor; + vatom[i][4] += v4*qfactor; + vatom[i][5] += v5*qfactor; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving 
stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx -= x0*vdx_brick_g[mz][my][mx]; + eky -= x0*vdy_brick_g[mz][my][mx]; + ekz -= x0*vdz_brick_g[mz][my][mx]; + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + if (slabflag != 2) f[i][2] += lj*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt 
+ // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 2*lj*lj; + f[i][0] += ekx*lj - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 2*lj*lj; + f[i][1] += eky*lj - sf; + + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 2*lj*lj; + if (slabflag != 2) f[i][2] += ekz*lj - sf; + + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + 
FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick_g[mz][my][mx]; + v1 += x0*v1_brick_g[mz][my][mx]; + v2 += x0*v2_brick_g[mz][my][mx]; + v3 += x0*v3_brick_g[mz][my][mx]; + v4 += x0*v4_brick_g[mz][my][mx]; + v5 += x0*v5_brick_g[mz][my][mx]; + } + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]*0.5; + + if (eflag_atom) eatom[i] += u_pa*lj; + if (vflag_atom) { + vatom[i][0] += v0*lj; + vatom[i][1] += v1*lj; + vatom[i][2] += v2*lj; + vatom[i][3] += v3*lj; + vatom[i][4] += v4*lj; + vatom[i][5] += v5*lj; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ik() +{ + int 
i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx0 -= x0*vdx_brick_a0[mz][my][mx]; + eky0 -= x0*vdy_brick_a0[mz][my][mx]; + ekz0 -= x0*vdz_brick_a0[mz][my][mx]; + ekx1 -= x0*vdx_brick_a1[mz][my][mx]; + eky1 -= x0*vdy_brick_a1[mz][my][mx]; + ekz1 -= x0*vdz_brick_a1[mz][my][mx]; + ekx2 -= x0*vdx_brick_a2[mz][my][mx]; + eky2 -= x0*vdy_brick_a2[mz][my][mx]; + ekz2 -= x0*vdz_brick_a2[mz][my][mx]; + ekx3 -= x0*vdx_brick_a3[mz][my][mx]; + eky3 -= x0*vdy_brick_a3[mz][my][mx]; + ekz3 -= x0*vdz_brick_a3[mz][my][mx]; + ekx4 -= x0*vdx_brick_a4[mz][my][mx]; + eky4 -= x0*vdy_brick_a4[mz][my][mx]; + 
ekz4 -= x0*vdz_brick_a4[mz][my][mx]; + ekx5 -= x0*vdx_brick_a5[mz][my][mx]; + eky5 -= x0*vdy_brick_a5[mz][my][mx]; + ekz5 -= x0*vdz_brick_a5[mz][my][mx]; + ekx6 -= x0*vdx_brick_a6[mz][my][mx]; + eky6 -= x0*vdy_brick_a6[mz][my][mx]; + ekz6 -= x0*vdz_brick_a6[mz][my][mx]; + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = 
atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + ekx0 += x0*u_brick_a0[mz][my][mx]; + eky0 += y0*u_brick_a0[mz][my][mx]; + ekz0 += z0*u_brick_a0[mz][my][mx]; + + ekx1 += x0*u_brick_a1[mz][my][mx]; + eky1 += y0*u_brick_a1[mz][my][mx]; + ekz1 += z0*u_brick_a1[mz][my][mx]; + + ekx2 += x0*u_brick_a2[mz][my][mx]; + eky2 += y0*u_brick_a2[mz][my][mx]; + ekz2 += z0*u_brick_a2[mz][my][mx]; + + ekx3 += x0*u_brick_a3[mz][my][mx]; + eky3 += y0*u_brick_a3[mz][my][mx]; + ekz3 += z0*u_brick_a3[mz][my][mx]; + + ekx4 += x0*u_brick_a4[mz][my][mx]; + eky4 += y0*u_brick_a4[mz][my][mx]; + ekz4 += z0*u_brick_a4[mz][my][mx]; + + ekx5 += x0*u_brick_a5[mz][my][mx]; + eky5 += y0*u_brick_a5[mz][my][mx]; + ekz5 += z0*u_brick_a5[mz][my][mx]; + + ekx6 += x0*u_brick_a6[mz][my][mx]; + eky6 += y0*u_brick_a6[mz][my][mx]; + ekz6 += z0*u_brick_a6[mz][my][mx]; + } + } + } + + ekx0 *= hx_inv; + eky0 *= hy_inv; + ekz0 *= hz_inv; + + ekx1 *= hx_inv; + eky1 *= hy_inv; + ekz1 *= hz_inv; + + ekx2 *= hx_inv; + eky2 *= 
hy_inv; + ekz2 *= hz_inv; + + ekx3 *= hx_inv; + eky3 *= hy_inv; + ekz3 *= hz_inv; + + ekx4 *= hx_inv; + eky4 *= hy_inv; + ekz4 *= hz_inv; + + ekx5 *= hx_inv; + eky5 *= hy_inv; + ekz5 *= hz_inv; + + ekx6 *= hx_inv; + eky6 *= hy_inv; + ekz6 *= hz_inv; + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; + FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; + FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; + FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; + FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; + FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; + FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; + + // loop over my charges, interpolate electric field from nearby 
grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; + u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; + u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; + u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; + u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; + u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; + u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + u_pa0 += x0*u_brick_a0[mz][my][mx]; + u_pa1 += x0*u_brick_a1[mz][my][mx]; + u_pa2 += x0*u_brick_a2[mz][my][mx]; + u_pa3 += x0*u_brick_a3[mz][my][mx]; + u_pa4 += x0*u_brick_a4[mz][my][mx]; + u_pa5 += x0*u_brick_a5[mz][my][mx]; + u_pa6 += x0*u_brick_a6[mz][my][mx]; + } + if (vflag_atom) { + v00 += x0*v0_brick_a0[mz][my][mx]; + v10 += x0*v1_brick_a0[mz][my][mx]; + v20 += x0*v2_brick_a0[mz][my][mx]; + v30 += x0*v3_brick_a0[mz][my][mx]; + v40 += x0*v4_brick_a0[mz][my][mx]; + v50 += x0*v5_brick_a0[mz][my][mx]; + v01 += x0*v0_brick_a1[mz][my][mx]; + v11 += x0*v1_brick_a1[mz][my][mx]; + v21 += x0*v2_brick_a1[mz][my][mx]; + v31 += x0*v3_brick_a1[mz][my][mx]; + v41 += 
x0*v4_brick_a1[mz][my][mx]; + v51 += x0*v5_brick_a1[mz][my][mx]; + v02 += x0*v0_brick_a2[mz][my][mx]; + v12 += x0*v1_brick_a2[mz][my][mx]; + v22 += x0*v2_brick_a2[mz][my][mx]; + v32 += x0*v3_brick_a2[mz][my][mx]; + v42 += x0*v4_brick_a2[mz][my][mx]; + v52 += x0*v5_brick_a2[mz][my][mx]; + v03 += x0*v0_brick_a3[mz][my][mx]; + v13 += x0*v1_brick_a3[mz][my][mx]; + v23 += x0*v2_brick_a3[mz][my][mx]; + v33 += x0*v3_brick_a3[mz][my][mx]; + v43 += x0*v4_brick_a3[mz][my][mx]; + v53 += x0*v5_brick_a3[mz][my][mx]; + v04 += x0*v0_brick_a4[mz][my][mx]; + v14 += x0*v1_brick_a4[mz][my][mx]; + v24 += x0*v2_brick_a4[mz][my][mx]; + v34 += x0*v3_brick_a4[mz][my][mx]; + v44 += x0*v4_brick_a4[mz][my][mx]; + v54 += x0*v5_brick_a4[mz][my][mx]; + v05 += x0*v0_brick_a5[mz][my][mx]; + v15 += x0*v1_brick_a5[mz][my][mx]; + v25 += x0*v2_brick_a5[mz][my][mx]; + v35 += x0*v3_brick_a5[mz][my][mx]; + v45 += x0*v4_brick_a5[mz][my][mx]; + v55 += x0*v5_brick_a5[mz][my][mx]; + v06 += x0*v0_brick_a6[mz][my][mx]; + v16 += x0*v1_brick_a6[mz][my][mx]; + v26 += x0*v2_brick_a6[mz][my][mx]; + v36 += x0*v3_brick_a6[mz][my][mx]; + v46 += x0*v4_brick_a6[mz][my][mx]; + v56 += x0*v5_brick_a6[mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]*0.5; + lj1 = B[7*type+5]*0.5; + lj2 = B[7*type+4]*0.5; + lj3 = B[7*type+3]*0.5; + lj4 = B[7*type+2]*0.5; + lj5 = B[7*type+1]*0.5; + lj6 = B[7*type]*0.5; + + + if (eflag_atom) + eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + + u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; + if (vflag_atom) { + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + v04*lj4 + v05*lj5 + v06*lj6; + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + v14*lj4 + v15*lj5 + v16*lj6; + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + v24*lj4 + v25*lj5 + v26*lj6; + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + v34*lj4 + v35*lj5 + v36*lj6; + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + v44*lj4 + v45*lj5 + v46*lj6; 
+ vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + v54*lj4 + v55*lj5 + v56*lj6; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ik() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) { + ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; + eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; + ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + f[i][0] += lj*ekx[k]; + f[i][1] 
+=lj*eky[k]; + if (slabflag != 2) f[i][2] +=lj*ekz[k]; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ad() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + + + double s1,s2,s3; + double sf1,sf2,sf3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = 
m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + for (k = 0; k < nsplit; k++) { + ekx[k] += x0*u_brick_none[k][mz][my][mx]; + eky[k] += y0*u_brick_none[k][mz][my][mx]; + ekz[k] += z0*u_brick_none[k][mz][my][mx]; + } + } + } + } + + for (k = 0; k < nsplit; k++) { + ekx[k] *= hx_inv; + eky[k] *= hy_inv; + ekz[k] *= hz_inv; + } + + // convert D-field to force + type = atom->type[i]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); + + sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); + + sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); + + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + + sf = sf1*B[k]*2*lj*lj; + f[i][0] += lj*ekx[k] - sf; + + + sf = sf2*B[k]*2*lj*lj; + f[i][1] += lj*eky[k] - sf; + + sf = sf3*B[k]*2*lj*lj; + if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_peratom() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; + + u_pa = new FFT_SCALAR[nsplit]; + v0 = new FFT_SCALAR[nsplit]; + v1 = new FFT_SCALAR[nsplit]; + v2 = new FFT_SCALAR[nsplit]; + v3 = new FFT_SCALAR[nsplit]; + v4 = new FFT_SCALAR[nsplit]; + v5 = new FFT_SCALAR[nsplit]; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // 
(dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + for (k = 0; k < nsplit; k++) + u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + for (k = 0; k < nsplit; k++) + u_pa[k] += x0*u_brick_none[k][mz][my][mx]; + } + if (vflag_atom) { + for (k = 0; k < nsplit; k++) { + v0[k] += x0*v0_brick_none[k][mz][my][mx]; + v1[k] += x0*v1_brick_none[k][mz][my][mx]; + v2[k] += x0*v2_brick_none[k][mz][my][mx]; + v3[k] += x0*v3_brick_none[k][mz][my][mx]; + v4[k] += x0*v4_brick_none[k][mz][my][mx]; + v5[k] += x0*v5_brick_none[k][mz][my][mx]; + } + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]*0.5; + + if (eflag_atom) { + eatom[i] += u_pa[k]*lj; + } + if (vflag_atom) { + vatom[i][0] += v0[k]*lj; + vatom[i][1] += v1[k]*lj; + vatom[i][2] += v2[k]*lj; + vatom[i][3] += v3[k]*lj; + vatom[i][4] += v4[k]*lj; + vatom[i][5] += v5[k]*lj; + } + } + } + + delete [] u_pa; + delete [] v0; + delete [] v1; + delete [] v2; + delete [] v3; + delete [] v4; + delete [] v5; +} + +/* ---------------------------------------------------------------------- + pack values to buf to send to another proc 
+------------------------------------------------------------------------- */ + +void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + 
buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, arithmetic mixing + + case FORWARD_IK_A: { + FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc0[list[i]]; + buf[n++] = ysrc0[list[i]]; + 
buf[n++] = zsrc0[list[i]]; + + buf[n++] = xsrc1[list[i]]; + buf[n++] = ysrc1[list[i]]; + buf[n++] = zsrc1[list[i]]; + + buf[n++] = xsrc2[list[i]]; + buf[n++] = ysrc2[list[i]]; + buf[n++] = zsrc2[list[i]]; + + buf[n++] = xsrc3[list[i]]; + buf[n++] = ysrc3[list[i]]; + buf[n++] = zsrc3[list[i]]; + + buf[n++] = xsrc4[list[i]]; + buf[n++] = ysrc4[list[i]]; + buf[n++] = zsrc4[list[i]]; + + buf[n++] = xsrc5[list[i]]; + buf[n++] = ysrc5[list[i]]; + buf[n++] = zsrc5[list[i]]; + + buf[n++] = xsrc6[list[i]]; + buf[n++] = ysrc6[list[i]]; + buf[n++] = zsrc6[list[i]]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + buf[n++] = esrc0[list[i]]; + buf[n++] = esrc1[list[i]]; + buf[n++] = esrc2[list[i]]; + buf[n++] = esrc3[list[i]]; + buf[n++] = esrc4[list[i]]; + buf[n++] = esrc5[list[i]]; + buf[n++] = esrc6[list[i]]; + } + if (vflag_atom) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = 
v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = 
v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + break; + } + + // Dispersion interactions, no mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[n++] = src[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, arithmetic mixing + + case 
FORWARD_IK_A: { + FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + xdest0[list[i]] = buf[n++]; + ydest0[list[i]] = buf[n++]; + zdest0[list[i]] = buf[n++]; + + xdest1[list[i]] = buf[n++]; + ydest1[list[i]] = buf[n++]; + zdest1[list[i]] = buf[n++]; + + xdest2[list[i]] = buf[n++]; + ydest2[list[i]] = buf[n++]; + zdest2[list[i]] = buf[n++]; + + xdest3[list[i]] = buf[n++]; + ydest3[list[i]] = buf[n++]; + zdest3[list[i]] = buf[n++]; + + 
xdest4[list[i]] = buf[n++]; + ydest4[list[i]] = buf[n++]; + zdest4[list[i]] = buf[n++]; + + xdest5[list[i]] = buf[n++]; + ydest5[list[i]] = buf[n++]; + zdest5[list[i]] = buf[n++]; + + xdest6[list[i]] = buf[n++]; + ydest6[list[i]] = buf[n++]; + zdest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + dest0[list[i]] = buf[n++]; + dest1[list[i]] = buf[n++]; + dest2[list[i]] = buf[n++]; + dest3[list[i]] = buf[n++]; + dest4[list[i]] = buf[n++]; + dest5[list[i]] = buf[n++]; + dest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + esrc0[list[i]] = buf[n++]; + esrc1[list[i]] = buf[n++]; + esrc2[list[i]] = buf[n++]; + esrc3[list[i]] = buf[n++]; + esrc4[list[i]] = buf[n++]; + esrc5[list[i]] = buf[n++]; + esrc6[list[i]] = buf[n++]; + } + if (vflag_atom) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + 
v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + 
v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + break; + } + + // Disperion interactions, geometric mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + //Coulomb interactions + + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + //Dispersion interactions, geometric mixing + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + //Dispersion interactions, arithmetic mixing + + } else if (flag == 
REVERSE_RHO_A) { + FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + + //Dispersion interactions, no mixing + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src[list[i]]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + //Coulomb interactions + + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + //Dispersion interactions, geometric mixing + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + //Dispersion interactions, arithmetic mixing + + } else if (flag == REVERSE_RHO_A) { + FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + dest0[list[i]] += buf[n++]; + dest1[list[i]] += buf[n++]; + dest2[list[i]] += buf[n++]; + dest3[list[i]] += buf[n++]; + dest4[list[i]] += buf[n++]; + dest5[list[i]] += buf[n++]; + dest6[list[i]] += buf[n++]; + } + + //Dispersion interactions, no mixing + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + // innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* 
---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **rho_c, FFT_SCALAR **r1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-1; l >= 0; l--) { + r1 = rho_c[l][k] + r1*dx; + r2 = rho_c[l][k] + r2*dy; + r3 = rho_c[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-2; l >= 0; l--) { + r1 = drho_c[l][k] + r1*dx; + r2 = drho_c[l][k] + r2*dy; + r3 = drho_c[l][k] + r3*dz; + } + dr1d[0][k] = r1; + dr1d[1][k] = r2; + dr1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, + int ord) +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); + + for (k = -ord; k <= ord; k++) + for (l = 0; l < ord; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < ord; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-ord)/2; + for (k = -(ord-1); k < ord; k += 2) { + for (l = 0; l < ord; l++) + coeff[l][m] = a[l][k]; + for (l = 1; l < ord; l++) + dcoeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-ord); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMDisp::slabcorr(int eflag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_1d(int n, double &time1d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1){ + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->timing1d(work1_6,nfft_both_6,1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + if (differentiation_flag != 1){ + fft2_6->timing1d(work1_6,nfft_both_6,-1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d += (time2 - time1)*mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_3d(int n, double &time3d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + 
function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->compute(work1_6,work1_6,1); + fft2_6->compute(work1_6,work1_6,-1); + if (differentiation_flag != 1) { + fft2_6->compute(work1_6,work1_6,-1); + fft2_6->compute(work1_6,work1_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d += (time2 - time1) * mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMDisp::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int mixing = 1; + int diff = 3; //depends on differentiation + int per = 7; //depends on per atom calculations + if (differentiation_flag) { + diff = 1; + per = 6; + } + if (!evflag_atom) per = 0; + if (function[2]) mixing = 7; + if (function[3]) mixing = nsplit_alloc; + + if (function[0]) { + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory + bytes += 6 * nfft_both * sizeof(double); // vg + bytes += nfft_both * sizeof(double); // greensfn + bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg->memory_usage(); + } + + if (function[1] + function[2] + function[3]) { + int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * + (nzhi_out_6-nzlo_out_6+1); + bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks + bytes += 6 * nfft_both_6 * sizeof(double); // vg + bytes += nfft_both_6 * sizeof(double); // greensfn + bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg_6->memory_usage(); + } + return bytes; +} diff --git a/src/KSPACE/pppm_old.cpp b/src/KSPACE/pppm_old.cpp index a368b5d5b0..22c7471b18 100644 --- a/src/KSPACE/pppm_old.cpp +++ b/src/KSPACE/pppm_old.cpp @@ 
-1,2863 +1,2863 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_old.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - triclinic_support = 0; - pppmflag = 1; - group_group_enable = 0; - - accuracy_relative = 
fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = buf3 = buf4 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - - nmax = 0; - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMOld::~PPPMOld() -{ - delete [] factors; - deallocate(); - deallocate_peratom(); - deallocate_groups(); - memory->destroy(part2grid); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMOld::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or 
> than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - peratom_allocate_flag = 0; - deallocate_groups(); - group_allocate_flag = 0; - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; 
- } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); - - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract 
OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - 
OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - MPI_Status status; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,&status); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,&status); - else nxlo_ghost = nplanes; - - nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,&status); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,&status); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - 
&nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,&status); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,&status); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition 
on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - - nbuf_peratom = 7*nbuf; - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - - // allocate K-space dependent memory - // don't invoke allocate_peratom() here, wait to see if needed - - allocate(); - - // pre-compute Green's function denomiator expansion - // 
pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMOld::setup() -{ - int i,j,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - 
vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nx,ny,nz,kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - - int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - - double form = 1.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); - snz2 = snz*snz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = sin(0.5*unitky*lper*yprd/ny_pppm); - sny2 = sny*sny; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = sin(0.5*unitkx*kper*xprd/nx_pppm); - snx2 = snx*snx; - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706/sqk; - denominator = gf_denom(snx2,sny2,snz2); - sum1 = 0.0; - const double dorder = static_cast(order); - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * 
pow(wx*wy*wz,2.0); - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMOld::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - peratom_allocate_flag = 1; - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - fillbrick(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fillbrick_peratom(); - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) 
fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) { - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - - 
memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - // summation coeffs - - memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate_peratom() -{ - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - 
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - memory->create(buf3,nbuf_peratom,"pppm:buf3"); - memory->create(buf4,nbuf_peratom,"pppm:buf4"); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - - memory->destroy(buf1); - memory->destroy(buf2); - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - - delete fft1; - delete fft2; - delete remap; -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_peratom() -{ - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - 
memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(buf3); - memory->destroy(buf4); -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald -------------------------------------------------------------------------- */ - -void PPPMOld::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - double q2 = qsqsum * force->qqrd2e; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust 
z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until accuracy target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast (xprd/h_x) + 1; - ny_pppm = static_cast (yprd/h_y) + 1; - nz_pppm = static_cast (zprd_slab/h_z) + 1; - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - // adjust g_ewald for new grid size - - h_x = xprd/static_cast(nx_pppm); - h_y = yprd/static_cast(ny_pppm); - h_z = zprd_slab/static_cast(nz_pppm); - - if (!gewaldflag) { - double 
gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10.0/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); - rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); - } - } - - // final RMS accuracy - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - if (screen) { - fprintf(screen," G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - accuracy); 
- fprintf(logfile," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - } - } -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMOld::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double PPPMOld::rms(double h, double prd, bigint natoms, - double q2, double **acons) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); - return value; -} - -/* ---------------------------------------------------------------------- - compute difference in real-space and KSpace RMS accuracy -------------------------------------------------------------------------- */ - -double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, - double **acons) -{ - double lprx,lpry,lprz,kspace_prec,real_prec; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - lprx = rms(h_x,xprd,natoms,q2,acons); - lpry = rms(h_y,yprd,natoms,q2,acons); - lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); - kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); - double value = kspace_prec - real_prec; - return value; -} - -/* 
---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFT decomposition -------------------------------------------------------------------------- */ - -void PPPMOld::brick2fft() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my ghosts for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - 
for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = 
nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // remap from 3d brick decomposition to FFT decomposition - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = 
nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - 
MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = 
nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = 
buf2[n++]; - } -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with per-atom field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick_peratom() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - 
buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if 
(vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = 
v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = 
buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMOld::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMOld::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - 
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPMOld::poisson() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 
1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial 
-------------------------------------------------------------------------- */ - -void PPPMOld::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; - work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = 
work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // 
convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += v0; - vatom[i][1] += v1; - vatom[i][2] += v2; - vatom[i][3] += v3; - vatom[i][4] += v4; - vatom[i][5] += v5; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to 
NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array 
rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMOld::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMOld::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - bytes += 2 * nbuf * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) { - bytes += 7 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); - } - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- 
- compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) -{ - if (slabflag) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group"); - - int i,j; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0; //energy - f2group[0] = 0; //force in x-direction - f2group[1] = 0; //force in y-direction - f2group[2] = 0; //force in z-direction - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,BA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(BA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - 
- double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::allocate_groups() -{ - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_groups() -{ - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) - if (BA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::poisson_groups(int BA_flag) -{ - int i,j,k,n; - double eng; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - 
work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (BA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_old.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); + + triclinic_support = 0; + pppmflag = 1; + group_group_enable = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + 
buf1 = buf2 = buf3 = buf4 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + + nmax = 0; + part2grid = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMOld::~PPPMOld() +{ + delete [] factors; + deallocate(); + deallocate_peratom(); + deallocate_groups(); + memory->destroy(part2grid); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMOld::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + peratom_allocate_flag = 0; + deallocate_groups(); + group_allocate_flag = 0; + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp=0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is 
incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from 
accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil extends beyond neighbor proc, reduce order and try again + + int iteration = 0; + + while (order > 1) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond neighbor processor"); + iteration++; + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); + + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my 
box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + triclinic = domain->triclinic; + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi + nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but 
not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + + if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_in = nz_pppm - 1; + nzhi_out = nz_pppm - 1; + } + + // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions + // that overlay domain I own + // proc in that direction tells me via sendrecv() + // if no neighbor proc, value is from self since I have ghosts regardless + + int nplanes; + MPI_Status status; + + nplanes = nxlo_in - nxlo_out; + if (comm->procneigh[0][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, + &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, + world,&status); + else nxhi_ghost = nplanes; + + nplanes = nxhi_out - nxhi_in; + if (comm->procneigh[0][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, + &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], + 0,world,&status); + else nxlo_ghost = nplanes; + + nplanes = nylo_in - nylo_out; + if (comm->procneigh[1][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, + &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, + world,&status); + else nyhi_ghost = nplanes; + + nplanes = nyhi_out - nyhi_in; + if (comm->procneigh[1][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, + &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, + world,&status); + else nylo_ghost = nplanes; + + nplanes = nzlo_in - nzlo_out; + if (comm->procneigh[2][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, + &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, + world,&status); + else nzhi_ghost = nplanes; + + nplanes = nzhi_out - nzhi_in; + if (comm->procneigh[2][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, + &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, + world,&status); + else nzlo_ghost = nplanes; + + // test that ghost overlap is not bigger than my sub-domain + + int flag = 0; + if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 
1; + if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; + if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + + if (flag_all == 0) break; + order--; + } + + if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); + + // buffer space for use in brick2fft and fillbrick + // idel = max # of ghost planes to send or recv in +/- dir of each dim + // nx,ny,nz = owned planes (including 
ghosts) in each dim + // nxx,nyy,nzz = max # of grid cells to send in each dim + // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick + + int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; + + idelx = MAX(nxlo_ghost,nxhi_ghost); + idelx = MAX(idelx,nxhi_out-nxhi_in); + idelx = MAX(idelx,nxlo_in-nxlo_out); + + idely = MAX(nylo_ghost,nyhi_ghost); + idely = MAX(idely,nyhi_out-nyhi_in); + idely = MAX(idely,nylo_in-nylo_out); + + idelz = MAX(nzlo_ghost,nzhi_ghost); + idelz = MAX(idelz,nzhi_out-nzhi_in); + idelz = MAX(idelz,nzlo_in-nzlo_out); + + nx = nxhi_out - nxlo_out + 1; + ny = nyhi_out - nylo_out + 1; + nz = nzhi_out - nzlo_out + 1; + + nxx = idelx * ny * nz; + nyy = idely * nx * nz; + nzz = idelz * nx * ny; + + nbuf = MAX(nxx,nyy); + nbuf = MAX(nbuf,nzz); + + nbuf_peratom = 7*nbuf; + nbuf *= 3; + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + } + + // allocate K-space dependent memory + // don't invoke allocate_peratom() here, wait to see if needed + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMOld::setup() +{ + int i,j,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd 
since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + // modified (Hockney-Eastwood) Coulomb Green's function + + int nx,ny,nz,kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast 
((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + + double form = 1.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); + snz2 = snz*snz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = sin(0.5*unitky*lper*yprd/ny_pppm); + sny2 = sny*sny; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = sin(0.5*unitkx*kper*xprd/nx_pppm); + snx2 = snx*snx; + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706/sqk; + denominator = gf_denom(snx2,sny2,snz2); + sum1 = 0.0; + const double dorder = static_cast(order); + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0); + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMOld::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || 
vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + peratom_allocate_flag = 1; + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + fillbrick(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fillbrick_peratom(); + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + 
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + + memory->create(buf1,nbuf,"pppm:buf1"); + memory->create(buf2,nbuf,"pppm:buf2"); + + // 
summation coeffs + + memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate_peratom() +{ + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + 
memory->create(buf3,nbuf_peratom,"pppm:buf3"); + memory->create(buf4,nbuf_peratom,"pppm:buf4"); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + + memory->destroy(buf1); + memory->destroy(buf2); + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + + delete fft1; + delete fft2; + delete remap; +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_peratom() +{ + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(buf3); + memory->destroy(buf4); +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) 
and g_ewald +------------------------------------------------------------------------- */ + +void PPPMOld::set_grid() +{ + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + double **acons; + memory->create(acons,8,7,"pppm:acons"); + + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; + + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); 
+ g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 + // reduce it until accuracy target is met + + if (!gridflag) { + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = rms(h_x,xprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_x,xprd,natoms,q2,acons); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = rms(h_y,yprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_y,yprd,natoms,q2,acons); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = rms(h_z,zprd_slab,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_z,zprd_slab,natoms,q2,acons); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + // adjust g_ewald for new grid size + + h_x = xprd/static_cast(nx_pppm); + h_y = yprd/static_cast(ny_pppm); + h_z = zprd_slab/static_cast(nz_pppm); + + if (!gewaldflag) { + double gew1,gew2,dgew,f,fmid,hmin,rtb; + int ncount; + + gew1 = 0.0; + g_ewald = gew1; + f = diffpr(h_x,h_y,h_z,q2,acons); + + hmin = MIN(h_x,MIN(h_y,h_z)); + gew2 = 10.0/hmin; + g_ewald = gew2; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + + if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); + rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); + ncount = 0; + while (fabs(dgew) > SMALL && fmid != 0.0) { + dgew *= 0.5; + g_ewald = rtb + dgew; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + if (fmid <= 0.0) rtb = g_ewald; + ncount++; + if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); + } + } + + // final RMS accuracy + + double lprx = rms(h_x,xprd,natoms,q2,acons); + double lpry = rms(h_y,yprd,natoms,q2,acons); + double lprz = rms(h_z,zprd_slab,natoms,q2,acons); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // free local memory + + memory->destroy(acons); + + // print info + + if (me == 0) { +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + if (screen) { + fprintf(screen," G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + } + } +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 
if no +------------------------------------------------------------------------- */ + +int PPPMOld::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double PPPMOld::rms(double h, double prd, bigint natoms, + double q2, double **acons) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); + return value; +} + +/* ---------------------------------------------------------------------- + compute difference in real-space and KSpace RMS accuracy +------------------------------------------------------------------------- */ + +double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, + double **acons) +{ + double lprx,lpry,lprz,kspace_prec,real_prec; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + lprx = rms(h_x,xprd,natoms,q2,acons); + lpry = rms(h_y,yprd,natoms,q2,acons); + lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); + kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); + double value = kspace_prec - real_prec; + return value; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 
0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFT decomposition +------------------------------------------------------------------------- */ + +void PPPMOld::brick2fft() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my ghosts for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + 
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzhi_in+1; iz <= 
nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // remap from 3d brick decomposition to FFT decomposition + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with field values 
+------------------------------------------------------------------------- */ + +void PPPMOld::fillbrick() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] 
= buf2[n++]; + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells 
+ + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } +} + +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with per-atom field values +------------------------------------------------------------------------- */ + +void PPPMOld::fillbrick_peratom() +{ + 
int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) 
buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -y processor + // 
pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + 
comm->procneigh[0][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[0][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my 
particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMOld::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOld::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of 
moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPMOld::poisson() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // compute gradients of V(r) in each 
of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMOld::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j 
= nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; + work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + 
fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial 
+------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + 
// innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k 
<= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMOld::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMOld::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + bytes += 2 * nbuf * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) { + bytes += 7 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); + } + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + return bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- 
+ compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) +{ + if (slabflag) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group"); + + int i,j; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0; //energy + f2group[0] = 0; //force in x-direction + f2group[1] = 0; //force in y-direction + f2group[2] = 0; //force in z-direction + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,BA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(BA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + 
+ double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::allocate_groups() +{ + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_groups() +{ + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) + if (BA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::poisson_groups(int BA_flag) +{ + int i,j,k,n; + double eng; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + 
work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (BA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +} From d1fce2720c9573b7ca07ca7b288b4b184d0e76d7 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 15:29:02 +0000 Subject: [PATCH 06/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11589 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-CUDA/pppm_cuda.cpp | 2872 +++++++++++++++++------------------ 1 file changed, 1436 insertions(+), 1436 deletions(-) diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp index 
58574c4bd5..2d633b74f4 100644 --- a/src/USER-CUDA/pppm_cuda.cpp +++ b/src/USER-CUDA/pppm_cuda.cpp @@ -1,1436 +1,1436 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - - Original Version: - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - See the README file in the top-level LAMMPS directory. - - ----------------------------------------------------------------------- - - USER-CUDA Package and associated modifications: - https://sourceforge.net/projects/lammpscuda/ - - Christian Trott, christian.trott@tu-ilmenau.de - Lars Winterfeld, lars.winterfeld@tu-ilmenau.de - Theoretical Physics II, University of Technology Ilmenau, Germany - - See the README file in the USER-CUDA directory. - - This software is distributed under the GNU General Public License. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) -------------------------------------------------------------------------- */ - - -#include "mpi.h" -#include -#include -#include -#include -#include "pppm_cuda.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap_cuda.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" -#include "update.h" -#include //crmadd -#include "cuda_wrapper_cu.h" -#include "pppm_cuda_cu.h" -#include "cuda.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 4096 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - - -void printArray(double* data,int nx, int ny, int nz) -{ - for(int i=0;icuda; - if(cuda == NULL) - error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); - - if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command"); - #ifndef FFT_CUFFT - error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. 
Aborting."); - #endif - - triclinic_support = 0; - accuracy_relative = atof(arg[0]); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL; - density_fft = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1c = fft2c = NULL; - remap = NULL; - - density_brick_int=NULL; - density_intScale=1000000; - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_greensfn = NULL; - cu_work1 = cu_work2 = cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_virial = NULL; - - cu_gf_b = NULL; - - cu_slabbuf = NULL; - slabbuf = NULL; - - nmax = 0; - part2grid = NULL; - cu_part2grid = NULL; - adev_data_array=NULL; - poissontime=0; - old_nmax=0; - cu_pppm_grid_n=NULL; - cu_pppm_grid_ids=NULL; - - pppm_grid_nmax=0; - pppm2partgrid=new int[3]; - pppm_grid=new int[3]; - firstpass=true; - scale = 1.0; -} - - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMCuda::~PPPMCuda() -{ - delete [] slabbuf; - delete cu_slabbuf; - - delete [] factors; - factors=NULL; - deallocate(); - delete cu_part2grid; - cu_part2grid=NULL; - memory->destroy(part2grid); - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMCuda::init() -{ - - cuda->shared_data.pppm.cudable_force=1; - - //if(cuda->finished_run) {PPPM::init(); return;} - - if (me == 
0) { - if (screen) fprintf(screen,"PPPMCuda initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMCuda"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER); - error->all(FLERR,str); - } - // free all arrays previously allocated - - deallocate(); - - // extract short-range Coulombic cutoff from pair style - - qqrd2e = force->qqrd2e; - - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - - qdist = 0.0; - - if (strcmp(force->kspace_style,"pppm/tip4p") == 0) { - if (force->pair == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - 
typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (2.0 * cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMCuda grid is too large"); - - // global indices of PPPMCuda grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that I own without ghost cells - // for slab PPPMCuda, assign z grid as if it were not extended - - nxlo_in = comm->myloc[0]*nx_pppm / 
comm->procgrid[0]; - nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1; - nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1]; - nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1; - nzlo_in = comm->myloc[2] * - (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2]; - nzhi_in = (comm->myloc[2]+1) * - (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1; - - // nlower,nupper = stencil size for mapping particles to PPPMCuda grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPMCuda grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPMCuda, assign z grid as if it were not extended - - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - 
dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPMCuda, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPMCuda, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - MPI_Status status; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,&status); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,&status); - else nxlo_ghost = nplanes; - - 
nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,&status); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,&status); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,&status); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,&status); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0"); - - - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft 
= 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPMCuda grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - 
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - cuda_shared_pppm* ap=&(cuda->shared_data.pppm); - - ap->density_intScale=density_intScale; - ap->nxlo_in=nxlo_in; - ap->nxhi_in=nxhi_in; - ap->nxlo_out=nxlo_out; - ap->nxhi_out=nxhi_out; - ap->nylo_in=nylo_in; - ap->nyhi_in=nyhi_in; - ap->nylo_out=nylo_out; - ap->nyhi_out=nyhi_out; - ap->nzlo_in=nzlo_in; - ap->nzhi_in=nzhi_in; - ap->nzlo_out=nzlo_out; - ap->nzhi_out=nzhi_out; - ap->nxlo_in=nxlo_fft; - ap->nxhi_in=nxhi_fft; - ap->nylo_in=nylo_fft; - ap->nyhi_in=nyhi_fft; - ap->nzlo_in=nzlo_fft; - ap->nzhi_in=nzhi_fft; - ap->nx_pppm=nx_pppm; - ap->ny_pppm=ny_pppm; - ap->nz_pppm=nz_pppm; - ap->qqrd2e=qqrd2e; - ap->order=order; - ap->nmax=nmax; - ap->nlocal=atom->nlocal; - ap->delxinv=delxinv; - ap->delyinv=delyinv; - ap->delzinv=delzinv; - ap->nlower=nlower; - ap->nupper=nupper; - ap->shiftone=shiftone; - - // allocate K-space dependent memory - - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPMCuda coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMCuda::setup() -{ - double *prd; - cu_gf_b->upload(); - // volume-dependent factors - // adjust z dimension for 2d slab PPPMCuda - // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double 
zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald); - - - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald, -nbx,nby,nbz,xprd,yprd,zprd_slab); - - -#ifdef FFT_CUFFT - cu_vdx_brick->upload(); - cu_vdy_brick->upload(); - cu_vdz_brick->upload(); -#endif - cu_rho_coeff->upload(); - cu_density_brick->memset_device(0); - pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper); -} - -/* ---------------------------------------------------------------------- - compute the PPPMCuda long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMCuda::compute(int eflag, int vflag) -{ - cuda_shared_atom* cu_atom = & cuda->shared_data.atom; - - int i; - my_times starttime; - my_times endtime; - my_times starttotal; - my_times endtotal; - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if ((cu_atom->update_nmax)||(old_nmax==0)) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - delete cu_part2grid; - delete [] adev_data_array; - 
adev_data_array=new dev_array[1]; - cu_part2grid = new cCudaData ((int*)part2grid,adev_data_array, nmax,3); - - pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(),atom->nlocal,atom->nmax); - old_nmax=nmax; - } - if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);} - - energy = 0.0; - if (vflag) - { - for (i = 0; i < 6; i++) virial[i] = 0.0; - cu_virial->memset_device(0); - } - if(eflag) cu_energy->memset_device(0); - my_gettime(CLOCK_REALTIME,&starttotal); - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - - my_gettime(CLOCK_REALTIME,&starttime); - - particle_map(); - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - //cu_part2grid->download(); - my_gettime(CLOCK_REALTIME,&starttime); - make_rho(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - int nprocs=comm->nprocs; - - my_gettime(CLOCK_REALTIME,&starttime); - - if(nprocs>1) - { - cu_density_brick->download(); - brick2fft(); - } - else - { - #ifdef FFT_CUFFT - pppm_initfftdata(&cuda->shared_data,(PPPM_FLOAT*)cu_density_brick->dev_data(),(FFT_FLOAT*)cu_work2->dev_data()); - #endif - } - - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - - my_gettime(CLOCK_REALTIME,&starttime); - poisson(eflag,vflag); - 
my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // all procs communicate E-field values to fill ghost cells - // surrounding their 3d bricks - - // not necessary since all the calculations are done on one proc - - // calculate the force on my particles - - - my_gettime(CLOCK_REALTIME,&starttime); - fieldforce(); - my_gettime(CLOCK_REALTIME,&endtime); - cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - // sum energy across procs and add in volume-dependent term - - my_gettime(CLOCK_REALTIME,&endtotal); - cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000); - - if (eflag) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/1.772453851 + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qqrd2e; - } - - // sum virial across procs - - if (vflag) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*volume*virial_all[i]; - } - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if(firstpass) firstpass=false; -} - - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - - -void PPPMCuda::allocate() -{ - - struct dev_array* dev_tmp=new struct dev_array[20]; - int n_cudata=0; - - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - 
memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick_int"); - - - cu_density_brick = new cCudaData ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - cu_density_brick_int = new cCudaData ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick_tmp"); - - cu_vdx_brick = new cCudaData ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - cu_vdy_brick = new cCudaData ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - cu_vdz_brick = new cCudaData ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), - (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - - cu_density_fft = new cCudaData (density_fft, & (dev_tmp[n_cudata++]),nfft_both); - - cu_energy = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm); - cu_virial = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6); - - memory->create(greensfn,nfft_both,"pppm:greensfn"); - cu_greensfn = new cCudaData (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm); - - 
memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1"); - memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2"); - memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3"); - - cu_work1 = new cCudaData (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work2 = new cCudaData (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - cu_work3 = new cCudaData (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); - - - memory->create(fkx,nx_pppm,"pppmcuda:fkx"); - cu_fkx = new cCudaData (fkx, & (dev_tmp[n_cudata++]) , nx_pppm); - memory->create(fky,ny_pppm,"pppmcuda:fky"); - cu_fky = new cCudaData (fky, & (dev_tmp[n_cudata++]) , ny_pppm); - memory->create(fkz,nz_pppm,"pppmcuda:fkz"); - cu_fkz = new cCudaData (fkz, & (dev_tmp[n_cudata++]) , nz_pppm); - - memory->create(vg,nfft_both,6,"pppm:vg"); - - cu_vg = new cCudaData ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - - // summation coeffs - - - gf_b = new double[order]; - cu_gf_b = new cCudaData (gf_b, &(dev_tmp[n_cudata++]) , order); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - cu_rho_coeff = new cCudaData ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1)); - - debugdata=new PPPM_FLOAT[100]; - cu_debugdata = new cCudaData (debugdata,& (dev_tmp[n_cudata++]),100); - cu_flag = new cCudaData (&global_flag,& (dev_tmp[n_cudata++]),3); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - - - - fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,true); - - fft2c = 
new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,false); - - -#ifdef FFT_CUFFT - fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data()); - fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data()); -#endif - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,2); - - -pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data() - , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), cu_fkz->dev_data(), cu_vg->dev_data() - ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm - ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data() - ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag - ); -} - - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - ---------------------------------------------------------------------- */ - -void PPPMCuda::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - density_fft = NULL; - greensfn = NULL; - work1 = NULL; - work2 = NULL; - vg = NULL; - - 
memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - - fkx = NULL; - fky = NULL; - fkz = NULL; - - delete cu_density_brick; - delete cu_density_brick_int; - delete cu_vdx_brick; - delete cu_vdy_brick; - delete cu_vdz_brick; - delete cu_density_fft; - delete cu_energy; - delete cu_virial; -#ifdef FFT_CUFFT - delete cu_greensfn; - delete cu_gf_b; - delete cu_vg; - delete cu_work1; - delete cu_work2; - delete cu_work3; - delete cu_fkx; - delete cu_fky; - delete cu_fkz; -#endif - - delete cu_flag; - delete cu_debugdata; - delete cu_rho_coeff; - - - cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; - cu_density_brick = NULL; - cu_density_brick_int = NULL; - cu_density_fft = NULL; - cu_energy=NULL; - cu_virial=NULL; -#ifdef FFT_CUFFT - cu_greensfn = NULL; - cu_gf_b = NULL; - cu_work1 = cu_work2 = cu_work3 = NULL; - cu_vg = NULL; - cu_fkx = cu_fky = cu_fkz = NULL; -#endif - - cu_flag = NULL; - cu_debugdata = NULL; - cu_rho_coeff = NULL; - cu_part2grid = NULL; - - memory->destroy(buf1); - memory->destroy(buf2); - - delete [] gf_b; - gf_b = NULL; - memory->destroy2d_offset(rho1d,-order/2); rho1d = NULL; - memory->destroy2d_offset(rho_coeff,(1-order)/2); rho_coeff = NULL; - - delete fft1c; - fft1c = NULL; - - delete fft2c; - fft2c = NULL; - delete remap; - remap = NULL; - buf1 = NULL; - buf2 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald --------------------------------------------------------------------------*/ - -void PPPMCuda::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 
7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - double q2 = qsqsum * force->qqrd2e; - bigint natoms = atom->natoms; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPMCuda - // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired error and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - - if (!gewaldflag) - g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / - (2.0*q2))) / cutoff; - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until precision target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1/g_ewald; - - nx_pppm = static_cast (xprd/h_x + 1); - ny_pppm = static_cast (yprd/h_y + 1); - nz_pppm = static_cast (zprd_slab/h_z + 1); - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = 
rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - - // adjust g_ewald for new grid size - - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - - if (!gewaldflag) { - double gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G"); - rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G"); - } - } - - // final RMS precision - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { - if (screen) { - fprintf(screen," G vector = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(screen," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - if (logfile) { - fprintf(logfile," G vector = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); - fprintf(logfile," relative force accuracy = %g\n", - MAX(lpr,spr)/two_charge_force); - } - } -} - - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - - -void PPPMCuda::particle_map() -{ - MYDBG(printf("# CUDA PPPMCuda::particle_map() ... 
start\n");) - int flag = 0; - - cu_flag->memset_device(0); - flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data()); - if(flag) - { - cu_debugdata->download(); - printf("Out of range atom: "); - printf("ID: %i ",atom->tag[int(debugdata[0])]); - printf("x: %e ",debugdata[7]); - printf("y: %e ",debugdata[8]); - printf("z: %e ",debugdata[9]); - printf("nx: %e ",debugdata[4]); - printf("ny: %e ",debugdata[5]); - - printf("\n"); - //printf("debugdata: cpu: %e %e %e %i\n",boxlo[0],boxlo[1],boxlo[2],atom->nlocal); - cuda->cu_x->download(); - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - if(i==1203)printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out || i==1203) {printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); } - } - - } - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - - -void PPPMCuda::make_rho() -{ - cuda_make_rho(&cuda->shared_data,cu_flag->dev_data(),&density_intScale,nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,cu_density_brick->dev_data(),cu_density_brick_int->dev_data()); -} - - -/* 
---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ -void PPPMCuda::poisson(int eflag, int vflag) -{ - -#ifndef FFT_CUFFT - PPPM::poisson(eflag,vflag); - return; -#endif -#ifdef FFT_CUFFT - my_times starttime; - my_times endtime; - - - my_gettime(CLOCK_REALTIME,&starttime); - fft1c->compute(density_fft,work1,1); - - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - - - if (eflag || vflag) { - poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag); - ENERGY_FLOAT gpuvirial[6]; - energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial); - if(vflag) - { - for(int j=0;j<6;j++) virial[j]+=gpuvirial[j]; - } - } - - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - poisson_scale(nx_pppm,ny_pppm,nz_pppm); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - - poisson_xgrad(nx_pppm,ny_pppm,nz_pppm); - - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - - // y direction gradient - - poisson_ygrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - 
poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - - // z direction gradient - - poisson_zgrad(nx_pppm,ny_pppm,nz_pppm); - - my_gettime(CLOCK_REALTIME,&starttime); - fft2c->compute(work2,work2,-1); - my_gettime(CLOCK_REALTIME,&endtime); - poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); - - poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); - #endif -} - -/*---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles --------------------------------------------------------------------------*/ - -void PPPMCuda::fieldforce() -{ - cuda_fieldforce(& cuda->shared_data,cu_flag); - return; -} - -/* ---------------------------------------------------------------------- - perform and time the 4 FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMCuda::timing_1d(int n, double &time1d) -{ - time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/4*n; - return 4; -} - -int PPPMCuda::timing_3d(int n, double &time3d) -{ - time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n; - return 4; -} - -void PPPMCuda::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - if(slabbuf==NULL) - { - slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); - } - if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_FLOAT)>=unsigned(cu_slabbuf->dev_size())) - { - delete [] slabbuf; - delete cu_slabbuf; - slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; - cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); - } - - - double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_FLOAT*) cu_slabbuf->dev_data()); - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - 
// compute corrections - - double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; - - //if (eflag) energy += qqrd2e*scale * e_slabcorr; - // need to add a correction to make non-neutral systems and per-atom energy translationally invariant - if (eflag || fabs(qsum) > SMALL) - error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting."); - - double ffact = -4.0*MY_PI*dipole_all/volume; - - cuda_slabcorr_force(&cuda->shared_data,ffact); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + + Original Version: + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. + + ----------------------------------------------------------------------- + + USER-CUDA Package and associated modifications: + https://sourceforge.net/projects/lammpscuda/ + + Christian Trott, christian.trott@tu-ilmenau.de + Lars Winterfeld, lars.winterfeld@tu-ilmenau.de + Theoretical Physics II, University of Technology Ilmenau, Germany + + See the README file in the USER-CUDA directory. + + This software is distributed under the GNU General Public License. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) +------------------------------------------------------------------------- */ + + +#include "mpi.h" +#include +#include +#include +#include +#include "pppm_cuda.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap_cuda.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" +#include "update.h" +#include //crmadd +#include "cuda_wrapper_cu.h" +#include "pppm_cuda_cu.h" +#include "cuda.h" +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 4096 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + + +void printArray(double* data,int nx, int ny, int nz) +{ + for(int i=0;icuda; + if(cuda == NULL) + error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS.."); + + if ((narg > 3)||(narg<1)) error->all(FLERR,"Illegal kspace_style pppm/cuda command"); + #ifndef FFT_CUFFT + error->all(FLERR,"Using kspace_style pppm/cuda without cufft is not possible. Compile with cufft=1 to include cufft. 
Aborting."); + #endif + + triclinic_support = 0; + accuracy_relative = atof(arg[0]); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = vdx_brick_tmp = NULL; + density_fft = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + buf1 = buf2 = NULL; + + gf_b = NULL; + rho1d = rho_coeff = NULL; + + fft1c = fft2c = NULL; + remap = NULL; + + density_brick_int=NULL; + density_intScale=1000000; + cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; + cu_density_brick = NULL; + cu_density_brick_int = NULL; + cu_density_fft = NULL; + cu_energy=NULL; + cu_greensfn = NULL; + cu_work1 = cu_work2 = cu_work3 = NULL; + cu_vg = NULL; + cu_fkx = cu_fky = cu_fkz = NULL; + + cu_flag = NULL; + cu_debugdata = NULL; + cu_rho_coeff = NULL; + cu_virial = NULL; + + cu_gf_b = NULL; + + cu_slabbuf = NULL; + slabbuf = NULL; + + nmax = 0; + part2grid = NULL; + cu_part2grid = NULL; + adev_data_array=NULL; + poissontime=0; + old_nmax=0; + cu_pppm_grid_n=NULL; + cu_pppm_grid_ids=NULL; + + pppm_grid_nmax=0; + pppm2partgrid=new int[3]; + pppm_grid=new int[3]; + firstpass=true; + scale = 1.0; +} + + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMCuda::~PPPMCuda() +{ + delete [] slabbuf; + delete cu_slabbuf; + + delete [] factors; + factors=NULL; + deallocate(); + delete cu_part2grid; + cu_part2grid=NULL; + memory->destroy(part2grid); + part2grid = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMCuda::init() +{ + + cuda->shared_data.pppm.cudable_force=1; + + //if(cuda->finished_run) {PPPM::init(); return;} + + if (me == 
0) { + if (screen) fprintf(screen,"PPPMCuda initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMCuda initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMCuda"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMCuda"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPMCuda order cannot be smaller than 2 or greater than %d",MAXORDER); + error->all(FLERR,str); + } + // free all arrays previously allocated + + deallocate(); + + // extract short-range Coulombic cutoff from pair style + + qqrd2e = force->qqrd2e; + + if (force->pair == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + int itmp=0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + + qdist = 0.0; + + if (strcmp(force->kspace_style,"pppm/tip4p") == 0) { + if (force->pair == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + 
typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (2.0 * cos(0.5*theta) * blen); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil extends beyond neighbor proc, reduce order and try again + + int iteration = 0; + + while (order > 1) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMCuda order b/c stencil extends " + "beyond neighbor processor"); + iteration++; + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMCuda grid is too large"); + + // global indices of PPPMCuda grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPMCuda grid that I own without ghost cells + // for slab PPPMCuda, assign z grid as if it were not extended + + nxlo_in = comm->myloc[0]*nx_pppm / 
comm->procgrid[0]; + nxhi_in = (comm->myloc[0]+1)*nx_pppm / comm->procgrid[0] - 1; + nylo_in = comm->myloc[1]*ny_pppm / comm->procgrid[1]; + nyhi_in = (comm->myloc[1]+1)*ny_pppm / comm->procgrid[1] - 1; + nzlo_in = comm->myloc[2] * + (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2]; + nzhi_in = (comm->myloc[2]+1) * + (static_cast (nz_pppm/slab_volfactor)) / comm->procgrid[2] - 1; + + // nlower,nupper = stencil size for mapping particles to PPPMCuda grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPMCuda grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPMCuda, assign z grid as if it were not extended + + + triclinic = domain->triclinic; + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + 
dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi + nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPMCuda, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPMCuda, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + + if (slabflag && ((comm->myloc[2]+1) == (comm->procgrid[2]))) { + nzhi_in = nz_pppm - 1; + nzhi_out = nz_pppm - 1; + } + + // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions + // that overlay domain I own + // proc in that direction tells me via sendrecv() + // if no neighbor proc, value is from self since I have ghosts regardless + + int nplanes; + MPI_Status status; + + nplanes = nxlo_in - nxlo_out; + if (comm->procneigh[0][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, + &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, + world,&status); + else nxhi_ghost = nplanes; + + nplanes = nxhi_out - nxhi_in; + if (comm->procneigh[0][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, + &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], + 0,world,&status); + else nxlo_ghost = nplanes; + + 
nplanes = nylo_in - nylo_out; + if (comm->procneigh[1][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, + &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, + world,&status); + else nyhi_ghost = nplanes; + + nplanes = nyhi_out - nyhi_in; + if (comm->procneigh[1][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, + &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, + world,&status); + else nylo_ghost = nplanes; + + nplanes = nzlo_in - nzlo_out; + if (comm->procneigh[2][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, + &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, + world,&status); + else nzhi_ghost = nplanes; + + nplanes = nzhi_out - nzhi_in; + if (comm->procneigh[2][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, + &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, + world,&status); + else nzlo_ghost = nplanes; + + // test that ghost overlap is not bigger than my sub-domain + + int flag = 0; + if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; + if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + + if (flag_all == 0) break; + order--; + } + + if (order == 0) error->all(FLERR,"PPPMCuda order has been reduced to 0"); + + + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft 
= 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPMCuda grid for this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); + + // buffer space for use in brick2fft and fillbrick + // idel = max # of ghost planes to send or recv in +/- dir of each dim + // nx,ny,nz = owned planes (including ghosts) in each dim + // nxx,nyy,nzz = max # of grid cells to send in each dim + // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick + + int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; + + idelx = MAX(nxlo_ghost,nxhi_ghost); + idelx = MAX(idelx,nxhi_out-nxhi_in); + idelx = MAX(idelx,nxlo_in-nxlo_out); + + idely = MAX(nylo_ghost,nyhi_ghost); + idely = MAX(idely,nyhi_out-nyhi_in); + idely = MAX(idely,nylo_in-nylo_out); + + idelz = MAX(nzlo_ghost,nzhi_ghost); + idelz = MAX(idelz,nzhi_out-nzhi_in); + idelz = MAX(idelz,nzlo_in-nzlo_out); + + nx = nxhi_out - nxlo_out + 1; + ny = nyhi_out - nylo_out + 1; + nz = nzhi_out - nzlo_out + 1; + + nxx = idelx * ny * nz; + nyy = idely * nx * nz; + nzz = idelz * nx * ny; + + nbuf = MAX(nxx,nyy); + nbuf = MAX(nbuf,nzz); + nbuf *= 3; + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + 
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + } + cuda_shared_pppm* ap=&(cuda->shared_data.pppm); + + ap->density_intScale=density_intScale; + ap->nxlo_in=nxlo_in; + ap->nxhi_in=nxhi_in; + ap->nxlo_out=nxlo_out; + ap->nxhi_out=nxhi_out; + ap->nylo_in=nylo_in; + ap->nyhi_in=nyhi_in; + ap->nylo_out=nylo_out; + ap->nyhi_out=nyhi_out; + ap->nzlo_in=nzlo_in; + ap->nzhi_in=nzhi_in; + ap->nzlo_out=nzlo_out; + ap->nzhi_out=nzhi_out; + ap->nxlo_in=nxlo_fft; + ap->nxhi_in=nxhi_fft; + ap->nylo_in=nylo_fft; + ap->nyhi_in=nyhi_fft; + ap->nzlo_in=nzlo_fft; + ap->nzhi_in=nzhi_fft; + ap->nx_pppm=nx_pppm; + ap->ny_pppm=ny_pppm; + ap->nz_pppm=nz_pppm; + ap->qqrd2e=qqrd2e; + ap->order=order; + ap->nmax=nmax; + ap->nlocal=atom->nlocal; + ap->delxinv=delxinv; + ap->delyinv=delyinv; + ap->delzinv=delzinv; + ap->nlower=nlower; + ap->nupper=nupper; + ap->shiftone=shiftone; + + // allocate K-space dependent memory + + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPMCuda coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMCuda::setup() +{ + double *prd; + cu_gf_b->upload(); + // volume-dependent factors + // adjust z dimension for 2d slab PPPMCuda + // z dimension for 3d PPPMCuda is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double 
zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + Cuda_PPPM_Setup_fkxyz_vg(nx_pppm, ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald); + + + + // modified (Hockney-Eastwood) Coulomb Green's function + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + Cuda_PPPM_setup_greensfn(nx_pppm,ny_pppm,nz_pppm,unitkx,unitky,unitkz,g_ewald, +nbx,nby,nbz,xprd,yprd,zprd_slab); + + +#ifdef FFT_CUFFT + cu_vdx_brick->upload(); + cu_vdy_brick->upload(); + cu_vdz_brick->upload(); +#endif + cu_rho_coeff->upload(); + cu_density_brick->memset_device(0); + pppm_device_init_setup(&cuda->shared_data,shiftone,delxinv,delyinv,delzinv,nlower,nupper); +} + +/* ---------------------------------------------------------------------- + compute the PPPMCuda long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMCuda::compute(int eflag, int vflag) +{ + cuda_shared_atom* cu_atom = & cuda->shared_data.atom; + + int i; + my_times starttime; + my_times endtime; + my_times starttotal; + my_times endtotal; + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if ((cu_atom->update_nmax)||(old_nmax==0)) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + delete cu_part2grid; + delete [] adev_data_array; + 
adev_data_array=new dev_array[1]; + cu_part2grid = new cCudaData ((int*)part2grid,adev_data_array, nmax,3); + + pppm_device_update(&cuda->shared_data,cu_part2grid->dev_data(),atom->nlocal,atom->nmax); + old_nmax=nmax; + } + if(cu_atom->update_nlocal) {pppm_update_nlocal(cu_atom->nlocal);} + + energy = 0.0; + if (vflag) + { + for (i = 0; i < 6; i++) virial[i] = 0.0; + cu_virial->memset_device(0); + } + if(eflag) cu_energy->memset_device(0); + my_gettime(CLOCK_REALTIME,&starttotal); + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + + my_gettime(CLOCK_REALTIME,&starttime); + + particle_map(); + + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + //cu_part2grid->download(); + my_gettime(CLOCK_REALTIME,&starttime); + make_rho(); + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + int nprocs=comm->nprocs; + + my_gettime(CLOCK_REALTIME,&starttime); + + if(nprocs>1) + { + cu_density_brick->download(); + brick2fft(); + } + else + { + #ifdef FFT_CUFFT + pppm_initfftdata(&cuda->shared_data,(PPPM_FLOAT*)cu_density_brick->dev_data(),(FFT_FLOAT*)cu_work2->dev_data()); + #endif + } + + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + + my_gettime(CLOCK_REALTIME,&starttime); + poisson(eflag,vflag); + 
my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // all procs communicate E-field values to fill ghost cells + // surrounding their 3d bricks + + // not necessary since all the calculations are done on one proc + + // calculate the force on my particles + + + my_gettime(CLOCK_REALTIME,&starttime); + fieldforce(); + my_gettime(CLOCK_REALTIME,&endtime); + cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + // sum energy across procs and add in volume-dependent term + + my_gettime(CLOCK_REALTIME,&endtotal); + cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000); + + if (eflag) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/1.772453851 + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qqrd2e; + } + + // sum virial across procs + + if (vflag) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*volume*virial_all[i]; + } + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); + + if(firstpass) firstpass=false; +} + + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + + +void PPPMCuda::allocate() +{ + + struct dev_array* dev_tmp=new struct dev_array[20]; + int n_cudata=0; + + + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + 
memory->create3d_offset(density_brick_int,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick_int"); + + + cu_density_brick = new cCudaData ((double*) &(density_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + cu_density_brick_int = new cCudaData ((int*) &(density_brick_int[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdx_brick_tmp,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick_tmp"); + + cu_vdx_brick = new cCudaData ((double*) &(vdx_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + cu_vdy_brick = new cCudaData ((double*) &(vdy_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + cu_vdz_brick = new cCudaData ((double*) &(vdz_brick[nzlo_out][nylo_out][nxlo_out]), & (dev_tmp[n_cudata++]), + (nzhi_out-nzlo_out+1)*(nyhi_out-nylo_out+1)*(nxhi_out-nxlo_out+1)); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + + cu_density_fft = new cCudaData (density_fft, & (dev_tmp[n_cudata++]),nfft_both); + + cu_energy = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm); + cu_virial = new cCudaData (NULL, &(dev_tmp[n_cudata++]),ny_pppm*nz_pppm*6); + + memory->create(greensfn,nfft_both,"pppm:greensfn"); + cu_greensfn = new cCudaData (greensfn, & (dev_tmp[n_cudata++]) , nx_pppm*ny_pppm*nz_pppm); + + 
memory->create(work1,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work1"); + memory->create(work2,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work2"); + memory->create(work3,2*nx_pppm*ny_pppm*nz_pppm,"pppm:work3"); + + cu_work1 = new cCudaData (work1, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + cu_work2 = new cCudaData (work2, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + cu_work3 = new cCudaData (work3, & (dev_tmp[n_cudata++]) , 2*nx_pppm*ny_pppm*nz_pppm); + + + memory->create(fkx,nx_pppm,"pppmcuda:fkx"); + cu_fkx = new cCudaData (fkx, & (dev_tmp[n_cudata++]) , nx_pppm); + memory->create(fky,ny_pppm,"pppmcuda:fky"); + cu_fky = new cCudaData (fky, & (dev_tmp[n_cudata++]) , ny_pppm); + memory->create(fkz,nz_pppm,"pppmcuda:fkz"); + cu_fkz = new cCudaData (fkz, & (dev_tmp[n_cudata++]) , nz_pppm); + + memory->create(vg,nfft_both,6,"pppm:vg"); + + cu_vg = new cCudaData ((double*)vg, & (dev_tmp[n_cudata++]) , nfft_both,6); + + memory->create(buf1,nbuf,"pppm:buf1"); + memory->create(buf2,nbuf,"pppm:buf2"); + + + // summation coeffs + + + gf_b = new double[order]; + cu_gf_b = new cCudaData (gf_b, &(dev_tmp[n_cudata++]) , order); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + + cu_rho_coeff = new cCudaData ((double*) &(rho_coeff[0][(1-order)/2]), & (dev_tmp[n_cudata++]) , order*(order/2-(1-order)/2+1)); + + debugdata=new PPPM_FLOAT[100]; + cu_debugdata = new cCudaData (debugdata,& (dev_tmp[n_cudata++]),100); + cu_flag = new cCudaData (&global_flag,& (dev_tmp[n_cudata++]),3); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + + + + fft1c = new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp,true); + + fft2c = 
new FFT3dCuda(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp,false); + + +#ifdef FFT_CUFFT + fft1c->set_cudata(cu_work2->dev_data(),cu_work1->dev_data()); + fft2c->set_cudata(cu_work2->dev_data(),cu_work3->dev_data()); +#endif + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,2); + + +pppm_device_init(cu_density_brick->dev_data(), cu_vdx_brick->dev_data(), cu_vdy_brick->dev_data(), cu_vdz_brick->dev_data(), cu_density_fft->dev_data(),cu_energy->dev_data(),cu_virial->dev_data() + , cu_work1->dev_data(), cu_work2->dev_data(), cu_work3->dev_data(), cu_greensfn->dev_data(), cu_fkx->dev_data(), cu_fky->dev_data(), cu_fkz->dev_data(), cu_vg->dev_data() + ,nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,nx_pppm,ny_pppm,nz_pppm + ,nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,cu_gf_b->dev_data() + ,qqrd2e,order,cu_rho_coeff->dev_data(),cu_debugdata->dev_data(),cu_density_brick_int->dev_data(),slabflag + ); +} + + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order + ---------------------------------------------------------------------- */ + +void PPPMCuda::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + density_fft = NULL; + greensfn = NULL; + work1 = NULL; + work2 = NULL; + vg = NULL; + + 
memory->destroy(fkx); + memory->destroy(fky); + memory->destroy(fkz); + + fkx = NULL; + fky = NULL; + fkz = NULL; + + delete cu_density_brick; + delete cu_density_brick_int; + delete cu_vdx_brick; + delete cu_vdy_brick; + delete cu_vdz_brick; + delete cu_density_fft; + delete cu_energy; + delete cu_virial; +#ifdef FFT_CUFFT + delete cu_greensfn; + delete cu_gf_b; + delete cu_vg; + delete cu_work1; + delete cu_work2; + delete cu_work3; + delete cu_fkx; + delete cu_fky; + delete cu_fkz; +#endif + + delete cu_flag; + delete cu_debugdata; + delete cu_rho_coeff; + + + cu_vdx_brick = cu_vdy_brick = cu_vdz_brick = NULL; + cu_density_brick = NULL; + cu_density_brick_int = NULL; + cu_density_fft = NULL; + cu_energy=NULL; + cu_virial=NULL; +#ifdef FFT_CUFFT + cu_greensfn = NULL; + cu_gf_b = NULL; + cu_work1 = cu_work2 = cu_work3 = NULL; + cu_vg = NULL; + cu_fkx = cu_fky = cu_fkz = NULL; +#endif + + cu_flag = NULL; + cu_debugdata = NULL; + cu_rho_coeff = NULL; + cu_part2grid = NULL; + + memory->destroy(buf1); + memory->destroy(buf2); + + delete [] gf_b; + gf_b = NULL; + memory->destroy2d_offset(rho1d,-order/2); rho1d = NULL; + memory->destroy2d_offset(rho_coeff,(1-order)/2); rho_coeff = NULL; + + delete fft1c; + fft1c = NULL; + + delete fft2c; + fft2c = NULL; + delete remap; + remap = NULL; + buf1 = NULL; + buf2 = NULL; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) and g_ewald +-------------------------------------------------------------------------*/ + +void PPPMCuda::set_grid() +{ + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + double **acons; + memory->create(acons,8,7,"pppm:acons"); + + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 
7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; + + double q2 = qsqsum * force->qqrd2e; + bigint natoms = atom->natoms; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPMCuda + // 3d PPPMCuda just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired error and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h_x,h_y,h_z; + + if (!gewaldflag) + g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / + (2.0*q2))) / cutoff; + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and precision + // nz_pppm uses extended zprd_slab instead of zprd + // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 + // reduce it until precision target is met + + if (!gridflag) { + double err; + h_x = h_y = h_z = 1/g_ewald; + + nx_pppm = static_cast (xprd/h_x + 1); + ny_pppm = static_cast (yprd/h_y + 1); + nz_pppm = static_cast (zprd_slab/h_z + 1); + + err = rms(h_x,xprd,natoms,q2,acons); + while (err > accuracy) { + err = 
rms(h_x,xprd,natoms,q2,acons); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = rms(h_y,yprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_y,yprd,natoms,q2,acons); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = rms(h_z,zprd_slab,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_z,zprd_slab,natoms,q2,acons); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + + // adjust g_ewald for new grid size + + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + + if (!gewaldflag) { + double gew1,gew2,dgew,f,fmid,hmin,rtb; + int ncount; + + gew1 = 0.0; + g_ewald = gew1; + f = diffpr(h_x,h_y,h_z,q2,acons); + + hmin = MIN(h_x,MIN(h_y,h_z)); + gew2 = 10/hmin; + g_ewald = gew2; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + + if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPMCuda G"); + rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); + ncount = 0; + while (fabs(dgew) > SMALL && fmid != 0.0) { + dgew *= 0.5; + g_ewald = rtb + dgew; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + if (fmid <= 0.0) rtb = g_ewald; + ncount++; + if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPMCuda G"); + } + } + + // final RMS precision + + double lprx = rms(h_x,xprd,natoms,q2,acons); + double lpry = rms(h_y,yprd,natoms,q2,acons); + double lprz = rms(h_z,zprd_slab,natoms,q2,acons); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + + // free local memory + + memory->destroy(acons); + + // print info + + if (me == 0) { + if (screen) { + fprintf(screen," G vector = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); + fprintf(screen," relative force accuracy = %g\n", + MAX(lpr,spr)/two_charge_force); + } + if (logfile) { + fprintf(logfile," G vector = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," absolute RMS force accuracy = %g\n",MAX(lpr,spr)); + fprintf(logfile," relative force accuracy = %g\n", + MAX(lpr,spr)/two_charge_force); + } + } +} + + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + + +void PPPMCuda::particle_map() +{ + MYDBG(printf("# CUDA PPPMCuda::particle_map() ... 
start\n");) + int flag = 0; + + cu_flag->memset_device(0); + flag=cuda_particle_map(&cuda->shared_data,cu_flag->dev_data()); + if(flag) + { + cu_debugdata->download(); + printf("Out of range atom: "); + printf("ID: %i ",atom->tag[int(debugdata[0])]); + printf("x: %e ",debugdata[7]); + printf("y: %e ",debugdata[8]); + printf("z: %e ",debugdata[9]); + printf("nx: %e ",debugdata[4]); + printf("ny: %e ",debugdata[5]); + + printf("\n"); + //printf("debugdata: cpu: %e %e %e %i\n",boxlo[0],boxlo[1],boxlo[2],atom->nlocal); + cuda->cu_x->download(); + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + if(i==1203)printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out || i==1203) {printf("Outside Atom: %i %e %e %e (%i %i %i)\n",i,x[i][0],x[i][1],x[i][2],nx,ny,nz); } + } + + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPMCuda!"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + + +void PPPMCuda::make_rho() +{ + cuda_make_rho(&cuda->shared_data,cu_flag->dev_data(),&density_intScale,nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,cu_density_brick->dev_data(),cu_density_brick_int->dev_data()); +} + + +/* 
---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ +void PPPMCuda::poisson(int eflag, int vflag) +{ + +#ifndef FFT_CUFFT + PPPM::poisson(eflag,vflag); + return; +#endif +#ifdef FFT_CUFFT + my_times starttime; + my_times endtime; + + + my_gettime(CLOCK_REALTIME,&starttime); + fft1c->compute(density_fft,work1,1); + + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + + + if (eflag || vflag) { + poisson_energy(nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,vflag); + ENERGY_FLOAT gpuvirial[6]; + energy+=sum_energy(cu_virial->dev_data(),cu_energy->dev_data(),nx_pppm,ny_pppm,nz_pppm,vflag,gpuvirial); + if(vflag) + { + for(int j=0;j<6;j++) virial[j]+=gpuvirial[j]; + } + } + + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + poisson_scale(nx_pppm,ny_pppm,nz_pppm); + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + + poisson_xgrad(nx_pppm,ny_pppm,nz_pppm); + + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + + + // y direction gradient + + poisson_ygrad(nx_pppm,ny_pppm,nz_pppm); + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + 
poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + + // z direction gradient + + poisson_zgrad(nx_pppm,ny_pppm,nz_pppm); + + my_gettime(CLOCK_REALTIME,&starttime); + fft2c->compute(work2,work2,-1); + my_gettime(CLOCK_REALTIME,&endtime); + poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000); + + poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm); + #endif +} + +/*---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +-------------------------------------------------------------------------*/ + +void PPPMCuda::fieldforce() +{ + cuda_fieldforce(& cuda->shared_data,cu_flag); + return; +} + +/* ---------------------------------------------------------------------- + perform and time the 4 FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMCuda::timing_1d(int n, double &time1d) +{ + time1d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps/4*n; + return 4; +} + +int PPPMCuda::timing_3d(int n, double &time3d) +{ + time3d = cuda->shared_data.cuda_timings.pppm_poisson/update->nsteps*n; + return 4; +} + +void PPPMCuda::slabcorr(int eflag) +{ + // compute local contribution to global dipole moment + if(slabbuf==NULL) + { + slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; + cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); + } + if(unsigned((atom->nlocal+31)/32)*sizeof(ENERGY_FLOAT)>=unsigned(cu_slabbuf->dev_size())) + { + delete [] slabbuf; + delete cu_slabbuf; + slabbuf=new ENERGY_FLOAT[(atom->nmax+31)/32]; + cu_slabbuf = new cCudaData (slabbuf, (atom->nmax+31)/32); + } + + + double dipole = cuda_slabcorr_energy(&cuda->shared_data,slabbuf,(ENERGY_FLOAT*) cu_slabbuf->dev_data()); + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + 
// compute corrections + + double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; + + //if (eflag) energy += qqrd2e*scale * e_slabcorr; + // need to add a correction to make non-neutral systems and per-atom energy translationally invariant + if (eflag || fabs(qsum) > SMALL) + error->all(FLERR,"Cannot (yet) use slab correction with kspace_style pppm/cuda for non-neutral systems or to get per-atom energy. Aborting."); + + double ffact = -4.0*MY_PI*dipole_all/volume; + + cuda_slabcorr_force(&cuda->shared_data,ffact); +} From d7f0208ac9b778448f9497bbf39fb4f536d00f8a Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 17:24:25 +0000 Subject: [PATCH 07/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11594 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/compute_reduce.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index d4bd7e68c5..9f87fcd09b 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -531,7 +531,8 @@ double ComputeReduce::compute_one(int m, int flag) } else if (which[m] == FIX) { if (update->ntimestep % modify->fix[vidx]->peratom_freq) - error->all(FLERR,"Fix used in compute reduce not computed at compatible time"); + error->all(FLERR,"Fix used in compute reduce not " + "computed at compatible time"); Fix *fix = modify->fix[vidx]; if (flavor[m] == PERATOM) { From 99c18a722fd6bdf00625b38142bb161ce78d2822 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 17:24:38 +0000 Subject: [PATCH 08/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11595 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/MISC/fix_deposit.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/MISC/fix_deposit.cpp b/src/MISC/fix_deposit.cpp index 6ea10735e2..7c9e4a1ecb 100644 --- a/src/MISC/fix_deposit.cpp +++ b/src/MISC/fix_deposit.cpp @@ -349,6 +349,8 @@ void FixDeposit::pre_exchange() coords[0][0] = coord[0]; coords[0][1] = coord[1]; coords[0][2] = coord[2]; + 
imageflags[0] = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; } else { if (dimension == 3) { r[0] = random->uniform() - 0.5; From 37d3c55fca8e3c6ff75a95698cba68c3c1c137ee Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 17:33:45 +0000 Subject: [PATCH 09/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11597 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 01bb7ffda5..4340fe74b2 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "4 Mar 2014" +#define LAMMPS_VERSION "6 Mar 2014" From dcab7801eb5751b03445b604bd2133b7e32d8799 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 17:33:45 +0000 Subject: [PATCH 10/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11598 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/Manual.html | 4 ++-- doc/Manual.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/Manual.html b/doc/Manual.html index 8bbed05b10..5cce482cc9 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -1,7 +1,7 @@ LAMMPS Users Manual - + @@ -22,7 +22,7 @@

LAMMPS Documentation

-

4 Mar 2014 version +

6 Mar 2014 version

Version info:

diff --git a/doc/Manual.txt b/doc/Manual.txt index 0cf8837e4b..a639cc4967 100644 --- a/doc/Manual.txt +++ b/doc/Manual.txt @@ -1,6 +1,6 @@ LAMMPS Users Manual - + @@ -18,7 +18,7 @@

LAMMPS Documentation :c,h3 -4 Mar 2014 version :c,h4 +6 Mar 2014 version :c,h4 Version info: :h4 From 15ecab91a3035ed9205cfc34bc950c942899c2b1 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Fri, 7 Mar 2014 00:23:18 +0000 Subject: [PATCH 11/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11602 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/atom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atom.h b/src/atom.h index 0056ecee55..fa917eec76 100644 --- a/src/atom.h +++ b/src/atom.h @@ -187,7 +187,7 @@ class Atom : protected Pointers { void data_bonus(int, char *, class AtomVec *); void data_bodies(int, char *, class AtomVecBody *); - void allocate_type_arrays(); + virtual void allocate_type_arrays(); void set_mass(const char *); void set_mass(int, double); void set_mass(int, char **); @@ -202,7 +202,7 @@ class Atom : protected Pointers { void add_molecule_atom(class Molecule *, int, int, tagint); void first_reorder(); - void sort(); + virtual void sort(); void add_callback(int); void delete_callback(const char *, int); From a1db9a1aaa543d8607eb07b252bf336bf7a1ac5b Mon Sep 17 00:00:00 2001 From: sjplimp Date: Fri, 7 Mar 2014 00:23:36 +0000 Subject: [PATCH 12/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11603 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/MISC/fix_deposit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MISC/fix_deposit.cpp b/src/MISC/fix_deposit.cpp index 7c9e4a1ecb..0369c0d367 100644 --- a/src/MISC/fix_deposit.cpp +++ b/src/MISC/fix_deposit.cpp @@ -343,7 +343,7 @@ void FixDeposit::pre_exchange() // coords = coords of all atoms // for molecule, perform random rotation around center pt // apply PBC so final coords are inside box - // also store image flag modified due to PBC + // also modify image flags due to PBC if (mode == ATOM) { coords[0][0] = coord[0]; From 88cd9113f091d453ca90253dcb4ffb428b80d71f Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 14:11:39 +0000 
Subject: [PATCH 13/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11604 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/pppm_disp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index a15cf38515..e347d528de 100755 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -756,7 +756,7 @@ void PPPMDisp::setup() b = 0.5*sqrt(sqk)*gewinv; bs = b*b; bt = bs*b; - erft = 2*bt*rtpi*erfc(b); + erft = 2*bt*rtpi*erfc((double) b); expt = exp(-bs); nom = erft - 2*bs*expt; denom = nom + expt; From 3f7d79ab82ab37620b1694635bd6a048e37c3031 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 14:24:09 +0000 Subject: [PATCH 14/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11605 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- tools/msi2lmp/src/ReadCarFile.c | 6 +- tools/msi2lmp/src/WriteDataFile.c | 3 +- tools/msi2lmp/src/msi2lmp.c | 8 +- tools/msi2lmp/src/msi2lmp.h | 4 +- .../test/reference/PyAC_bulk-clayff.data | 1608 +++++------ .../test/reference/PyAC_bulk-clayff.data2 | 2562 ++++++++--------- tools/msi2lmp/test/runtests.sh | 6 +- 7 files changed, 2102 insertions(+), 2095 deletions(-) diff --git a/tools/msi2lmp/src/ReadCarFile.c b/tools/msi2lmp/src/ReadCarFile.c index 9a4f0e996b..73209bc994 100644 --- a/tools/msi2lmp/src/ReadCarFile.c +++ b/tools/msi2lmp/src/ReadCarFile.c @@ -25,9 +25,9 @@ void set_box(double box[3][3], double *h, double *h_inv) h_inv[1] = 1.0/h[1]; h_inv[2] = 1.0/h[2]; - h[3] = box[2][0]; + h[3] = box[2][2]; h[4] = box[2][1]; - h[5] = box[2][2]; + h[5] = box[2][0]; h_inv[3] = -h[3] / (h[1]*h[2]); h_inv[4] = (h[3]*h[5] - h[1]*h[4]) / (h[0]*h[1]*h[2]); h_inv[5] = -h[5] / (h[0]*h[1]); @@ -252,6 +252,8 @@ void ReadCarFile(void) } else { + if (pflag > 2) + printf(" pbc[0] %f pbc[1] %f pbc[2] %f\n", pbc[0] ,pbc[1] ,pbc[2]); if (TriclinicFlag == 0) { for (k=0; k < 3; k++) { box[0][k] = -0.5*pbc[k] + center[k] + shift[k]; diff --git 
a/tools/msi2lmp/src/WriteDataFile.c b/tools/msi2lmp/src/WriteDataFile.c index dbed331889..fec44240a9 100644 --- a/tools/msi2lmp/src/WriteDataFile.c +++ b/tools/msi2lmp/src/WriteDataFile.c @@ -30,7 +30,8 @@ void WriteDataFile(char *nameroot) if (forcefield & (FF_TYPE_CLASS1|FF_TYPE_OPLSAA)) total_no_angle_angles = 0; - fprintf(DatF, "LAMMPS data file from msi2lmp v3.8 for %s\n\n", nameroot); + fprintf(DatF, "LAMMPS data file. msi2lmp " MSI2LMP_VERSION + " / CGCMM for %s\n\n", nameroot); fprintf(DatF, " %6d atoms\n", total_no_atoms); fprintf(DatF, " %6d bonds\n", total_no_bonds); fprintf(DatF, " %6d angles\n",total_no_angles); diff --git a/tools/msi2lmp/src/msi2lmp.c b/tools/msi2lmp/src/msi2lmp.c index 68c290dded..ca4e0ed440 100644 --- a/tools/msi2lmp/src/msi2lmp.c +++ b/tools/msi2lmp/src/msi2lmp.c @@ -2,6 +2,10 @@ * * msi2lmp.exe * +* v3.9.3 AK- Bugfix for triclinic cells. +* +* v3.9.2 AK- Support for writing out force field style hints +* * v3.9.1 AK- Bugfix for Class2. Free allocated memory. Print version number. * * v3.9 AK - Rudimentary support for OPLS-AA @@ -134,8 +138,6 @@ #include #include -static const char version[] = "v3.9.1 / 08-Oct-2013"; - /* global variables */ char *rootname; @@ -326,7 +328,7 @@ int main (int argc, char *argv[]) if (pflag > 0) { - printf("\nRunning msi2lmp %s ...\n",version); + puts("\nRunning msi2lmp " MSI2LMP_VERSION "\n"); if (forcefield & FF_TYPE_CLASS1) puts(" Forcefield: Class I"); if (forcefield & FF_TYPE_CLASS2) puts(" Forcefield: Class II"); if (forcefield & FF_TYPE_OPLSAA) puts(" Forcefield: OPLS-AA"); diff --git a/tools/msi2lmp/src/msi2lmp.h b/tools/msi2lmp/src/msi2lmp.h index 14dc3a91cc..72ebec8530 100644 --- a/tools/msi2lmp/src/msi2lmp.h +++ b/tools/msi2lmp/src/msi2lmp.h @@ -27,7 +27,7 @@ * The thrid version was revised in Fall 2011 by * Stephanie Teich-McGoldrick to add support non-orthogonal cells. 
* -* The next revision was done in Summer 2013 by +* The next revision was started in Summer/Fall 2013 by * Axel Kohlmeyer to improve portability to Windows compilers, * clean up command line parsing and improve compatibility with * the then current LAMMPS versions. This revision removes @@ -36,6 +36,8 @@ # include +#define MSI2LMP_VERSION "v3.9.3 / 07 Mar 2014" + #define PI_180 0.01745329251994329576 #define MAX_LINE_LENGTH 256 diff --git a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data index df9f2dd71f..60523b5bef 100644 --- a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data +++ b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data @@ -1,4 +1,4 @@ -LAMMPS data file from msi2lmp v3.8 for PyAC_bulk +LAMMPS data file. msi2lmp v3.9.3 / 07 Mar 2014 / CGCMM for PyAC_bulk-clayff 1280 atoms 128 bonds @@ -16,11 +16,11 @@ LAMMPS data file from msi2lmp v3.8 for PyAC_bulk Masses - 1 26.98154000 - 2 28.08550000 - 3 15.99940000 - 4 15.99940000 - 5 1.00797000 + 1 26.981540 + 2 28.085500 + 3 15.999400 + 4 15.999400 + 5 1.007970 Pair Coeffs @@ -79,7 +79,7 @@ Atoms 41 1 1 1.575000 7.746827709 1.497292339 0.000000000 0 0 0 42 1 2 2.100000 -12.074977890 8.885904024 2.679766593 1 0 0 43 1 2 2.100000 -12.038647070 2.867020152 2.686199476 1 0 0 - 44 1 3 -1.050000 -12.324475216 -0.004862688 1.061430252 1 0 0 + 44 1 3 -1.050000 8.315524784 -0.004862688 1.061430252 0 0 0 45 1 3 -1.050000 -11.885136232 2.739521172 1.064187192 1 0 0 46 1 4 -0.950000 6.155124454 1.708058368 0.993425143 0 0 0 47 1 3 -1.050000 4.829856365 3.423343273 3.298245183 0 0 0 @@ -197,7 +197,7 @@ Atoms 159 1 3 -1.050000 0.334233214 0.575096023 -3.055632561 1 0 0 160 1 5 0.425000 -4.638508017 2.781711457 -1.194847627 1 0 0 161 1 1 1.575000 2.643162624 10.463114961 0.000000000 0 0 0 - 162 1 2 2.100000 3.461357024 17.851726646 2.679766593 0 0 0 + 162 1 2 2.100000 3.236018349 -18.011565429 2.679766593 0 1 0 163 1 2 2.100000 3.497687844 11.832842774 2.686199476 0 0 0 
164 1 3 -1.050000 3.211859698 8.960959933 1.061430252 0 0 0 165 1 3 -1.050000 3.651198683 11.705343793 1.064187192 0 0 0 @@ -219,7 +219,7 @@ Atoms 181 1 1 1.575000 2.685841509 16.434352904 0.000000000 0 0 0 182 1 2 2.100000 1.867647109 9.045741219 -2.679766593 0 0 0 183 1 2 2.100000 1.831316289 15.064625091 -2.686199476 0 0 0 - 184 1 3 -1.050000 2.117144434 17.936507931 -1.061430252 0 0 0 + 184 1 3 -1.050000 1.891805759 -17.926784144 -1.061430252 0 1 0 185 1 3 -1.050000 1.677805450 15.192124071 -1.064187192 0 0 0 186 1 4 -0.950000 4.277544765 16.223586875 -0.993425143 0 0 0 187 1 3 -1.050000 5.602812853 14.508301970 -3.298245183 0 0 0 @@ -237,9 +237,9 @@ Atoms 199 1 3 -1.050000 5.550568433 9.540918645 -3.055632561 0 0 0 200 1 5 0.425000 0.577827203 11.747534078 -1.194847627 0 0 0 201 1 1 1.575000 7.803162471 10.463114961 0.000000000 0 0 0 - 202 1 2 2.100000 -12.018643129 17.851726646 2.679766593 1 0 0 + 202 1 2 2.100000 -12.243981804 -18.011565429 2.679766593 1 1 0 203 1 2 2.100000 -11.982312309 11.832842774 2.686199476 1 0 0 - 204 1 3 -1.050000 -12.268140454 8.960959933 1.061430252 1 0 0 + 204 1 3 -1.050000 8.371859546 8.960959933 1.061430252 0 0 0 205 1 3 -1.050000 -11.828801470 11.705343793 1.064187192 1 0 0 206 1 4 -0.950000 6.211459215 10.673880990 0.993425143 0 0 0 207 1 3 -1.050000 4.886191127 12.389165895 3.298245183 0 0 0 @@ -259,7 +259,7 @@ Atoms 221 1 1 1.575000 7.845841356 16.434352904 0.000000000 0 0 0 222 1 2 2.100000 7.027646956 9.045741219 -2.679766593 0 0 0 223 1 2 2.100000 6.991316136 15.064625091 -2.686199476 0 0 0 - 224 1 3 -1.050000 7.277144282 17.936507931 -1.061430252 0 0 0 + 224 1 3 -1.050000 7.051805607 -17.926784144 -1.061430252 0 1 0 225 1 3 -1.050000 6.837805298 15.192124071 -1.064187192 0 0 0 226 1 4 -0.950000 -11.202455388 16.223586875 -0.993425143 1 0 0 227 1 3 -1.050000 -9.877187299 14.508301970 -3.298245183 1 0 0 @@ -277,7 +277,7 @@ Atoms 239 1 3 -1.050000 -9.929431719 9.540918645 -3.055632561 1 0 0 240 1 5 0.425000 5.737827050 
11.747534078 -1.194847627 0 0 0 241 1 1 1.575000 -7.676837681 10.463114961 0.000000000 1 0 0 - 242 1 2 2.100000 -6.858643281 17.851726646 2.679766593 1 0 0 + 242 1 2 2.100000 -7.083981956 -18.011565429 2.679766593 1 1 0 243 1 2 2.100000 -6.822312461 11.832842774 2.686199476 1 0 0 244 1 3 -1.050000 -7.108140607 8.960959933 1.061430252 1 0 0 245 1 3 -1.050000 -6.668801623 11.705343793 1.064187192 1 0 0 @@ -299,7 +299,7 @@ Atoms 261 1 1 1.575000 -7.634158796 16.434352904 0.000000000 1 0 0 262 1 2 2.100000 -8.452353196 9.045741219 -2.679766593 1 0 0 263 1 2 2.100000 -8.488684016 15.064625091 -2.686199476 1 0 0 - 264 1 3 -1.050000 -8.202855871 17.936507931 -1.061430252 1 0 0 + 264 1 3 -1.050000 -8.428194546 -17.926784144 -1.061430252 1 1 0 265 1 3 -1.050000 -8.642194855 15.192124071 -1.064187192 1 0 0 266 1 4 -0.950000 -6.042455540 16.223586875 -0.993425143 1 0 0 267 1 3 -1.050000 -4.717187452 14.508301970 -3.298245183 1 0 0 @@ -317,7 +317,7 @@ Atoms 279 1 3 -1.050000 -4.769431872 9.540918645 -3.055632561 1 0 0 280 1 5 0.425000 -9.742173103 11.747534078 -1.194847627 1 0 0 281 1 1 1.575000 -2.516837834 10.463114961 0.000000000 1 0 0 - 282 1 2 2.100000 -1.698643434 17.851726646 2.679766593 1 0 0 + 282 1 2 2.100000 -1.923982109 -18.011565429 2.679766593 1 1 0 283 1 2 2.100000 -1.662312614 11.832842774 2.686199476 1 0 0 284 1 3 -1.050000 -1.948140759 8.960959933 1.061430252 1 0 0 285 1 3 -1.050000 -1.508801775 11.705343793 1.064187192 1 0 0 @@ -339,7 +339,7 @@ Atoms 301 1 1 1.575000 -2.474158949 16.434352904 0.000000000 1 0 0 302 1 2 2.100000 -3.292353349 9.045741219 -2.679766593 1 0 0 303 1 2 2.100000 -3.328684169 15.064625091 -2.686199476 1 0 0 - 304 1 3 -1.050000 -3.042856023 17.936507931 -1.061430252 1 0 0 + 304 1 3 -1.050000 -3.268194698 -17.926784144 -1.061430252 1 1 0 305 1 3 -1.050000 -3.482195007 15.192124071 -1.064187192 1 0 0 306 1 4 -0.950000 -0.882455693 16.223586875 -0.993425143 1 0 0 307 1 3 -1.050000 0.442812395 14.508301970 -3.298245183 1 0 0 @@ -356,346 
+356,346 @@ Atoms 318 1 3 -1.050000 -0.478053509 12.046196033 -3.293650032 1 0 0 319 1 3 -1.050000 0.390567976 9.540918645 -3.055632561 1 0 0 320 1 5 0.425000 -4.582173255 11.747534078 -1.194847627 1 0 0 - 321 1 1 1.575000 3.063153909 -16.434354493 0.000000000 0 1 0 - 322 1 2 2.100000 3.881348309 -9.045742808 2.679766593 0 1 0 - 323 1 2 2.100000 3.917679129 -15.064626680 2.686199476 0 1 0 - 324 1 3 -1.050000 3.268194460 17.926782555 1.061430252 0 0 0 - 325 1 3 -1.050000 4.071189967 -15.192125660 1.064187192 0 1 0 - 326 1 4 -0.950000 1.471450653 -16.223588464 0.993425143 0 1 0 - 327 1 3 -1.050000 0.146182564 -14.508303559 3.298245183 0 1 0 - 328 1 3 -1.050000 3.618881011 -16.529108933 3.293650032 0 1 0 - 329 1 3 -1.050000 2.750259527 -14.023831544 3.055632561 0 1 0 - 330 1 5 0.425000 2.563000910 -16.230446978 1.194847627 0 1 0 - 331 1 1 1.575000 5.671321213 -11.951443182 0.000000000 0 1 0 - 332 1 2 2.100000 1.273181004 -13.528654119 2.679766593 0 1 0 - 333 1 2 2.100000 1.365846586 -10.581715369 2.686199476 0 1 0 - 334 1 3 -1.050000 1.080018440 -13.453598210 1.061430252 0 1 0 - 335 1 3 -1.050000 1.519357425 -10.709214349 1.064187192 0 1 0 - 336 1 4 -0.950000 4.079617957 -11.740677153 0.993425143 0 1 0 - 337 1 3 -1.050000 2.754349869 -10.025392248 3.298245183 0 1 0 - 338 1 3 -1.050000 1.067048469 -12.046197622 3.293650032 0 1 0 - 339 1 3 -1.050000 0.198426984 -9.540920234 3.055632561 0 1 0 - 340 1 5 0.425000 5.171168215 -11.747535667 1.194847627 0 1 0 - 341 1 1 1.575000 3.105832794 -10.463116550 0.000000000 0 1 0 - 342 1 2 2.100000 2.287638394 -17.851728235 -2.679766593 0 1 0 - 343 1 2 2.100000 2.251307574 -11.832844363 -2.686199476 0 1 0 - 344 1 3 -1.050000 2.537135719 -8.960961522 -1.061430252 0 1 0 - 345 1 3 -1.050000 2.097796735 -11.705345382 -1.064187192 0 1 0 - 346 1 4 -0.950000 4.697536050 -10.673882579 -0.993425143 0 1 0 - 347 1 3 -1.050000 6.022804138 -12.389167484 -3.298245183 0 1 0 - 348 1 3 -1.050000 2.550105691 -10.368362109 -3.293650032 0 1 0 - 349 1 3 
-1.050000 3.418727175 -12.873639498 -3.055632561 0 1 0 - 350 1 5 0.425000 3.605985792 -10.667024064 -1.194847627 0 1 0 - 351 1 1 1.575000 0.497665489 -14.946027861 0.000000000 0 1 0 - 352 1 2 2.100000 4.895805698 -13.368816924 -2.679766593 0 1 0 - 353 1 2 2.100000 4.803140117 -16.315755673 -2.686199476 0 1 0 - 354 1 3 -1.050000 5.088968262 -13.443872833 -1.061430252 0 1 0 - 355 1 3 -1.050000 4.649629278 -16.188256693 -1.064187192 0 1 0 - 356 1 4 -0.950000 2.089368745 -15.156793890 -0.993425143 0 1 0 - 357 1 3 -1.050000 3.414636833 -16.872078794 -3.298245183 0 1 0 - 358 1 3 -1.050000 5.101938234 -14.851273420 -3.293650032 0 1 0 - 359 1 3 -1.050000 5.970559718 -17.356550809 -3.055632561 0 1 0 - 360 1 5 0.425000 0.997818487 -15.149935375 -1.194847627 0 1 0 - 361 1 1 1.575000 8.223153756 -16.434354493 0.000000000 0 1 0 - 362 1 2 2.100000 -11.598651844 -9.045742808 2.679766593 1 1 0 - 363 1 2 2.100000 -11.562321024 -15.064626680 2.686199476 1 1 0 - 364 1 3 -1.050000 -12.211805692 17.926782555 1.061430252 1 0 0 - 365 1 3 -1.050000 -11.408810185 -15.192125660 1.064187192 1 1 0 - 366 1 4 -0.950000 6.631450500 -16.223588464 0.993425143 0 1 0 - 367 1 3 -1.050000 5.306182412 -14.508303559 3.298245183 0 1 0 - 368 1 3 -1.050000 -11.861119141 -16.529108933 3.293650032 1 1 0 - 369 1 3 -1.050000 7.910259374 -14.023831544 3.055632561 0 1 0 - 370 1 5 0.425000 7.723000758 -16.230446978 1.194847627 0 1 0 - 371 1 1 1.575000 -9.808678939 -11.951443182 0.000000000 1 1 0 - 372 1 2 2.100000 6.433180852 -13.528654119 2.679766593 0 1 0 - 373 1 2 2.100000 6.525846433 -10.581715369 2.686199476 0 1 0 - 374 1 3 -1.050000 6.240018288 -13.453598210 1.061430252 0 1 0 - 375 1 3 -1.050000 6.679357272 -10.709214349 1.064187192 0 1 0 - 376 1 4 -0.950000 -11.400382195 -11.740677153 0.993425143 1 1 0 - 377 1 3 -1.050000 7.914349716 -10.025392248 3.298245183 0 1 0 - 378 1 3 -1.050000 6.227048316 -12.046197622 3.293650032 0 1 0 - 379 1 3 -1.050000 5.358426832 -9.540920234 3.055632561 0 1 0 - 380 1 5 
0.425000 -10.308831938 -11.747535667 1.194847627 1 1 0 - 381 1 1 1.575000 8.265832641 -10.463116550 0.000000000 0 1 0 - 382 1 2 2.100000 7.447638241 -17.851728235 -2.679766593 0 1 0 - 383 1 2 2.100000 7.411307421 -11.832844363 -2.686199476 0 1 0 - 384 1 3 -1.050000 7.697135567 -8.960961522 -1.061430252 0 1 0 - 385 1 3 -1.050000 7.257796583 -11.705345382 -1.064187192 0 1 0 - 386 1 4 -0.950000 -10.782464103 -10.673882579 -0.993425143 1 1 0 - 387 1 3 -1.050000 -9.457196015 -12.389167484 -3.298245183 1 1 0 - 388 1 3 -1.050000 7.710105538 -10.368362109 -3.293650032 0 1 0 - 389 1 3 -1.050000 8.578727023 -12.873639498 -3.055632561 0 1 0 - 390 1 5 0.425000 8.765985640 -10.667024064 -1.194847627 0 1 0 - 391 1 1 1.575000 5.657665337 -14.946027861 0.000000000 0 1 0 - 392 1 2 2.100000 -10.584194454 -13.368816924 -2.679766593 1 1 0 - 393 1 2 2.100000 -10.676860036 -16.315755673 -2.686199476 1 1 0 - 394 1 3 -1.050000 -10.391031891 -13.443872833 -1.061430252 1 1 0 - 395 1 3 -1.050000 -10.830370875 -16.188256693 -1.064187192 1 1 0 - 396 1 4 -0.950000 7.249368593 -15.156793890 -0.993425143 0 1 0 - 397 1 3 -1.050000 8.574636681 -16.872078794 -3.298245183 0 1 0 - 398 1 3 -1.050000 -10.378061919 -14.851273420 -3.293650032 1 1 0 - 399 1 3 -1.050000 -9.509440434 -17.356550809 -3.055632561 1 1 0 - 400 1 5 0.425000 6.157818335 -15.149935375 -1.194847627 0 1 0 - 401 1 1 1.575000 -7.256846397 -16.434354493 0.000000000 1 1 0 - 402 1 2 2.100000 -6.438651996 -9.045742808 2.679766593 1 1 0 - 403 1 2 2.100000 -6.402321176 -15.064626680 2.686199476 1 1 0 - 404 1 3 -1.050000 -7.051805845 17.926782555 1.061430252 1 0 0 - 405 1 3 -1.050000 -6.248810338 -15.192125660 1.064187192 1 1 0 - 406 1 4 -0.950000 -8.848549652 -16.223588464 0.993425143 1 1 0 - 407 1 3 -1.050000 -10.173817741 -14.508303559 3.298245183 1 1 0 - 408 1 3 -1.050000 -6.701119294 -16.529108933 3.293650032 1 1 0 - 409 1 3 -1.050000 -7.569740778 -14.023831544 3.055632561 1 1 0 - 410 1 5 0.425000 -7.756999395 -16.230446978 1.194847627 1 
1 0 - 411 1 1 1.575000 -4.648679092 -11.951443182 0.000000000 1 1 0 - 412 1 2 2.100000 -9.046819301 -13.528654119 2.679766593 1 1 0 - 413 1 2 2.100000 -8.954153719 -10.581715369 2.686199476 1 1 0 - 414 1 3 -1.050000 -9.239981865 -13.453598210 1.061430252 1 1 0 - 415 1 3 -1.050000 -8.800642881 -10.709214349 1.064187192 1 1 0 - 416 1 4 -0.950000 -6.240382348 -11.740677153 0.993425143 1 1 0 - 417 1 3 -1.050000 -7.565650436 -10.025392248 3.298245183 1 1 0 - 418 1 3 -1.050000 -9.252951837 -12.046197622 3.293650032 1 1 0 - 419 1 3 -1.050000 -10.121573321 -9.540920234 3.055632561 1 1 0 - 420 1 5 0.425000 -5.148832090 -11.747535667 1.194847627 1 1 0 - 421 1 1 1.575000 -7.214167511 -10.463116550 0.000000000 1 1 0 - 422 1 2 2.100000 -8.032361912 -17.851728235 -2.679766593 1 1 0 - 423 1 2 2.100000 -8.068692731 -11.832844363 -2.686199476 1 1 0 - 424 1 3 -1.050000 -7.782864586 -8.960961522 -1.061430252 1 1 0 - 425 1 3 -1.050000 -8.222203570 -11.705345382 -1.064187192 1 1 0 - 426 1 4 -0.950000 -5.622464255 -10.673882579 -0.993425143 1 1 0 - 427 1 3 -1.050000 -4.297196167 -12.389167484 -3.298245183 1 1 0 - 428 1 3 -1.050000 -7.769894614 -10.368362109 -3.293650032 1 1 0 - 429 1 3 -1.050000 -6.901273130 -12.873639498 -3.055632561 1 1 0 - 430 1 5 0.425000 -6.714014513 -10.667024064 -1.194847627 1 1 0 - 431 1 1 1.575000 -9.822334816 -14.946027861 0.000000000 1 1 0 - 432 1 2 2.100000 -5.424194607 -13.368816924 -2.679766593 1 1 0 - 433 1 2 2.100000 -5.516860189 -16.315755673 -2.686199476 1 1 0 - 434 1 3 -1.050000 -5.231032043 -13.443872833 -1.061430252 1 1 0 - 435 1 3 -1.050000 -5.670371027 -16.188256693 -1.064187192 1 1 0 - 436 1 4 -0.950000 -8.230631560 -15.156793890 -0.993425143 1 1 0 - 437 1 3 -1.050000 -6.905363472 -16.872078794 -3.298245183 1 1 0 - 438 1 3 -1.050000 -5.218062071 -14.851273420 -3.293650032 1 1 0 - 439 1 3 -1.050000 -4.349440587 -17.356550809 -3.055632561 1 1 0 - 440 1 5 0.425000 -9.322181818 -15.149935375 -1.194847627 1 1 0 - 441 1 1 1.575000 -2.096846549 
-16.434354493 0.000000000 1 1 0 - 442 1 2 2.100000 -1.278652149 -9.045742808 2.679766593 1 1 0 - 443 1 2 2.100000 -1.242321329 -15.064626680 2.686199476 1 1 0 - 444 1 3 -1.050000 -1.891805998 17.926782555 1.061430252 1 0 0 - 445 1 3 -1.050000 -1.088810490 -15.192125660 1.064187192 1 1 0 - 446 1 4 -0.950000 -3.688549805 -16.223588464 0.993425143 1 1 0 - 447 1 3 -1.050000 -5.013817893 -14.508303559 3.298245183 1 1 0 - 448 1 3 -1.050000 -1.541119446 -16.529108933 3.293650032 1 1 0 - 449 1 3 -1.050000 -2.409740931 -14.023831544 3.055632561 1 1 0 - 450 1 5 0.425000 -2.596999547 -16.230446978 1.194847627 1 1 0 - 451 1 1 1.575000 0.511320755 -11.951443182 0.000000000 1 1 0 - 452 1 2 2.100000 -3.886819453 -13.528654119 2.679766593 1 1 0 - 453 1 2 2.100000 -3.794153872 -10.581715369 2.686199476 1 1 0 - 454 1 3 -1.050000 -4.079982017 -13.453598210 1.061430252 1 1 0 - 455 1 3 -1.050000 -3.640643033 -10.709214349 1.064187192 1 1 0 - 456 1 4 -0.950000 -1.080382500 -11.740677153 0.993425143 1 1 0 - 457 1 3 -1.050000 -2.405650589 -10.025392248 3.298245183 1 1 0 - 458 1 3 -1.050000 -4.092951989 -12.046197622 3.293650032 1 1 0 - 459 1 3 -1.050000 -4.961573474 -9.540920234 3.055632561 1 1 0 - 460 1 5 0.425000 0.011167757 -11.747535667 1.194847627 1 1 0 - 461 1 1 1.575000 -2.054167664 -10.463116550 0.000000000 1 1 0 - 462 1 2 2.100000 -2.872362064 -17.851728235 -2.679766593 1 1 0 - 463 1 2 2.100000 -2.908692884 -11.832844363 -2.686199476 1 1 0 - 464 1 3 -1.050000 -2.622864739 -8.960961522 -1.061430252 1 1 0 - 465 1 3 -1.050000 -3.062203723 -11.705345382 -1.064187192 1 1 0 - 466 1 4 -0.950000 -0.462464408 -10.673882579 -0.993425143 1 1 0 - 467 1 3 -1.050000 0.862803680 -12.389167484 -3.298245183 1 1 0 - 468 1 3 -1.050000 -2.609894767 -10.368362109 -3.293650032 1 1 0 - 469 1 3 -1.050000 -1.741273282 -12.873639498 -3.055632561 1 1 0 - 470 1 5 0.425000 -1.554014666 -10.667024064 -1.194847627 1 1 0 - 471 1 1 1.575000 -4.662334969 -14.946027861 0.000000000 1 1 0 - 472 1 2 2.100000 
-0.264194760 -13.368816924 -2.679766593 1 1 0 - 473 1 2 2.100000 -0.356860341 -16.315755673 -2.686199476 1 1 0 - 474 1 3 -1.050000 -0.071032196 -13.443872833 -1.061430252 1 1 0 - 475 1 3 -1.050000 -0.510371180 -16.188256693 -1.064187192 1 1 0 - 476 1 4 -0.950000 -3.070631713 -15.156793890 -0.993425143 1 1 0 - 477 1 3 -1.050000 -1.745363624 -16.872078794 -3.298245183 1 1 0 - 478 1 3 -1.050000 -0.058062224 -14.851273420 -3.293650032 1 1 0 - 479 1 3 -1.050000 0.810559260 -17.356550809 -3.055632561 1 1 0 - 480 1 5 0.425000 -4.162181970 -15.149935375 -1.194847627 1 1 0 - 481 1 1 1.575000 3.119488670 -7.468531871 0.000000000 0 1 0 - 482 1 2 2.100000 3.937683071 -0.079920186 2.679766593 0 1 0 - 483 1 2 2.100000 3.974013891 -6.098804058 2.686199476 0 1 0 - 484 1 3 -1.050000 3.688185745 -8.970686899 1.061430252 0 1 0 - 485 1 3 -1.050000 4.127524729 -6.226303039 1.064187192 0 1 0 - 486 1 4 -0.950000 1.527785415 -7.257765842 0.993425143 0 1 0 - 487 1 3 -1.050000 0.202517326 -5.542480937 3.298245183 0 1 0 - 488 1 3 -1.050000 3.675215773 -7.563286311 3.293650032 0 1 0 - 489 1 3 -1.050000 2.806594289 -5.058008923 3.055632561 0 1 0 - 490 1 5 0.425000 2.619335672 -7.264624356 1.194847627 0 1 0 - 491 1 1 1.575000 5.727655975 -2.985620560 0.000000000 0 1 0 - 492 1 2 2.100000 1.329515766 -4.562831497 2.679766593 0 1 0 - 493 1 2 2.100000 1.422181348 -1.615892747 2.686199476 0 1 0 - 494 1 3 -1.050000 1.136353202 -4.487775588 1.061430252 0 1 0 - 495 1 3 -1.050000 1.575692186 -1.743391728 1.064187192 0 1 0 - 496 1 4 -0.950000 4.135952719 -2.774854531 0.993425143 0 1 0 - 497 1 3 -1.050000 2.810684631 -1.059569627 3.298245183 0 1 0 - 498 1 3 -1.050000 1.123383230 -3.080375001 3.293650032 0 1 0 - 499 1 3 -1.050000 0.254761746 -0.575097612 3.055632561 0 1 0 - 500 1 5 0.425000 5.227502977 -2.781713046 1.194847627 0 1 0 - 501 1 1 1.575000 3.162167556 -1.497293928 0.000000000 0 1 0 - 502 1 2 2.100000 2.343973155 -8.885905613 -2.679766593 0 1 0 - 503 1 2 2.100000 2.307642336 -2.867021741 
-2.686199476 0 1 0 - 504 1 3 -1.050000 2.593470481 0.004861099 -1.061430252 0 1 0 - 505 1 3 -1.050000 2.154131497 -2.739522761 -1.064187192 0 1 0 - 506 1 4 -0.950000 4.753870812 -1.708059957 -0.993425143 0 1 0 - 507 1 3 -1.050000 6.079138900 -3.423344862 -3.298245183 0 1 0 - 508 1 3 -1.050000 2.606440453 -1.402539488 -3.293650032 0 1 0 - 509 1 3 -1.050000 3.475061937 -3.907816877 -3.055632561 0 1 0 - 510 1 5 0.425000 3.662320554 -1.701201443 -1.194847627 0 1 0 - 511 1 1 1.575000 0.554000251 -5.980205239 0.000000000 0 1 0 - 512 1 2 2.100000 4.952140460 -4.402994302 -2.679766593 0 1 0 - 513 1 2 2.100000 4.859474878 -7.349933052 -2.686199476 0 1 0 - 514 1 3 -1.050000 5.145303024 -4.478050211 -1.061430252 0 1 0 - 515 1 3 -1.050000 4.705964040 -7.222434071 -1.064187192 0 1 0 - 516 1 4 -0.950000 2.145703507 -6.190971268 -0.993425143 0 1 0 - 517 1 3 -1.050000 3.470971595 -7.906256173 -3.298245183 0 1 0 - 518 1 3 -1.050000 5.158272996 -5.885450799 -3.293650032 0 1 0 - 519 1 3 -1.050000 6.026894480 -8.390728187 -3.055632561 0 1 0 - 520 1 5 0.425000 1.054153249 -6.184112754 -1.194847627 0 1 0 - 521 1 1 1.575000 8.279488518 -7.468531871 0.000000000 0 1 0 - 522 1 2 2.100000 -11.542317082 -0.079920186 2.679766593 1 1 0 - 523 1 2 2.100000 -11.505986262 -6.098804058 2.686199476 1 1 0 - 524 1 3 -1.050000 -11.791814408 -8.970686899 1.061430252 1 1 0 - 525 1 3 -1.050000 -11.352475423 -6.226303039 1.064187192 1 1 0 - 526 1 4 -0.950000 6.687785262 -7.257765842 0.993425143 0 1 0 - 527 1 3 -1.050000 5.362517174 -5.542480937 3.298245183 0 1 0 - 528 1 3 -1.050000 -11.804784379 -7.563286311 3.293650032 1 1 0 - 529 1 3 -1.050000 -12.673405864 -5.058008923 3.055632561 1 1 0 - 530 1 5 0.425000 7.779335520 -7.264624356 1.194847627 0 1 0 - 531 1 1 1.575000 -9.752344178 -2.985620560 0.000000000 1 1 0 - 532 1 2 2.100000 6.489515613 -4.562831497 2.679766593 0 1 0 - 533 1 2 2.100000 6.582181195 -1.615892747 2.686199476 0 1 0 - 534 1 3 -1.050000 6.296353050 -4.487775588 1.061430252 0 1 0 - 535 1 3 
-1.050000 6.735692034 -1.743391728 1.064187192 0 1 0 - 536 1 4 -0.950000 -11.344047433 -2.774854531 0.993425143 1 1 0 - 537 1 3 -1.050000 -12.669315522 -1.059569627 3.298245183 1 1 0 - 538 1 3 -1.050000 6.283383078 -3.080375001 3.293650032 0 1 0 - 539 1 3 -1.050000 5.414761593 -0.575097612 3.055632561 0 1 0 - 540 1 5 0.425000 -10.252497176 -2.781713046 1.194847627 1 1 0 - 541 1 1 1.575000 8.322167403 -1.497293928 0.000000000 0 1 0 - 542 1 2 2.100000 7.503973003 -8.885905613 -2.679766593 0 1 0 - 543 1 2 2.100000 7.467642183 -2.867021741 -2.686199476 0 1 0 - 544 1 3 -1.050000 7.753470328 0.004861099 -1.061430252 0 1 0 - 545 1 3 -1.050000 7.314131344 -2.739522761 -1.064187192 0 1 0 - 546 1 4 -0.950000 -10.726129341 -1.708059957 -0.993425143 1 1 0 - 547 1 3 -1.050000 -9.400861253 -3.423344862 -3.298245183 1 1 0 - 548 1 3 -1.050000 7.766440300 -1.402539488 -3.293650032 0 1 0 - 549 1 3 -1.050000 8.635061785 -3.907816877 -3.055632561 0 1 0 - 550 1 5 0.425000 -11.817679599 -1.701201443 -1.194847627 1 1 0 - 551 1 1 1.575000 5.714000098 -5.980205239 0.000000000 0 1 0 - 552 1 2 2.100000 -10.527859693 -4.402994302 -2.679766593 1 1 0 - 553 1 2 2.100000 -10.620525274 -7.349933052 -2.686199476 1 1 0 - 554 1 3 -1.050000 -10.334697129 -4.478050211 -1.061430252 1 1 0 - 555 1 3 -1.050000 -10.774036113 -7.222434071 -1.064187192 1 1 0 - 556 1 4 -0.950000 7.305703354 -6.190971268 -0.993425143 0 1 0 - 557 1 3 -1.050000 8.630971443 -7.906256173 -3.298245183 0 1 0 - 558 1 3 -1.050000 -10.321727157 -5.885450799 -3.293650032 1 1 0 - 559 1 3 -1.050000 -9.453105673 -8.390728187 -3.055632561 1 1 0 - 560 1 5 0.425000 6.214153097 -6.184112754 -1.194847627 0 1 0 - 561 1 1 1.575000 -7.200511635 -7.468531871 0.000000000 1 1 0 - 562 1 2 2.100000 -6.382317234 -0.079920186 2.679766593 1 1 0 - 563 1 2 2.100000 -6.345986415 -6.098804058 2.686199476 1 1 0 - 564 1 3 -1.050000 -6.631814560 -8.970686899 1.061430252 1 1 0 - 565 1 3 -1.050000 -6.192475576 -6.226303039 1.064187192 1 1 0 - 566 1 4 -0.950000 
-8.792214891 -7.257765842 0.993425143 1 1 0 - 567 1 3 -1.050000 -10.117482979 -5.542480937 3.298245183 1 1 0 - 568 1 3 -1.050000 -6.644784532 -7.563286311 3.293650032 1 1 0 - 569 1 3 -1.050000 -7.513406016 -5.058008923 3.055632561 1 1 0 - 570 1 5 0.425000 -7.700664633 -7.264624356 1.194847627 1 1 0 - 571 1 1 1.575000 -4.592344330 -2.985620560 0.000000000 1 1 0 - 572 1 2 2.100000 -8.990484539 -4.562831497 2.679766593 1 1 0 - 573 1 2 2.100000 -8.897818957 -1.615892747 2.686199476 1 1 0 - 574 1 3 -1.050000 -9.183647103 -4.487775588 1.061430252 1 1 0 - 575 1 3 -1.050000 -8.744308119 -1.743391728 1.064187192 1 1 0 - 576 1 4 -0.950000 -6.184047586 -2.774854531 0.993425143 1 1 0 - 577 1 3 -1.050000 -7.509315674 -1.059569627 3.298245183 1 1 0 - 578 1 3 -1.050000 -9.196617075 -3.080375001 3.293650032 1 1 0 - 579 1 3 -1.050000 -10.065238559 -0.575097612 3.055632561 1 1 0 - 580 1 5 0.425000 -5.092497328 -2.781713046 1.194847627 1 1 0 - 581 1 1 1.575000 -7.157832749 -1.497293928 0.000000000 1 1 0 - 582 1 2 2.100000 -7.976027150 -8.885905613 -2.679766593 1 1 0 - 583 1 2 2.100000 -8.012357970 -2.867021741 -2.686199476 1 1 0 - 584 1 3 -1.050000 -7.726529824 0.004861099 -1.061430252 1 1 0 - 585 1 3 -1.050000 -8.165868808 -2.739522761 -1.064187192 1 1 0 - 586 1 4 -0.950000 -5.566129494 -1.708059957 -0.993425143 1 1 0 - 587 1 3 -1.050000 -4.240861405 -3.423344862 -3.298245183 1 1 0 - 588 1 3 -1.050000 -7.713559852 -1.402539488 -3.293650032 1 1 0 - 589 1 3 -1.050000 -6.844938368 -3.907816877 -3.055632561 1 1 0 - 590 1 5 0.425000 -6.657679751 -1.701201443 -1.194847627 1 1 0 - 591 1 1 1.575000 -9.766000054 -5.980205239 0.000000000 1 1 0 - 592 1 2 2.100000 -5.367859845 -4.402994302 -2.679766593 1 1 0 - 593 1 2 2.100000 -5.460525427 -7.349933052 -2.686199476 1 1 0 - 594 1 3 -1.050000 -5.174697281 -4.478050211 -1.061430252 1 1 0 - 595 1 3 -1.050000 -5.614036265 -7.222434071 -1.064187192 1 1 0 - 596 1 4 -0.950000 -8.174296798 -6.190971268 -0.993425143 1 1 0 - 597 1 3 -1.050000 -6.849028710 
-7.906256173 -3.298245183 1 1 0 - 598 1 3 -1.050000 -5.161727309 -5.885450799 -3.293650032 1 1 0 - 599 1 3 -1.050000 -4.293105825 -8.390728187 -3.055632561 1 1 0 - 600 1 5 0.425000 -9.265847056 -6.184112754 -1.194847627 1 1 0 - 601 1 1 1.575000 -2.040511787 -7.468531871 0.000000000 1 1 0 - 602 1 2 2.100000 -1.222317387 -0.079920186 2.679766593 1 1 0 - 603 1 2 2.100000 -1.185986567 -6.098804058 2.686199476 1 1 0 - 604 1 3 -1.050000 -1.471814713 -8.970686899 1.061430252 1 1 0 - 605 1 3 -1.050000 -1.032475729 -6.226303039 1.064187192 1 1 0 - 606 1 4 -0.950000 -3.632215043 -7.257765842 0.993425143 1 1 0 - 607 1 3 -1.050000 -4.957483132 -5.542480937 3.298245183 1 1 0 - 608 1 3 -1.050000 -1.484784685 -7.563286311 3.293650032 1 1 0 - 609 1 3 -1.050000 -2.353406169 -5.058008923 3.055632561 1 1 0 - 610 1 5 0.425000 -2.540664786 -7.264624356 1.194847627 1 1 0 - 611 1 1 1.575000 0.567655517 -2.985620560 0.000000000 1 1 0 - 612 1 2 2.100000 -3.830484692 -4.562831497 2.679766593 1 1 0 - 613 1 2 2.100000 -3.737819110 -1.615892747 2.686199476 1 1 0 - 614 1 3 -1.050000 -4.023647256 -4.487775588 1.061430252 1 1 0 - 615 1 3 -1.050000 -3.584308271 -1.743391728 1.064187192 1 1 0 - 616 1 4 -0.950000 -1.024047739 -2.774854531 0.993425143 1 1 0 - 617 1 3 -1.050000 -2.349315827 -1.059569627 3.298245183 1 1 0 - 618 1 3 -1.050000 -4.036617227 -3.080375001 3.293650032 1 1 0 - 619 1 3 -1.050000 -4.905238712 -0.575097612 3.055632561 1 1 0 - 620 1 5 0.425000 0.067502519 -2.781713046 1.194847627 1 1 0 - 621 1 1 1.575000 -1.997832902 -1.497293928 0.000000000 1 1 0 - 622 1 2 2.100000 -2.816027302 -8.885905613 -2.679766593 1 1 0 - 623 1 2 2.100000 -2.852358122 -2.867021741 -2.686199476 1 1 0 - 624 1 3 -1.050000 -2.566529977 0.004861099 -1.061430252 1 1 0 - 625 1 3 -1.050000 -3.005868961 -2.739522761 -1.064187192 1 1 0 - 626 1 4 -0.950000 -0.406129646 -1.708059957 -0.993425143 1 1 0 - 627 1 3 -1.050000 0.919138442 -3.423344862 -3.298245183 1 1 0 - 628 1 3 -1.050000 -2.553560005 -1.402539488 
-3.293650032 1 1 0 - 629 1 3 -1.050000 -1.684938521 -3.907816877 -3.055632561 1 1 0 - 630 1 5 0.425000 -1.497679904 -1.701201443 -1.194847627 1 1 0 - 631 1 1 1.575000 -4.606000207 -5.980205239 0.000000000 1 1 0 - 632 1 2 2.100000 -0.207859998 -4.402994302 -2.679766593 1 1 0 - 633 1 2 2.100000 -0.300525579 -7.349933052 -2.686199476 1 1 0 - 634 1 3 -1.050000 -0.014697434 -4.478050211 -1.061430252 1 1 0 - 635 1 3 -1.050000 -0.454036418 -7.222434071 -1.064187192 1 1 0 - 636 1 4 -0.950000 -3.014296951 -6.190971268 -0.993425143 1 1 0 - 637 1 3 -1.050000 -1.689028863 -7.906256173 -3.298245183 1 1 0 - 638 1 3 -1.050000 -0.001727462 -5.885450799 -3.293650032 1 1 0 - 639 1 3 -1.050000 0.866894022 -8.390728187 -3.055632561 1 1 0 - 640 1 5 0.425000 -4.105847208 -6.184112754 -1.194847627 1 1 0 + 321 1 1 1.575000 2.474158711 -16.434354493 0.000000000 0 1 0 + 322 1 2 2.100000 3.292353111 -9.045742808 2.679766593 0 1 0 + 323 1 2 2.100000 3.328683931 -15.064626680 2.686199476 0 1 0 + 324 1 3 -1.050000 3.042855785 -17.936509520 1.061430252 0 1 0 + 325 1 3 -1.050000 3.482194769 -15.192125660 1.064187192 0 1 0 + 326 1 4 -0.950000 0.882455455 -16.223588464 0.993425143 0 1 0 + 327 1 3 -1.050000 -0.442812634 -14.508303559 3.298245183 0 1 0 + 328 1 3 -1.050000 3.029885813 -16.529108933 3.293650032 0 1 0 + 329 1 3 -1.050000 2.161264329 -14.023831544 3.055632561 0 1 0 + 330 1 5 0.425000 1.974005712 -16.230446978 1.194847627 0 1 0 + 331 1 1 1.575000 5.082326015 -11.951443182 0.000000000 0 1 0 + 332 1 2 2.100000 0.684185806 -13.528654119 2.679766593 0 1 0 + 333 1 2 2.100000 0.776851388 -10.581715369 2.686199476 0 1 0 + 334 1 3 -1.050000 0.491023242 -13.453598210 1.061430252 0 1 0 + 335 1 3 -1.050000 0.930362227 -10.709214349 1.064187192 0 1 0 + 336 1 4 -0.950000 3.490622759 -11.740677153 0.993425143 0 1 0 + 337 1 3 -1.050000 2.165354671 -10.025392248 3.298245183 0 1 0 + 338 1 3 -1.050000 0.478053271 -12.046197622 3.293650032 0 1 0 + 339 1 3 -1.050000 -0.390568214 -9.540920234 3.055632561 0 1 
0 + 340 1 5 0.425000 4.582173017 -11.747535667 1.194847627 0 1 0 + 341 1 1 1.575000 2.516837596 -10.463116550 0.000000000 0 1 0 + 342 1 2 2.100000 1.698643196 -17.851728235 -2.679766593 0 1 0 + 343 1 2 2.100000 1.662312376 -11.832844363 -2.686199476 0 1 0 + 344 1 3 -1.050000 1.948140521 -8.960961522 -1.061430252 0 1 0 + 345 1 3 -1.050000 1.508801537 -11.705345382 -1.064187192 0 1 0 + 346 1 4 -0.950000 4.108540852 -10.673882579 -0.993425143 0 1 0 + 347 1 3 -1.050000 5.433808940 -12.389167484 -3.298245183 0 1 0 + 348 1 3 -1.050000 1.961110493 -10.368362109 -3.293650032 0 1 0 + 349 1 3 -1.050000 2.829731977 -12.873639498 -3.055632561 0 1 0 + 350 1 5 0.425000 3.016990594 -10.667024064 -1.194847627 0 1 0 + 351 1 1 1.575000 -0.091329709 -14.946027861 0.000000000 0 1 0 + 352 1 2 2.100000 4.306810500 -13.368816924 -2.679766593 0 1 0 + 353 1 2 2.100000 4.214144919 -16.315755673 -2.686199476 0 1 0 + 354 1 3 -1.050000 4.499973064 -13.443872833 -1.061430252 0 1 0 + 355 1 3 -1.050000 4.060634080 -16.188256693 -1.064187192 0 1 0 + 356 1 4 -0.950000 1.500373547 -15.156793890 -0.993425143 0 1 0 + 357 1 3 -1.050000 2.825641635 -16.872078794 -3.298245183 0 1 0 + 358 1 3 -1.050000 4.512943036 -14.851273420 -3.293650032 0 1 0 + 359 1 3 -1.050000 5.381564520 -17.356550809 -3.055632561 0 1 0 + 360 1 5 0.425000 0.408823289 -15.149935375 -1.194847627 0 1 0 + 361 1 1 1.575000 7.634158558 -16.434354493 0.000000000 0 1 0 + 362 1 2 2.100000 -12.187647042 -9.045742808 2.679766593 1 1 0 + 363 1 2 2.100000 -12.151316222 -15.064626680 2.686199476 1 1 0 + 364 1 3 -1.050000 8.202855633 -17.936509520 1.061430252 0 1 0 + 365 1 3 -1.050000 -11.997805383 -15.192125660 1.064187192 1 1 0 + 366 1 4 -0.950000 6.042455302 -16.223588464 0.993425143 0 1 0 + 367 1 3 -1.050000 4.717187214 -14.508303559 3.298245183 0 1 0 + 368 1 3 -1.050000 -12.450114339 -16.529108933 3.293650032 1 1 0 + 369 1 3 -1.050000 7.321264176 -14.023831544 3.055632561 0 1 0 + 370 1 5 0.425000 7.134005560 -16.230446978 1.194847627 0 1 0 + 
371 1 1 1.575000 -10.397674137 -11.951443182 0.000000000 1 1 0 + 372 1 2 2.100000 5.844185654 -13.528654119 2.679766593 0 1 0 + 373 1 2 2.100000 5.936851235 -10.581715369 2.686199476 0 1 0 + 374 1 3 -1.050000 5.651023090 -13.453598210 1.061430252 0 1 0 + 375 1 3 -1.050000 6.090362074 -10.709214349 1.064187192 0 1 0 + 376 1 4 -0.950000 -11.989377393 -11.740677153 0.993425143 1 1 0 + 377 1 3 -1.050000 7.325354518 -10.025392248 3.298245183 0 1 0 + 378 1 3 -1.050000 5.638053118 -12.046197622 3.293650032 0 1 0 + 379 1 3 -1.050000 4.769431634 -9.540920234 3.055632561 0 1 0 + 380 1 5 0.425000 -10.897827136 -11.747535667 1.194847627 1 1 0 + 381 1 1 1.575000 7.676837443 -10.463116550 0.000000000 0 1 0 + 382 1 2 2.100000 6.858643043 -17.851728235 -2.679766593 0 1 0 + 383 1 2 2.100000 6.822312223 -11.832844363 -2.686199476 0 1 0 + 384 1 3 -1.050000 7.108140369 -8.960961522 -1.061430252 0 1 0 + 385 1 3 -1.050000 6.668801385 -11.705345382 -1.064187192 0 1 0 + 386 1 4 -0.950000 -11.371459301 -10.673882579 -0.993425143 1 1 0 + 387 1 3 -1.050000 -10.046191213 -12.389167484 -3.298245183 1 1 0 + 388 1 3 -1.050000 7.121110340 -10.368362109 -3.293650032 0 1 0 + 389 1 3 -1.050000 7.989731825 -12.873639498 -3.055632561 0 1 0 + 390 1 5 0.425000 8.176990442 -10.667024064 -1.194847627 0 1 0 + 391 1 1 1.575000 5.068670139 -14.946027861 0.000000000 0 1 0 + 392 1 2 2.100000 -11.173189652 -13.368816924 -2.679766593 1 1 0 + 393 1 2 2.100000 -11.265855234 -16.315755673 -2.686199476 1 1 0 + 394 1 3 -1.050000 -10.980027089 -13.443872833 -1.061430252 1 1 0 + 395 1 3 -1.050000 -11.419366073 -16.188256693 -1.064187192 1 1 0 + 396 1 4 -0.950000 6.660373395 -15.156793890 -0.993425143 0 1 0 + 397 1 3 -1.050000 7.985641483 -16.872078794 -3.298245183 0 1 0 + 398 1 3 -1.050000 -10.967057117 -14.851273420 -3.293650032 1 1 0 + 399 1 3 -1.050000 -10.098435632 -17.356550809 -3.055632561 1 1 0 + 400 1 5 0.425000 5.568823137 -15.149935375 -1.194847627 0 1 0 + 401 1 1 1.575000 -7.845841595 -16.434354493 
0.000000000 1 1 0 + 402 1 2 2.100000 -7.027647194 -9.045742808 2.679766593 1 1 0 + 403 1 2 2.100000 -6.991316374 -15.064626680 2.686199476 1 1 0 + 404 1 3 -1.050000 -7.277144520 -17.936509520 1.061430252 1 1 0 + 405 1 3 -1.050000 -6.837805536 -15.192125660 1.064187192 1 1 0 + 406 1 4 -0.950000 -9.437544850 -16.223588464 0.993425143 1 1 0 + 407 1 3 -1.050000 -10.762812939 -14.508303559 3.298245183 1 1 0 + 408 1 3 -1.050000 -7.290114492 -16.529108933 3.293650032 1 1 0 + 409 1 3 -1.050000 -8.158735976 -14.023831544 3.055632561 1 1 0 + 410 1 5 0.425000 -8.345994593 -16.230446978 1.194847627 1 1 0 + 411 1 1 1.575000 -5.237674290 -11.951443182 0.000000000 1 1 0 + 412 1 2 2.100000 -9.635814499 -13.528654119 2.679766593 1 1 0 + 413 1 2 2.100000 -9.543148917 -10.581715369 2.686199476 1 1 0 + 414 1 3 -1.050000 -9.828977063 -13.453598210 1.061430252 1 1 0 + 415 1 3 -1.050000 -9.389638079 -10.709214349 1.064187192 1 1 0 + 416 1 4 -0.950000 -6.829377546 -11.740677153 0.993425143 1 1 0 + 417 1 3 -1.050000 -8.154645634 -10.025392248 3.298245183 1 1 0 + 418 1 3 -1.050000 -9.841947035 -12.046197622 3.293650032 1 1 0 + 419 1 3 -1.050000 -10.710568519 -9.540920234 3.055632561 1 1 0 + 420 1 5 0.425000 -5.737827288 -11.747535667 1.194847627 1 1 0 + 421 1 1 1.575000 -7.803162709 -10.463116550 0.000000000 1 1 0 + 422 1 2 2.100000 -8.621357110 -17.851728235 -2.679766593 1 1 0 + 423 1 2 2.100000 -8.657687929 -11.832844363 -2.686199476 1 1 0 + 424 1 3 -1.050000 -8.371859784 -8.960961522 -1.061430252 1 1 0 + 425 1 3 -1.050000 -8.811198768 -11.705345382 -1.064187192 1 1 0 + 426 1 4 -0.950000 -6.211459453 -10.673882579 -0.993425143 1 1 0 + 427 1 3 -1.050000 -4.886191365 -12.389167484 -3.298245183 1 1 0 + 428 1 3 -1.050000 -8.358889812 -10.368362109 -3.293650032 1 1 0 + 429 1 3 -1.050000 -7.490268328 -12.873639498 -3.055632561 1 1 0 + 430 1 5 0.425000 -7.303009711 -10.667024064 -1.194847627 1 1 0 + 431 1 1 1.575000 -10.411330014 -14.946027861 0.000000000 1 1 0 + 432 1 2 2.100000 -6.013189805 
-13.368816924 -2.679766593 1 1 0 + 433 1 2 2.100000 -6.105855387 -16.315755673 -2.686199476 1 1 0 + 434 1 3 -1.050000 -5.820027241 -13.443872833 -1.061430252 1 1 0 + 435 1 3 -1.050000 -6.259366225 -16.188256693 -1.064187192 1 1 0 + 436 1 4 -0.950000 -8.819626758 -15.156793890 -0.993425143 1 1 0 + 437 1 3 -1.050000 -7.494358670 -16.872078794 -3.298245183 1 1 0 + 438 1 3 -1.050000 -5.807057269 -14.851273420 -3.293650032 1 1 0 + 439 1 3 -1.050000 -4.938435785 -17.356550809 -3.055632561 1 1 0 + 440 1 5 0.425000 -9.911177016 -15.149935375 -1.194847627 1 1 0 + 441 1 1 1.575000 -2.685841747 -16.434354493 0.000000000 1 1 0 + 442 1 2 2.100000 -1.867647347 -9.045742808 2.679766593 1 1 0 + 443 1 2 2.100000 -1.831316527 -15.064626680 2.686199476 1 1 0 + 444 1 3 -1.050000 -2.117144673 -17.936509520 1.061430252 1 1 0 + 445 1 3 -1.050000 -1.677805688 -15.192125660 1.064187192 1 1 0 + 446 1 4 -0.950000 -4.277545003 -16.223588464 0.993425143 1 1 0 + 447 1 3 -1.050000 -5.602813091 -14.508303559 3.298245183 1 1 0 + 448 1 3 -1.050000 -2.130114644 -16.529108933 3.293650032 1 1 0 + 449 1 3 -1.050000 -2.998736129 -14.023831544 3.055632561 1 1 0 + 450 1 5 0.425000 -3.185994745 -16.230446978 1.194847627 1 1 0 + 451 1 1 1.575000 -0.077674443 -11.951443182 0.000000000 1 1 0 + 452 1 2 2.100000 -4.475814651 -13.528654119 2.679766593 1 1 0 + 453 1 2 2.100000 -4.383149070 -10.581715369 2.686199476 1 1 0 + 454 1 3 -1.050000 -4.668977215 -13.453598210 1.061430252 1 1 0 + 455 1 3 -1.050000 -4.229638231 -10.709214349 1.064187192 1 1 0 + 456 1 4 -0.950000 -1.669377698 -11.740677153 0.993425143 1 1 0 + 457 1 3 -1.050000 -2.994645787 -10.025392248 3.298245183 1 1 0 + 458 1 3 -1.050000 -4.681947187 -12.046197622 3.293650032 1 1 0 + 459 1 3 -1.050000 -5.550568672 -9.540920234 3.055632561 1 1 0 + 460 1 5 0.425000 -0.577827441 -11.747535667 1.194847627 1 1 0 + 461 1 1 1.575000 -2.643162862 -10.463116550 0.000000000 1 1 0 + 462 1 2 2.100000 -3.461357262 -17.851728235 -2.679766593 1 1 0 + 463 1 2 2.100000 
-3.497688082 -11.832844363 -2.686199476 1 1 0 + 464 1 3 -1.050000 -3.211859937 -8.960961522 -1.061430252 1 1 0 + 465 1 3 -1.050000 -3.651198921 -11.705345382 -1.064187192 1 1 0 + 466 1 4 -0.950000 -1.051459606 -10.673882579 -0.993425143 1 1 0 + 467 1 3 -1.050000 0.273808482 -12.389167484 -3.298245183 1 1 0 + 468 1 3 -1.050000 -3.198889965 -10.368362109 -3.293650032 1 1 0 + 469 1 3 -1.050000 -2.330268480 -12.873639498 -3.055632561 1 1 0 + 470 1 5 0.425000 -2.143009864 -10.667024064 -1.194847627 1 1 0 + 471 1 1 1.575000 -5.251330167 -14.946027861 0.000000000 1 1 0 + 472 1 2 2.100000 -0.853189958 -13.368816924 -2.679766593 1 1 0 + 473 1 2 2.100000 -0.945855539 -16.315755673 -2.686199476 1 1 0 + 474 1 3 -1.050000 -0.660027394 -13.443872833 -1.061430252 1 1 0 + 475 1 3 -1.050000 -1.099366378 -16.188256693 -1.064187192 1 1 0 + 476 1 4 -0.950000 -3.659626911 -15.156793890 -0.993425143 1 1 0 + 477 1 3 -1.050000 -2.334358822 -16.872078794 -3.298245183 1 1 0 + 478 1 3 -1.050000 -0.647057422 -14.851273420 -3.293650032 1 1 0 + 479 1 3 -1.050000 0.221564062 -17.356550809 -3.055632561 1 1 0 + 480 1 5 0.425000 -4.751177168 -15.149935375 -1.194847627 1 1 0 + 481 1 1 1.575000 2.530493472 -7.468531871 0.000000000 0 1 0 + 482 1 2 2.100000 3.348687873 -0.079920186 2.679766593 0 1 0 + 483 1 2 2.100000 3.385018693 -6.098804058 2.686199476 0 1 0 + 484 1 3 -1.050000 3.099190547 -8.970686899 1.061430252 0 1 0 + 485 1 3 -1.050000 3.538529531 -6.226303039 1.064187192 0 1 0 + 486 1 4 -0.950000 0.938790217 -7.257765842 0.993425143 0 1 0 + 487 1 3 -1.050000 -0.386477872 -5.542480937 3.298245183 0 1 0 + 488 1 3 -1.050000 3.086220575 -7.563286311 3.293650032 0 1 0 + 489 1 3 -1.050000 2.217599091 -5.058008923 3.055632561 0 1 0 + 490 1 5 0.425000 2.030340474 -7.264624356 1.194847627 0 1 0 + 491 1 1 1.575000 5.138660777 -2.985620560 0.000000000 0 1 0 + 492 1 2 2.100000 0.740520568 -4.562831497 2.679766593 0 1 0 + 493 1 2 2.100000 0.833186150 -1.615892747 2.686199476 0 1 0 + 494 1 3 -1.050000 
0.547358004 -4.487775588 1.061430252 0 1 0 + 495 1 3 -1.050000 0.986696988 -1.743391728 1.064187192 0 1 0 + 496 1 4 -0.950000 3.546957521 -2.774854531 0.993425143 0 1 0 + 497 1 3 -1.050000 2.221689433 -1.059569627 3.298245183 0 1 0 + 498 1 3 -1.050000 0.534388032 -3.080375001 3.293650032 0 1 0 + 499 1 3 -1.050000 -0.334233452 -0.575097612 3.055632561 0 1 0 + 500 1 5 0.425000 4.638507779 -2.781713046 1.194847627 0 1 0 + 501 1 1 1.575000 2.573172358 -1.497293928 0.000000000 0 1 0 + 502 1 2 2.100000 1.754977957 -8.885905613 -2.679766593 0 1 0 + 503 1 2 2.100000 1.718647138 -2.867021741 -2.686199476 0 1 0 + 504 1 3 -1.050000 2.004475283 0.004861099 -1.061430252 0 1 0 + 505 1 3 -1.050000 1.565136299 -2.739522761 -1.064187192 0 1 0 + 506 1 4 -0.950000 4.164875614 -1.708059957 -0.993425143 0 1 0 + 507 1 3 -1.050000 5.490143702 -3.423344862 -3.298245183 0 1 0 + 508 1 3 -1.050000 2.017445255 -1.402539488 -3.293650032 0 1 0 + 509 1 3 -1.050000 2.886066739 -3.907816877 -3.055632561 0 1 0 + 510 1 5 0.425000 3.073325356 -1.701201443 -1.194847627 0 1 0 + 511 1 1 1.575000 -0.034994947 -5.980205239 0.000000000 0 1 0 + 512 1 2 2.100000 4.363145262 -4.402994302 -2.679766593 0 1 0 + 513 1 2 2.100000 4.270479680 -7.349933052 -2.686199476 0 1 0 + 514 1 3 -1.050000 4.556307826 -4.478050211 -1.061430252 0 1 0 + 515 1 3 -1.050000 4.116968842 -7.222434071 -1.064187192 0 1 0 + 516 1 4 -0.950000 1.556708309 -6.190971268 -0.993425143 0 1 0 + 517 1 3 -1.050000 2.881976397 -7.906256173 -3.298245183 0 1 0 + 518 1 3 -1.050000 4.569277798 -5.885450799 -3.293650032 0 1 0 + 519 1 3 -1.050000 5.437899282 -8.390728187 -3.055632561 0 1 0 + 520 1 5 0.425000 0.465158051 -6.184112754 -1.194847627 0 1 0 + 521 1 1 1.575000 7.690493320 -7.468531871 0.000000000 0 1 0 + 522 1 2 2.100000 -12.131312280 -0.079920186 2.679766593 1 1 0 + 523 1 2 2.100000 -12.094981460 -6.098804058 2.686199476 1 1 0 + 524 1 3 -1.050000 8.259190394 -8.970686899 1.061430252 0 1 0 + 525 1 3 -1.050000 -11.941470621 -6.226303039 
1.064187192 1 1 0 + 526 1 4 -0.950000 6.098790064 -7.257765842 0.993425143 0 1 0 + 527 1 3 -1.050000 4.773521976 -5.542480937 3.298245183 0 1 0 + 528 1 3 -1.050000 -12.393779577 -7.563286311 3.293650032 1 1 0 + 529 1 3 -1.050000 7.377598938 -5.058008923 3.055632561 0 1 0 + 530 1 5 0.425000 7.190340322 -7.264624356 1.194847627 0 1 0 + 531 1 1 1.575000 -10.341339376 -2.985620560 0.000000000 1 1 0 + 532 1 2 2.100000 5.900520415 -4.562831497 2.679766593 0 1 0 + 533 1 2 2.100000 5.993185997 -1.615892747 2.686199476 0 1 0 + 534 1 3 -1.050000 5.707357852 -4.487775588 1.061430252 0 1 0 + 535 1 3 -1.050000 6.146696836 -1.743391728 1.064187192 0 1 0 + 536 1 4 -0.950000 -11.933042631 -2.774854531 0.993425143 1 1 0 + 537 1 3 -1.050000 7.381689280 -1.059569627 3.298245183 0 1 0 + 538 1 3 -1.050000 5.694387880 -3.080375001 3.293650032 0 1 0 + 539 1 3 -1.050000 4.825766395 -0.575097612 3.055632561 0 1 0 + 540 1 5 0.425000 -10.841492374 -2.781713046 1.194847627 1 1 0 + 541 1 1 1.575000 7.733172205 -1.497293928 0.000000000 0 1 0 + 542 1 2 2.100000 6.914977805 -8.885905613 -2.679766593 0 1 0 + 543 1 2 2.100000 6.878646985 -2.867021741 -2.686199476 0 1 0 + 544 1 3 -1.050000 7.164475130 0.004861099 -1.061430252 0 1 0 + 545 1 3 -1.050000 6.725136146 -2.739522761 -1.064187192 0 1 0 + 546 1 4 -0.950000 -11.315124539 -1.708059957 -0.993425143 1 1 0 + 547 1 3 -1.050000 -9.989856451 -3.423344862 -3.298245183 1 1 0 + 548 1 3 -1.050000 7.177445102 -1.402539488 -3.293650032 0 1 0 + 549 1 3 -1.050000 8.046066587 -3.907816877 -3.055632561 0 1 0 + 550 1 5 0.425000 8.233325203 -1.701201443 -1.194847627 0 1 0 + 551 1 1 1.575000 5.125004900 -5.980205239 0.000000000 0 1 0 + 552 1 2 2.100000 -11.116854891 -4.402994302 -2.679766593 1 1 0 + 553 1 2 2.100000 -11.209520472 -7.349933052 -2.686199476 1 1 0 + 554 1 3 -1.050000 -10.923692327 -4.478050211 -1.061430252 1 1 0 + 555 1 3 -1.050000 -11.363031311 -7.222434071 -1.064187192 1 1 0 + 556 1 4 -0.950000 6.716708156 -6.190971268 -0.993425143 0 1 0 + 557 1 
3 -1.050000 8.041976245 -7.906256173 -3.298245183 0 1 0 + 558 1 3 -1.050000 -10.910722355 -5.885450799 -3.293650032 1 1 0 + 559 1 3 -1.050000 -10.042100871 -8.390728187 -3.055632561 1 1 0 + 560 1 5 0.425000 5.625157899 -6.184112754 -1.194847627 0 1 0 + 561 1 1 1.575000 -7.789506833 -7.468531871 0.000000000 1 1 0 + 562 1 2 2.100000 -6.971312432 -0.079920186 2.679766593 1 1 0 + 563 1 2 2.100000 -6.934981613 -6.098804058 2.686199476 1 1 0 + 564 1 3 -1.050000 -7.220809758 -8.970686899 1.061430252 1 1 0 + 565 1 3 -1.050000 -6.781470774 -6.226303039 1.064187192 1 1 0 + 566 1 4 -0.950000 -9.381210089 -7.257765842 0.993425143 1 1 0 + 567 1 3 -1.050000 -10.706478177 -5.542480937 3.298245183 1 1 0 + 568 1 3 -1.050000 -7.233779730 -7.563286311 3.293650032 1 1 0 + 569 1 3 -1.050000 -8.102401214 -5.058008923 3.055632561 1 1 0 + 570 1 5 0.425000 -8.289659831 -7.264624356 1.194847627 1 1 0 + 571 1 1 1.575000 -5.181339528 -2.985620560 0.000000000 1 1 0 + 572 1 2 2.100000 -9.579479737 -4.562831497 2.679766593 1 1 0 + 573 1 2 2.100000 -9.486814155 -1.615892747 2.686199476 1 1 0 + 574 1 3 -1.050000 -9.772642301 -4.487775588 1.061430252 1 1 0 + 575 1 3 -1.050000 -9.333303317 -1.743391728 1.064187192 1 1 0 + 576 1 4 -0.950000 -6.773042784 -2.774854531 0.993425143 1 1 0 + 577 1 3 -1.050000 -8.098310872 -1.059569627 3.298245183 1 1 0 + 578 1 3 -1.050000 -9.785612273 -3.080375001 3.293650032 1 1 0 + 579 1 3 -1.050000 -10.654233757 -0.575097612 3.055632561 1 1 0 + 580 1 5 0.425000 -5.681492526 -2.781713046 1.194847627 1 1 0 + 581 1 1 1.575000 -7.746827947 -1.497293928 0.000000000 1 1 0 + 582 1 2 2.100000 -8.565022348 -8.885905613 -2.679766593 1 1 0 + 583 1 2 2.100000 -8.601353168 -2.867021741 -2.686199476 1 1 0 + 584 1 3 -1.050000 -8.315525022 0.004861099 -1.061430252 1 1 0 + 585 1 3 -1.050000 -8.754864006 -2.739522761 -1.064187192 1 1 0 + 586 1 4 -0.950000 -6.155124692 -1.708059957 -0.993425143 1 1 0 + 587 1 3 -1.050000 -4.829856603 -3.423344862 -3.298245183 1 1 0 + 588 1 3 -1.050000 
-8.302555050 -1.402539488 -3.293650032 1 1 0 + 589 1 3 -1.050000 -7.433933566 -3.907816877 -3.055632561 1 1 0 + 590 1 5 0.425000 -7.246674949 -1.701201443 -1.194847627 1 1 0 + 591 1 1 1.575000 -10.354995252 -5.980205239 0.000000000 1 1 0 + 592 1 2 2.100000 -5.956855043 -4.402994302 -2.679766593 1 1 0 + 593 1 2 2.100000 -6.049520625 -7.349933052 -2.686199476 1 1 0 + 594 1 3 -1.050000 -5.763692479 -4.478050211 -1.061430252 1 1 0 + 595 1 3 -1.050000 -6.203031463 -7.222434071 -1.064187192 1 1 0 + 596 1 4 -0.950000 -8.763291996 -6.190971268 -0.993425143 1 1 0 + 597 1 3 -1.050000 -7.438023908 -7.906256173 -3.298245183 1 1 0 + 598 1 3 -1.050000 -5.750722507 -5.885450799 -3.293650032 1 1 0 + 599 1 3 -1.050000 -4.882101023 -8.390728187 -3.055632561 1 1 0 + 600 1 5 0.425000 -9.854842254 -6.184112754 -1.194847627 1 1 0 + 601 1 1 1.575000 -2.629506985 -7.468531871 0.000000000 1 1 0 + 602 1 2 2.100000 -1.811312585 -0.079920186 2.679766593 1 1 0 + 603 1 2 2.100000 -1.774981765 -6.098804058 2.686199476 1 1 0 + 604 1 3 -1.050000 -2.060809911 -8.970686899 1.061430252 1 1 0 + 605 1 3 -1.050000 -1.621470927 -6.226303039 1.064187192 1 1 0 + 606 1 4 -0.950000 -4.221210241 -7.257765842 0.993425143 1 1 0 + 607 1 3 -1.050000 -5.546478330 -5.542480937 3.298245183 1 1 0 + 608 1 3 -1.050000 -2.073779883 -7.563286311 3.293650032 1 1 0 + 609 1 3 -1.050000 -2.942401367 -5.058008923 3.055632561 1 1 0 + 610 1 5 0.425000 -3.129659984 -7.264624356 1.194847627 1 1 0 + 611 1 1 1.575000 -0.021339681 -2.985620560 0.000000000 1 1 0 + 612 1 2 2.100000 -4.419479890 -4.562831497 2.679766593 1 1 0 + 613 1 2 2.100000 -4.326814308 -1.615892747 2.686199476 1 1 0 + 614 1 3 -1.050000 -4.612642454 -4.487775588 1.061430252 1 1 0 + 615 1 3 -1.050000 -4.173303469 -1.743391728 1.064187192 1 1 0 + 616 1 4 -0.950000 -1.613042937 -2.774854531 0.993425143 1 1 0 + 617 1 3 -1.050000 -2.938311025 -1.059569627 3.298245183 1 1 0 + 618 1 3 -1.050000 -4.625612425 -3.080375001 3.293650032 1 1 0 + 619 1 3 -1.050000 -5.494233910 
-0.575097612 3.055632561 1 1 0 + 620 1 5 0.425000 -0.521492679 -2.781713046 1.194847627 1 1 0 + 621 1 1 1.575000 -2.586828100 -1.497293928 0.000000000 1 1 0 + 622 1 2 2.100000 -3.405022500 -8.885905613 -2.679766593 1 1 0 + 623 1 2 2.100000 -3.441353320 -2.867021741 -2.686199476 1 1 0 + 624 1 3 -1.050000 -3.155525175 0.004861099 -1.061430252 1 1 0 + 625 1 3 -1.050000 -3.594864159 -2.739522761 -1.064187192 1 1 0 + 626 1 4 -0.950000 -0.995124844 -1.708059957 -0.993425143 1 1 0 + 627 1 3 -1.050000 0.330143244 -3.423344862 -3.298245183 1 1 0 + 628 1 3 -1.050000 -3.142555203 -1.402539488 -3.293650032 1 1 0 + 629 1 3 -1.050000 -2.273933719 -3.907816877 -3.055632561 1 1 0 + 630 1 5 0.425000 -2.086675102 -1.701201443 -1.194847627 1 1 0 + 631 1 1 1.575000 -5.194995405 -5.980205239 0.000000000 1 1 0 + 632 1 2 2.100000 -0.796855196 -4.402994302 -2.679766593 1 1 0 + 633 1 2 2.100000 -0.889520777 -7.349933052 -2.686199476 1 1 0 + 634 1 3 -1.050000 -0.603692632 -4.478050211 -1.061430252 1 1 0 + 635 1 3 -1.050000 -1.043031616 -7.222434071 -1.064187192 1 1 0 + 636 1 4 -0.950000 -3.603292149 -6.190971268 -0.993425143 1 1 0 + 637 1 3 -1.050000 -2.278024061 -7.906256173 -3.298245183 1 1 0 + 638 1 3 -1.050000 -0.590722660 -5.885450799 -3.293650032 1 1 0 + 639 1 3 -1.050000 0.277898824 -8.390728187 -3.055632561 1 1 0 + 640 1 5 0.425000 -4.694842406 -6.184112754 -1.194847627 1 1 0 641 1 1 1.575000 0.889889112 1.315464043 9.189872068 0 0 0 - 642 1 2 2.100000 5.101961261 8.478737052 -6.523590700 0 0 1 - 643 1 2 2.100000 5.138292081 2.459853180 -6.517157818 0 0 1 - 644 1 3 -1.050000 4.852463935 -0.412029660 -8.141927041 0 0 1 - 645 1 3 -1.050000 5.291802919 2.332354200 -8.139170101 0 0 1 - 646 1 4 -0.950000 2.692063605 1.300891397 -8.209932150 0 0 1 - 647 1 3 -1.050000 1.366795516 3.016176301 -5.905112111 0 0 1 - 648 1 3 -1.050000 4.839493963 0.995370927 -5.909707262 0 0 1 - 649 1 3 -1.050000 3.970872479 3.500648316 -6.147724732 0 0 1 - 650 1 5 0.425000 3.783613862 1.294032882 -8.008509666 
0 0 1 + 642 1 2 2.100000 5.101961261 9.067732250 -6.523590700 0 0 1 + 643 1 2 2.100000 5.138292081 3.048848378 -6.517157818 0 0 1 + 644 1 3 -1.050000 4.852463935 0.176965538 -8.141927041 0 0 1 + 645 1 3 -1.050000 5.291802919 2.921349398 -8.139170101 0 0 1 + 646 1 4 -0.950000 2.692063605 1.889886595 -8.209932150 0 0 1 + 647 1 3 -1.050000 1.366795516 3.605171499 -5.905112111 0 0 1 + 648 1 3 -1.050000 4.839493963 1.584366125 -5.909707262 0 0 1 + 649 1 3 -1.050000 3.970872479 4.089643514 -6.147724732 0 0 1 + 650 1 5 0.425000 3.783613862 1.883028080 -8.008509666 0 0 1 651 1 1 1.575000 3.498056417 5.798375353 9.189872068 0 0 0 - 652 1 2 2.100000 2.493793956 3.995825742 -6.523590700 0 0 1 - 653 1 2 2.100000 2.586459538 6.942764491 -6.517157818 0 0 1 - 654 1 3 -1.050000 2.300631392 4.070881651 -8.141927041 0 0 1 - 655 1 3 -1.050000 2.739970376 6.815265511 -8.139170101 0 0 1 - 656 1 4 -0.950000 5.300230909 5.783802707 -8.209932150 0 0 1 - 657 1 3 -1.050000 3.974962821 7.499087612 -5.905112111 0 0 1 - 658 1 3 -1.050000 2.287661420 5.478282238 -5.909707262 0 0 1 - 659 1 3 -1.050000 1.419039936 7.983559627 -6.147724732 0 0 1 - 660 1 5 0.425000 6.391781167 5.776944193 -8.008509666 0 0 1 + 652 1 2 2.100000 2.493793956 4.584820940 -6.523590700 0 0 1 + 653 1 2 2.100000 2.586459538 7.531759689 -6.517157818 0 0 1 + 654 1 3 -1.050000 2.300631392 4.659876849 -8.141927041 0 0 1 + 655 1 3 -1.050000 2.739970376 7.404260709 -8.139170101 0 0 1 + 656 1 4 -0.950000 5.300230909 6.372797905 -8.209932150 0 0 1 + 657 1 3 -1.050000 3.974962821 8.088082810 -5.905112111 0 0 1 + 658 1 3 -1.050000 2.287661420 6.067277436 -5.909707262 0 0 1 + 659 1 3 -1.050000 1.419039936 8.572554825 -6.147724732 0 0 1 + 660 1 5 0.425000 6.391781167 6.365939391 -8.008509666 0 0 1 661 1 1 1.575000 0.932567998 7.286701985 9.189872068 0 0 0 662 1 2 2.100000 0.114373597 -0.101909699 6.510105475 0 0 0 663 1 2 2.100000 0.078042778 5.916974173 6.503672592 0 0 0 @@ -717,25 +717,25 @@ Atoms 679 1 3 -1.050000 3.797294922 
0.393267726 6.134239507 0 0 0 680 1 5 0.425000 -1.175446309 2.599883160 7.995024441 0 0 0 681 1 1 1.575000 6.049888960 1.315464043 9.189872068 0 0 0 - 682 1 2 2.100000 -10.378038892 8.478737052 -6.523590700 1 0 1 - 683 1 2 2.100000 -10.341708072 2.459853180 -6.517157818 1 0 1 - 684 1 3 -1.050000 -10.627536218 -0.412029660 -8.141927041 1 0 1 - 685 1 3 -1.050000 -10.188197233 2.332354200 -8.139170101 1 0 1 - 686 1 4 -0.950000 7.852063452 1.300891397 -8.209932150 0 0 1 - 687 1 3 -1.050000 6.526795364 3.016176301 -5.905112111 0 0 1 - 688 1 3 -1.050000 -10.640506189 0.995370927 -5.909707262 1 0 1 - 689 1 3 -1.050000 9.130872326 3.500648316 -6.147724732 0 0 1 - 690 1 5 0.425000 8.943613710 1.294032882 -8.008509666 0 0 1 + 682 1 2 2.100000 -10.378038892 9.067732250 -6.523590700 1 0 1 + 683 1 2 2.100000 -10.341708072 3.048848378 -6.517157818 1 0 1 + 684 1 3 -1.050000 10.012463782 0.176965538 -8.141927041 0 0 1 + 685 1 3 -1.050000 -10.188197233 2.921349398 -8.139170101 1 0 1 + 686 1 4 -0.950000 7.852063452 1.889886595 -8.209932150 0 0 1 + 687 1 3 -1.050000 6.526795364 3.605171499 -5.905112111 0 0 1 + 688 1 3 -1.050000 -10.640506189 1.584366125 -5.909707262 1 0 1 + 689 1 3 -1.050000 9.130872326 4.089643514 -6.147724732 0 0 1 + 690 1 5 0.425000 8.943613710 1.883028080 -8.008509666 0 0 1 691 1 1 1.575000 -11.981943736 5.798375353 9.189872068 1 0 0 - 692 1 2 2.100000 7.653793804 3.995825742 -6.523590700 0 0 1 - 693 1 2 2.100000 7.746459385 6.942764491 -6.517157818 0 0 1 - 694 1 3 -1.050000 7.460631240 4.070881651 -8.141927041 0 0 1 - 695 1 3 -1.050000 7.899970224 6.815265511 -8.139170101 0 0 1 - 696 1 4 -0.950000 -10.179769243 5.783802707 -8.209932150 1 0 1 - 697 1 3 -1.050000 9.134962668 7.499087612 -5.905112111 0 0 1 - 698 1 3 -1.050000 7.447661268 5.478282238 -5.909707262 0 0 1 - 699 1 3 -1.050000 6.579039784 7.983559627 -6.147724732 0 0 1 - 700 1 5 0.425000 -9.088218986 5.776944193 -8.008509666 1 0 1 + 692 1 2 2.100000 7.653793804 4.584820940 -6.523590700 0 0 1 + 693 1 2 
2.100000 7.746459385 7.531759689 -6.517157818 0 0 1 + 694 1 3 -1.050000 7.460631240 4.659876849 -8.141927041 0 0 1 + 695 1 3 -1.050000 7.899970224 7.404260709 -8.139170101 0 0 1 + 696 1 4 -0.950000 -10.179769243 6.372797905 -8.209932150 1 0 1 + 697 1 3 -1.050000 9.134962668 8.088082810 -5.905112111 0 0 1 + 698 1 3 -1.050000 7.447661268 6.067277436 -5.909707262 0 0 1 + 699 1 3 -1.050000 6.579039784 8.572554825 -6.147724732 0 0 1 + 700 1 5 0.425000 -9.088218986 6.365939391 -8.008509666 1 0 1 701 1 1 1.575000 6.092567845 7.286701985 9.189872068 0 0 0 702 1 2 2.100000 5.274373445 -0.101909699 6.510105475 0 0 0 703 1 2 2.100000 5.238042625 5.916974173 6.503672592 0 0 0 @@ -757,25 +757,25 @@ Atoms 719 1 3 -1.050000 -11.682705231 0.393267726 6.134239507 1 0 0 720 1 5 0.425000 3.984553539 2.599883160 7.995024441 0 0 0 721 1 1 1.575000 -9.430111193 1.315464043 9.189872068 1 0 0 - 722 1 2 2.100000 -5.218039044 8.478737052 -6.523590700 1 0 1 - 723 1 2 2.100000 -5.181708225 2.459853180 -6.517157818 1 0 1 - 724 1 3 -1.050000 -5.467536370 -0.412029660 -8.141927041 1 0 1 - 725 1 3 -1.050000 -5.028197386 2.332354200 -8.139170101 1 0 1 - 726 1 4 -0.950000 -7.627936701 1.300891397 -8.209932150 1 0 1 - 727 1 3 -1.050000 -8.953204789 3.016176301 -5.905112111 1 0 1 - 728 1 3 -1.050000 -5.480506342 0.995370927 -5.909707262 1 0 1 - 729 1 3 -1.050000 -6.349127826 3.500648316 -6.147724732 1 0 1 - 730 1 5 0.425000 -6.536386443 1.294032882 -8.008509666 1 0 1 + 722 1 2 2.100000 -5.218039044 9.067732250 -6.523590700 1 0 1 + 723 1 2 2.100000 -5.181708225 3.048848378 -6.517157818 1 0 1 + 724 1 3 -1.050000 -5.467536370 0.176965538 -8.141927041 1 0 1 + 725 1 3 -1.050000 -5.028197386 2.921349398 -8.139170101 1 0 1 + 726 1 4 -0.950000 -7.627936701 1.889886595 -8.209932150 1 0 1 + 727 1 3 -1.050000 -8.953204789 3.605171499 -5.905112111 1 0 1 + 728 1 3 -1.050000 -5.480506342 1.584366125 -5.909707262 1 0 1 + 729 1 3 -1.050000 -6.349127826 4.089643514 -6.147724732 1 0 1 + 730 1 5 0.425000 -6.536386443 
1.883028080 -8.008509666 1 0 1 731 1 1 1.575000 -6.821943888 5.798375353 9.189872068 1 0 0 - 732 1 2 2.100000 -7.826206349 3.995825742 -6.523590700 1 0 1 - 733 1 2 2.100000 -7.733540767 6.942764491 -6.517157818 1 0 1 - 734 1 3 -1.050000 -8.019368913 4.070881651 -8.141927041 1 0 1 - 735 1 3 -1.050000 -7.580029929 6.815265511 -8.139170101 1 0 1 - 736 1 4 -0.950000 -5.019769396 5.783802707 -8.209932150 1 0 1 - 737 1 3 -1.050000 -6.345037484 7.499087612 -5.905112111 1 0 1 - 738 1 3 -1.050000 -8.032338885 5.478282238 -5.909707262 1 0 1 - 739 1 3 -1.050000 -8.900960369 7.983559627 -6.147724732 1 0 1 - 740 1 5 0.425000 -3.928219138 5.776944193 -8.008509666 1 0 1 + 732 1 2 2.100000 -7.826206349 4.584820940 -6.523590700 1 0 1 + 733 1 2 2.100000 -7.733540767 7.531759689 -6.517157818 1 0 1 + 734 1 3 -1.050000 -8.019368913 4.659876849 -8.141927041 1 0 1 + 735 1 3 -1.050000 -7.580029929 7.404260709 -8.139170101 1 0 1 + 736 1 4 -0.950000 -5.019769396 6.372797905 -8.209932150 1 0 1 + 737 1 3 -1.050000 -6.345037484 8.088082810 -5.905112111 1 0 1 + 738 1 3 -1.050000 -8.032338885 6.067277436 -5.909707262 1 0 1 + 739 1 3 -1.050000 -8.900960369 8.572554825 -6.147724732 1 0 1 + 740 1 5 0.425000 -3.928219138 6.365939391 -8.008509666 1 0 1 741 1 1 1.575000 -9.387432307 7.286701985 9.189872068 1 0 0 742 1 2 2.100000 -10.205626708 -0.101909699 6.510105475 1 0 0 743 1 2 2.100000 -10.241957528 5.916974173 6.503672592 1 0 0 @@ -797,25 +797,25 @@ Atoms 759 1 3 -1.050000 -6.522705383 0.393267726 6.134239507 1 0 0 760 1 5 0.425000 -11.495446614 2.599883160 7.995024441 1 0 0 761 1 1 1.575000 -4.270111345 1.315464043 9.189872068 1 0 0 - 762 1 2 2.100000 -0.058039197 8.478737052 -6.523590700 1 0 1 - 763 1 2 2.100000 -0.021708377 2.459853180 -6.517157818 1 0 1 - 764 1 3 -1.050000 -0.307536523 -0.412029660 -8.141927041 1 0 1 - 765 1 3 -1.050000 0.131802461 2.332354200 -8.139170101 1 0 1 - 766 1 4 -0.950000 -2.467936853 1.300891397 -8.209932150 1 0 1 - 767 1 3 -1.050000 -3.793204941 3.016176301 
-5.905112111 1 0 1 - 768 1 3 -1.050000 -0.320506495 0.995370927 -5.909707262 1 0 1 - 769 1 3 -1.050000 -1.189127979 3.500648316 -6.147724732 1 0 1 - 770 1 5 0.425000 -1.376386596 1.294032882 -8.008509666 1 0 1 + 762 1 2 2.100000 -0.058039197 9.067732250 -6.523590700 1 0 1 + 763 1 2 2.100000 -0.021708377 3.048848378 -6.517157818 1 0 1 + 764 1 3 -1.050000 -0.307536523 0.176965538 -8.141927041 1 0 1 + 765 1 3 -1.050000 0.131802461 2.921349398 -8.139170101 1 0 1 + 766 1 4 -0.950000 -2.467936853 1.889886595 -8.209932150 1 0 1 + 767 1 3 -1.050000 -3.793204941 3.605171499 -5.905112111 1 0 1 + 768 1 3 -1.050000 -0.320506495 1.584366125 -5.909707262 1 0 1 + 769 1 3 -1.050000 -1.189127979 4.089643514 -6.147724732 1 0 1 + 770 1 5 0.425000 -1.376386596 1.883028080 -8.008509666 1 0 1 771 1 1 1.575000 -1.661944041 5.798375353 9.189872068 1 0 0 - 772 1 2 2.100000 -2.666206502 3.995825742 -6.523590700 1 0 1 - 773 1 2 2.100000 -2.573540920 6.942764491 -6.517157818 1 0 1 - 774 1 3 -1.050000 -2.859369065 4.070881651 -8.141927041 1 0 1 - 775 1 3 -1.050000 -2.420030081 6.815265511 -8.139170101 1 0 1 - 776 1 4 -0.950000 0.140230451 5.783802707 -8.209932150 1 0 1 - 777 1 3 -1.050000 -1.185037637 7.499087612 -5.905112111 1 0 1 - 778 1 3 -1.050000 -2.872339037 5.478282238 -5.909707262 1 0 1 - 779 1 3 -1.050000 -3.740960522 7.983559627 -6.147724732 1 0 1 - 780 1 5 0.425000 1.231780709 5.776944193 -8.008509666 1 0 1 + 772 1 2 2.100000 -2.666206502 4.584820940 -6.523590700 1 0 1 + 773 1 2 2.100000 -2.573540920 7.531759689 -6.517157818 1 0 1 + 774 1 3 -1.050000 -2.859369065 4.659876849 -8.141927041 1 0 1 + 775 1 3 -1.050000 -2.420030081 7.404260709 -8.139170101 1 0 1 + 776 1 4 -0.950000 0.140230451 6.372797905 -8.209932150 1 0 1 + 777 1 3 -1.050000 -1.185037637 8.088082810 -5.905112111 1 0 1 + 778 1 3 -1.050000 -2.872339037 6.067277436 -5.909707262 1 0 1 + 779 1 3 -1.050000 -3.740960522 8.572554825 -6.147724732 1 0 1 + 780 1 5 0.425000 1.231780709 6.365939391 -8.008509666 1 0 1 781 1 1 
1.575000 -4.227432460 7.286701985 9.189872068 1 0 0 782 1 2 2.100000 -5.045626860 -0.101909699 6.510105475 1 0 0 783 1 2 2.100000 -5.081957680 5.916974173 6.503672592 1 0 0 @@ -837,29 +837,29 @@ Atoms 799 1 3 -1.050000 -1.362705536 0.393267726 6.134239507 1 0 0 800 1 5 0.425000 -6.335446766 2.599883160 7.995024441 1 0 0 801 1 1 1.575000 0.946223874 10.281286664 9.189872068 0 0 0 - 802 1 2 2.100000 5.158296023 17.444559674 -6.523590700 0 0 1 - 803 1 2 2.100000 5.194626842 11.425675802 -6.517157818 0 0 1 - 804 1 3 -1.050000 4.908798697 8.553792961 -8.141927041 0 0 1 - 805 1 3 -1.050000 5.348137681 11.298176821 -8.139170101 0 0 1 - 806 1 4 -0.950000 2.748398366 10.266714018 -8.209932150 0 0 1 - 807 1 3 -1.050000 1.423130278 11.981998923 -5.905112111 0 0 1 - 808 1 3 -1.050000 4.895828725 9.961193549 -5.909707262 0 0 1 - 809 1 3 -1.050000 4.027207241 12.466470937 -6.147724732 0 0 1 - 810 1 5 0.425000 3.839948624 10.259855504 -8.008509666 0 0 1 + 802 1 2 2.100000 4.932957348 -17.829737203 -6.523590700 0 1 1 + 803 1 2 2.100000 5.194626842 12.014671000 -6.517157818 0 0 1 + 804 1 3 -1.050000 4.908798697 9.142788159 -8.141927041 0 0 1 + 805 1 3 -1.050000 5.348137681 11.887172019 -8.139170101 0 0 1 + 806 1 4 -0.950000 2.748398366 10.855709216 -8.209932150 0 0 1 + 807 1 3 -1.050000 1.423130278 12.570994121 -5.905112111 0 0 1 + 808 1 3 -1.050000 4.895828725 10.550188747 -5.909707262 0 0 1 + 809 1 3 -1.050000 4.027207241 13.055466135 -6.147724732 0 0 1 + 810 1 5 0.425000 3.839948624 10.848850702 -8.008509666 0 0 1 811 1 1 1.575000 3.554391179 14.764197975 9.189872068 0 0 0 - 812 1 2 2.100000 2.550128718 12.961648363 -6.523590700 0 0 1 - 813 1 2 2.100000 2.642794300 15.908587113 -6.517157818 0 0 1 - 814 1 3 -1.050000 2.356966154 13.036704272 -8.141927041 0 0 1 - 815 1 3 -1.050000 2.796305138 15.781088132 -8.139170101 0 0 1 - 816 1 4 -0.950000 5.356565671 14.749625329 -8.209932150 0 0 1 - 817 1 3 -1.050000 4.031297583 16.464910234 -5.905112111 0 0 1 - 818 1 3 -1.050000 2.343996182 
14.444104860 -5.909707262 0 0 1 - 819 1 3 -1.050000 1.475374698 16.949382248 -6.147724732 0 0 1 - 820 1 5 0.425000 6.448115929 14.742766815 -8.008509666 0 0 1 + 812 1 2 2.100000 2.550128718 13.550643561 -6.523590700 0 0 1 + 813 1 2 2.100000 2.642794300 16.497582311 -6.517157818 0 0 1 + 814 1 3 -1.050000 2.356966154 13.625699470 -8.141927041 0 0 1 + 815 1 3 -1.050000 2.796305138 16.370083330 -8.139170101 0 0 1 + 816 1 4 -0.950000 5.356565671 15.338620527 -8.209932150 0 0 1 + 817 1 3 -1.050000 4.031297583 17.053905432 -5.905112111 0 0 1 + 818 1 3 -1.050000 2.343996182 15.033100058 -5.909707262 0 0 1 + 819 1 3 -1.050000 1.475374698 17.538377446 -6.147724732 0 0 1 + 820 1 5 0.425000 6.448115929 15.331762013 -8.008509666 0 0 1 821 1 1 1.575000 0.988902760 16.252524607 9.189872068 0 0 0 822 1 2 2.100000 0.170708359 8.863912922 6.510105475 0 0 0 823 1 2 2.100000 0.134377539 14.882796794 6.503672592 0 0 0 - 824 1 3 -1.050000 0.420205685 17.754679635 8.128441816 0 0 0 + 824 1 3 -1.050000 0.194867010 -18.108612440 8.128441816 0 1 0 825 1 3 -1.050000 -0.019133299 15.010295775 8.125684876 0 0 0 826 1 4 -0.950000 2.580606015 16.041758578 8.196446925 0 0 0 827 1 3 -1.050000 3.905874104 14.326473673 5.891626886 0 0 0 @@ -877,29 +877,29 @@ Atoms 839 1 3 -1.050000 3.853629684 9.359090348 6.134239507 0 0 0 840 1 5 0.425000 -1.119111547 11.565705782 7.995024441 0 0 0 841 1 1 1.575000 6.106223722 10.281286664 9.189872068 0 0 0 - 842 1 2 2.100000 -10.321704130 17.444559674 -6.523590700 1 0 1 - 843 1 2 2.100000 -10.285373310 11.425675802 -6.517157818 1 0 1 - 844 1 3 -1.050000 -10.571201456 8.553792961 -8.141927041 1 0 1 - 845 1 3 -1.050000 -10.131862472 11.298176821 -8.139170101 1 0 1 - 846 1 4 -0.950000 7.908398214 10.266714018 -8.209932150 0 0 1 - 847 1 3 -1.050000 6.583130126 11.981998923 -5.905112111 0 0 1 - 848 1 3 -1.050000 -10.584171428 9.961193549 -5.909707262 1 0 1 - 849 1 3 -1.050000 9.187207088 12.466470937 -6.147724732 0 0 1 - 850 1 5 0.425000 8.999948471 10.259855504 
-8.008509666 0 0 1 + 842 1 2 2.100000 -10.547042805 -17.829737203 -6.523590700 1 1 1 + 843 1 2 2.100000 -10.285373310 12.014671000 -6.517157818 1 0 1 + 844 1 3 -1.050000 10.068798544 9.142788159 -8.141927041 0 0 1 + 845 1 3 -1.050000 -10.131862472 11.887172019 -8.139170101 1 0 1 + 846 1 4 -0.950000 7.908398214 10.855709216 -8.209932150 0 0 1 + 847 1 3 -1.050000 6.583130126 12.570994121 -5.905112111 0 0 1 + 848 1 3 -1.050000 -10.584171428 10.550188747 -5.909707262 1 0 1 + 849 1 3 -1.050000 9.187207088 13.055466135 -6.147724732 0 0 1 + 850 1 5 0.425000 8.999948471 10.848850702 -8.008509666 0 0 1 851 1 1 1.575000 -11.925608974 14.764197975 9.189872068 1 0 0 - 852 1 2 2.100000 7.710128565 12.961648363 -6.523590700 0 0 1 - 853 1 2 2.100000 7.802794147 15.908587113 -6.517157818 0 0 1 - 854 1 3 -1.050000 7.516966002 13.036704272 -8.141927041 0 0 1 - 855 1 3 -1.050000 7.956304986 15.781088132 -8.139170101 0 0 1 - 856 1 4 -0.950000 -10.123434482 14.749625329 -8.209932150 1 0 1 - 857 1 3 -1.050000 9.191297430 16.464910234 -5.905112111 0 0 1 - 858 1 3 -1.050000 7.503996030 14.444104860 -5.909707262 0 0 1 - 859 1 3 -1.050000 6.635374545 16.949382248 -6.147724732 0 0 1 - 860 1 5 0.425000 -9.031884224 14.742766815 -8.008509666 1 0 1 + 852 1 2 2.100000 7.710128565 13.550643561 -6.523590700 0 0 1 + 853 1 2 2.100000 7.802794147 16.497582311 -6.517157818 0 0 1 + 854 1 3 -1.050000 7.516966002 13.625699470 -8.141927041 0 0 1 + 855 1 3 -1.050000 7.956304986 16.370083330 -8.139170101 0 0 1 + 856 1 4 -0.950000 -10.123434482 15.338620527 -8.209932150 1 0 1 + 857 1 3 -1.050000 9.191297430 17.053905432 -5.905112111 0 0 1 + 858 1 3 -1.050000 7.503996030 15.033100058 -5.909707262 0 0 1 + 859 1 3 -1.050000 6.635374545 17.538377446 -6.147724732 0 0 1 + 860 1 5 0.425000 -9.031884224 15.331762013 -8.008509666 1 0 1 861 1 1 1.575000 6.148902607 16.252524607 9.189872068 0 0 0 862 1 2 2.100000 5.330708207 8.863912922 6.510105475 0 0 0 863 1 2 2.100000 5.294377387 14.882796794 6.503672592 0 0 0 - 864 
1 3 -1.050000 5.580205532 17.754679635 8.128441816 0 0 0 + 864 1 3 -1.050000 5.354866857 -18.108612440 8.128441816 0 1 0 865 1 3 -1.050000 5.140866548 15.010295775 8.125684876 0 0 0 866 1 4 -0.950000 -12.899394137 16.041758578 8.196446925 1 0 0 867 1 3 -1.050000 -11.574126049 14.326473673 5.891626886 1 0 0 @@ -917,29 +917,29 @@ Atoms 879 1 3 -1.050000 -11.626370469 9.359090348 6.134239507 1 0 0 880 1 5 0.425000 4.040888301 11.565705782 7.995024441 0 0 0 881 1 1 1.575000 -9.373776431 10.281286664 9.189872068 1 0 0 - 882 1 2 2.100000 -5.161704283 17.444559674 -6.523590700 1 0 1 - 883 1 2 2.100000 -5.125373463 11.425675802 -6.517157818 1 0 1 - 884 1 3 -1.050000 -5.411201608 8.553792961 -8.141927041 1 0 1 - 885 1 3 -1.050000 -4.971862624 11.298176821 -8.139170101 1 0 1 - 886 1 4 -0.950000 -7.571601939 10.266714018 -8.209932150 1 0 1 - 887 1 3 -1.050000 -8.896870027 11.981998923 -5.905112111 1 0 1 - 888 1 3 -1.050000 -5.424171580 9.961193549 -5.909707262 1 0 1 - 889 1 3 -1.050000 -6.292793064 12.466470937 -6.147724732 1 0 1 - 890 1 5 0.425000 -6.480051681 10.259855504 -8.008509666 1 0 1 + 882 1 2 2.100000 -5.387042958 -17.829737203 -6.523590700 1 1 1 + 883 1 2 2.100000 -5.125373463 12.014671000 -6.517157818 1 0 1 + 884 1 3 -1.050000 -5.411201608 9.142788159 -8.141927041 1 0 1 + 885 1 3 -1.050000 -4.971862624 11.887172019 -8.139170101 1 0 1 + 886 1 4 -0.950000 -7.571601939 10.855709216 -8.209932150 1 0 1 + 887 1 3 -1.050000 -8.896870027 12.570994121 -5.905112111 1 0 1 + 888 1 3 -1.050000 -5.424171580 10.550188747 -5.909707262 1 0 1 + 889 1 3 -1.050000 -6.292793064 13.055466135 -6.147724732 1 0 1 + 890 1 5 0.425000 -6.480051681 10.848850702 -8.008509666 1 0 1 891 1 1 1.575000 -6.765609126 14.764197975 9.189872068 1 0 0 - 892 1 2 2.100000 -7.769871587 12.961648363 -6.523590700 1 0 1 - 893 1 2 2.100000 -7.677206006 15.908587113 -6.517157818 1 0 1 - 894 1 3 -1.050000 -7.963034151 13.036704272 -8.141927041 1 0 1 - 895 1 3 -1.050000 -7.523695167 15.781088132 -8.139170101 1 0 1 
- 896 1 4 -0.950000 -4.963434634 14.749625329 -8.209932150 1 0 1 - 897 1 3 -1.050000 -6.288702722 16.464910234 -5.905112111 1 0 1 - 898 1 3 -1.050000 -7.976004123 14.444104860 -5.909707262 1 0 1 - 899 1 3 -1.050000 -8.844625607 16.949382248 -6.147724732 1 0 1 - 900 1 5 0.425000 -3.871884377 14.742766815 -8.008509666 1 0 1 + 892 1 2 2.100000 -7.769871587 13.550643561 -6.523590700 1 0 1 + 893 1 2 2.100000 -7.677206006 16.497582311 -6.517157818 1 0 1 + 894 1 3 -1.050000 -7.963034151 13.625699470 -8.141927041 1 0 1 + 895 1 3 -1.050000 -7.523695167 16.370083330 -8.139170101 1 0 1 + 896 1 4 -0.950000 -4.963434634 15.338620527 -8.209932150 1 0 1 + 897 1 3 -1.050000 -6.288702722 17.053905432 -5.905112111 1 0 1 + 898 1 3 -1.050000 -7.976004123 15.033100058 -5.909707262 1 0 1 + 899 1 3 -1.050000 -8.844625607 17.538377446 -6.147724732 1 0 1 + 900 1 5 0.425000 -3.871884377 15.331762013 -8.008509666 1 0 1 901 1 1 1.575000 -9.331097546 16.252524607 9.189872068 1 0 0 902 1 2 2.100000 -10.149291946 8.863912922 6.510105475 1 0 0 903 1 2 2.100000 -10.185622766 14.882796794 6.503672592 1 0 0 - 904 1 3 -1.050000 -9.899794620 17.754679635 8.128441816 1 0 0 + 904 1 3 -1.050000 -10.125133295 -18.108612440 8.128441816 1 1 0 905 1 3 -1.050000 -10.339133604 15.010295775 8.125684876 1 0 0 906 1 4 -0.950000 -7.739394290 16.041758578 8.196446925 1 0 0 907 1 3 -1.050000 -6.414126201 14.326473673 5.891626886 1 0 0 @@ -957,29 +957,29 @@ Atoms 919 1 3 -1.050000 -6.466370621 9.359090348 6.134239507 1 0 0 920 1 5 0.425000 -11.439111852 11.565705782 7.995024441 1 0 0 921 1 1 1.575000 -4.213776583 10.281286664 9.189872068 1 0 0 - 922 1 2 2.100000 -0.001704435 17.444559674 -6.523590700 1 0 1 - 923 1 2 2.100000 0.034626385 11.425675802 -6.517157818 1 0 1 - 924 1 3 -1.050000 -0.251201761 8.553792961 -8.141927041 1 0 1 - 925 1 3 -1.050000 0.188137223 11.298176821 -8.139170101 1 0 1 - 926 1 4 -0.950000 -2.411602091 10.266714018 -8.209932150 1 0 1 - 927 1 3 -1.050000 -3.736870180 11.981998923 -5.905112111 1 
0 1 - 928 1 3 -1.050000 -0.264171733 9.961193549 -5.909707262 1 0 1 - 929 1 3 -1.050000 -1.132793217 12.466470937 -6.147724732 1 0 1 - 930 1 5 0.425000 -1.320051834 10.259855504 -8.008509666 1 0 1 + 922 1 2 2.100000 -0.227043110 -17.829737203 -6.523590700 1 1 1 + 923 1 2 2.100000 0.034626385 12.014671000 -6.517157818 1 0 1 + 924 1 3 -1.050000 -0.251201761 9.142788159 -8.141927041 1 0 1 + 925 1 3 -1.050000 0.188137223 11.887172019 -8.139170101 1 0 1 + 926 1 4 -0.950000 -2.411602091 10.855709216 -8.209932150 1 0 1 + 927 1 3 -1.050000 -3.736870180 12.570994121 -5.905112111 1 0 1 + 928 1 3 -1.050000 -0.264171733 10.550188747 -5.909707262 1 0 1 + 929 1 3 -1.050000 -1.132793217 13.055466135 -6.147724732 1 0 1 + 930 1 5 0.425000 -1.320051834 10.848850702 -8.008509666 1 0 1 931 1 1 1.575000 -1.605609279 14.764197975 9.189872068 1 0 0 - 932 1 2 2.100000 -2.609871740 12.961648363 -6.523590700 1 0 1 - 933 1 2 2.100000 -2.517206158 15.908587113 -6.517157818 1 0 1 - 934 1 3 -1.050000 -2.803034304 13.036704272 -8.141927041 1 0 1 - 935 1 3 -1.050000 -2.363695320 15.781088132 -8.139170101 1 0 1 - 936 1 4 -0.950000 0.196565213 14.749625329 -8.209932150 1 0 1 - 937 1 3 -1.050000 -1.128702875 16.464910234 -5.905112111 1 0 1 - 938 1 3 -1.050000 -2.816004275 14.444104860 -5.909707262 1 0 1 - 939 1 3 -1.050000 -3.684625760 16.949382248 -6.147724732 1 0 1 - 940 1 5 0.425000 1.288115471 14.742766815 -8.008509666 1 0 1 + 932 1 2 2.100000 -2.609871740 13.550643561 -6.523590700 1 0 1 + 933 1 2 2.100000 -2.517206158 16.497582311 -6.517157818 1 0 1 + 934 1 3 -1.050000 -2.803034304 13.625699470 -8.141927041 1 0 1 + 935 1 3 -1.050000 -2.363695320 16.370083330 -8.139170101 1 0 1 + 936 1 4 -0.950000 0.196565213 15.338620527 -8.209932150 1 0 1 + 937 1 3 -1.050000 -1.128702875 17.053905432 -5.905112111 1 0 1 + 938 1 3 -1.050000 -2.816004275 15.033100058 -5.909707262 1 0 1 + 939 1 3 -1.050000 -3.684625760 17.538377446 -6.147724732 1 0 1 + 940 1 5 0.425000 1.288115471 15.331762013 -8.008509666 1 0 1 
941 1 1 1.575000 -4.171097698 16.252524607 9.189872068 1 0 0 942 1 2 2.100000 -4.989292099 8.863912922 6.510105475 1 0 0 943 1 2 2.100000 -5.025622918 14.882796794 6.503672592 1 0 0 - 944 1 3 -1.050000 -4.739794773 17.754679635 8.128441816 1 0 0 + 944 1 3 -1.050000 -4.965133448 -18.108612440 8.128441816 1 1 0 945 1 3 -1.050000 -5.179133757 15.010295775 8.125684876 1 0 0 946 1 4 -0.950000 -2.579394442 16.041758578 8.196446925 1 0 0 947 1 3 -1.050000 -1.254126354 14.326473673 5.891626886 1 0 0 @@ -996,326 +996,326 @@ Atoms 958 1 3 -1.050000 -2.174992258 11.864367737 5.896222036 1 0 0 959 1 3 -1.050000 -1.306370774 9.359090348 6.134239507 1 0 0 960 1 5 0.425000 -6.279112005 11.565705782 7.995024441 1 0 0 - 961 1 1 1.575000 1.366215159 -16.616182789 9.189872068 0 1 0 - 962 1 2 2.100000 5.578287308 -9.452909781 -6.523590700 0 1 1 - 963 1 2 2.100000 5.614618128 -15.471793653 -6.517157818 0 1 1 - 964 1 3 -1.050000 4.965133459 17.519615583 -8.141927041 0 0 1 - 965 1 3 -1.050000 5.768128967 -15.599292633 -8.139170101 0 1 1 - 966 1 4 -0.950000 3.168389652 -16.630755436 -8.209932150 0 1 1 - 967 1 3 -1.050000 1.843121564 -14.915470532 -5.905112111 0 1 1 - 968 1 3 -1.050000 5.315820011 -16.936275906 -5.909707262 0 1 1 - 969 1 3 -1.050000 4.447198527 -14.430998517 -6.147724732 0 1 1 - 970 1 5 0.425000 4.259939910 -16.637613951 -8.008509666 0 1 1 - 971 1 1 1.575000 3.974382464 -12.133271479 9.189872068 0 1 0 - 972 1 2 2.100000 2.970120004 -13.935821091 -6.523590700 0 1 1 - 973 1 2 2.100000 3.062785585 -10.988882342 -6.517157818 0 1 1 - 974 1 3 -1.050000 2.776957440 -13.860765182 -8.141927041 0 1 1 - 975 1 3 -1.050000 3.216296424 -11.116381322 -8.139170101 0 1 1 - 976 1 4 -0.950000 5.776556957 -12.147844126 -8.209932150 0 1 1 - 977 1 3 -1.050000 4.451288869 -10.432559221 -5.905112111 0 1 1 - 978 1 3 -1.050000 2.763987468 -12.453364595 -5.909707262 0 1 1 - 979 1 3 -1.050000 1.895365984 -9.948087206 -6.147724732 0 1 1 - 980 1 5 0.425000 6.868107214 -12.154702640 -8.008509666 0 1 1 - 
981 1 1 1.575000 1.408894044 -10.644944847 9.189872068 0 1 0 - 982 1 2 2.100000 0.227043121 17.829735544 6.510105475 0 0 0 - 983 1 2 2.100000 0.554368824 -12.014672659 6.503672592 0 1 0 - 984 1 3 -1.050000 0.840196970 -9.142789819 8.128441816 0 1 0 - 985 1 3 -1.050000 0.400857986 -11.887173679 8.125684876 0 1 0 - 986 1 4 -0.950000 3.000597300 -10.855710876 8.196446925 0 1 0 - 987 1 3 -1.050000 4.325865388 -12.570995780 5.891626886 0 1 0 - 988 1 3 -1.050000 0.853166942 -10.550190406 5.896222036 0 1 0 - 989 1 3 -1.050000 1.721788426 -13.055467795 6.134239507 0 1 0 - 990 1 5 0.425000 1.909047043 -10.848852361 7.995024441 0 1 0 - 991 1 1 1.575000 -1.199273260 -15.127856157 9.189872068 0 1 0 - 992 1 2 2.100000 3.198866949 -13.550645221 6.510105475 0 1 0 - 993 1 2 2.100000 3.106201367 -16.497583970 6.503672592 0 1 0 - 994 1 3 -1.050000 3.392029513 -13.625701130 8.128441816 0 1 0 - 995 1 3 -1.050000 2.952690528 -16.370084990 8.125684876 0 1 0 - 996 1 4 -0.950000 0.392429996 -15.338622186 8.196446925 0 1 0 - 997 1 3 -1.050000 1.717698084 -17.053907091 5.891626886 0 1 0 - 998 1 3 -1.050000 3.404999484 -15.033101717 5.896222036 0 1 0 - 999 1 3 -1.050000 4.273620969 -17.538379106 6.134239507 0 1 0 - 1000 1 5 0.425000 -0.699120262 -15.331763672 7.995024441 0 1 0 - 1001 1 1 1.575000 6.526215006 -16.616182789 9.189872068 0 1 0 - 1002 1 2 2.100000 -9.901712844 -9.452909781 -6.523590700 1 1 1 - 1003 1 2 2.100000 -9.865382024 -15.471793653 -6.517157818 1 1 1 - 1004 1 3 -1.050000 -10.514866694 17.519615583 -8.141927041 1 0 1 - 1005 1 3 -1.050000 -9.711871186 -15.599292633 -8.139170101 1 1 1 - 1006 1 4 -0.950000 8.328389500 -16.630755436 -8.209932150 0 1 1 - 1007 1 3 -1.050000 7.003121411 -14.915470532 -5.905112111 0 1 1 - 1008 1 3 -1.050000 -10.164180142 -16.936275906 -5.909707262 1 1 1 - 1009 1 3 -1.050000 9.607198374 -14.430998517 -6.147724732 0 1 1 - 1010 1 5 0.425000 9.419939757 -16.637613951 -8.008509666 0 1 1 - 1011 1 1 1.575000 -11.505617689 -12.133271479 9.189872068 1 1 0 - 
1012 1 2 2.100000 8.130119851 -13.935821091 -6.523590700 0 1 1 - 1013 1 2 2.100000 8.222785433 -10.988882342 -6.517157818 0 1 1 - 1014 1 3 -1.050000 7.936957287 -13.860765182 -8.141927041 0 1 1 - 1015 1 3 -1.050000 8.376296271 -11.116381322 -8.139170101 0 1 1 - 1016 1 4 -0.950000 -9.703443196 -12.147844126 -8.209932150 1 1 1 - 1017 1 3 -1.050000 9.611288716 -10.432559221 -5.905112111 0 1 1 - 1018 1 3 -1.050000 7.923987315 -12.453364595 -5.909707262 0 1 1 - 1019 1 3 -1.050000 7.055365831 -9.948087206 -6.147724732 0 1 1 - 1020 1 5 0.425000 -8.611892938 -12.154702640 -8.008509666 1 1 1 - 1021 1 1 1.575000 6.568893892 -10.644944847 9.189872068 0 1 0 - 1022 1 2 2.100000 5.387042968 17.829735544 6.510105475 0 0 0 - 1023 1 2 2.100000 5.714368672 -12.014672659 6.503672592 0 1 0 - 1024 1 3 -1.050000 6.000196817 -9.142789819 8.128441816 0 1 0 - 1025 1 3 -1.050000 5.560857833 -11.887173679 8.125684876 0 1 0 - 1026 1 4 -0.950000 -12.479402852 -10.855710876 8.196446925 1 1 0 - 1027 1 3 -1.050000 -11.154134764 -12.570995780 5.891626886 1 1 0 - 1028 1 3 -1.050000 6.013166789 -10.550190406 5.896222036 0 1 0 - 1029 1 3 -1.050000 6.881788273 -13.055467795 6.134239507 0 1 0 - 1030 1 5 0.425000 7.069046890 -10.848852361 7.995024441 0 1 0 - 1031 1 1 1.575000 3.960726587 -15.127856157 9.189872068 0 1 0 - 1032 1 2 2.100000 -12.281133204 -13.550645221 6.510105475 1 1 0 - 1033 1 2 2.100000 -12.373798786 -16.497583970 6.503672592 1 1 0 - 1034 1 3 -1.050000 -12.087970640 -13.625701130 8.128441816 1 1 0 - 1035 1 3 -1.050000 -12.527309624 -16.370084990 8.125684876 1 1 0 - 1036 1 4 -0.950000 5.552429843 -15.338622186 8.196446925 0 1 0 - 1037 1 3 -1.050000 6.877697931 -17.053907091 5.891626886 0 1 0 - 1038 1 3 -1.050000 -12.075000668 -15.033101717 5.896222036 1 1 0 - 1039 1 3 -1.050000 -11.206379184 -17.538379106 6.134239507 1 1 0 - 1040 1 5 0.425000 4.460879585 -15.331763672 7.995024441 0 1 0 - 1041 1 1 1.575000 -8.953785146 -16.616182789 9.189872068 1 1 0 - 1042 1 2 2.100000 -4.741712997 
-9.452909781 -6.523590700 1 1 1 - 1043 1 2 2.100000 -4.705382177 -15.471793653 -6.517157818 1 1 1 - 1044 1 3 -1.050000 -5.354866846 17.519615583 -8.141927041 1 0 1 - 1045 1 3 -1.050000 -4.551871338 -15.599292633 -8.139170101 1 1 1 - 1046 1 4 -0.950000 -7.151610653 -16.630755436 -8.209932150 1 1 1 - 1047 1 3 -1.050000 -8.476878741 -14.915470532 -5.905112111 1 1 1 - 1048 1 3 -1.050000 -5.004180294 -16.936275906 -5.909707262 1 1 1 - 1049 1 3 -1.050000 -5.872801779 -14.430998517 -6.147724732 1 1 1 - 1050 1 5 0.425000 -6.060060395 -16.637613951 -8.008509666 1 1 1 - 1051 1 1 1.575000 -6.345617841 -12.133271479 9.189872068 1 1 0 - 1052 1 2 2.100000 -7.349880301 -13.935821091 -6.523590700 1 1 1 - 1053 1 2 2.100000 -7.257214720 -10.988882342 -6.517157818 1 1 1 - 1054 1 3 -1.050000 -7.543042865 -13.860765182 -8.141927041 1 1 1 - 1055 1 3 -1.050000 -7.103703881 -11.116381322 -8.139170101 1 1 1 - 1056 1 4 -0.950000 -4.543443348 -12.147844126 -8.209932150 1 1 1 - 1057 1 3 -1.050000 -5.868711437 -10.432559221 -5.905112111 1 1 1 - 1058 1 3 -1.050000 -7.556012837 -12.453364595 -5.909707262 1 1 1 - 1059 1 3 -1.050000 -8.424634321 -9.948087206 -6.147724732 1 1 1 - 1060 1 5 0.425000 -3.451893091 -12.154702640 -8.008509666 1 1 1 - 1061 1 1 1.575000 -8.911106261 -10.644944847 9.189872068 1 1 0 - 1062 1 2 2.100000 -10.092957184 17.829735544 6.510105475 1 0 0 - 1063 1 2 2.100000 -9.765631481 -12.014672659 6.503672592 1 1 0 - 1064 1 3 -1.050000 -9.479803335 -9.142789819 8.128441816 1 1 0 - 1065 1 3 -1.050000 -9.919142320 -11.887173679 8.125684876 1 1 0 - 1066 1 4 -0.950000 -7.319403005 -10.855710876 8.196446925 1 1 0 - 1067 1 3 -1.050000 -5.994134917 -12.570995780 5.891626886 1 1 0 - 1068 1 3 -1.050000 -9.466833364 -10.550190406 5.896222036 1 1 0 - 1069 1 3 -1.050000 -8.598211879 -13.055467795 6.134239507 1 1 0 - 1070 1 5 0.425000 -8.410953263 -10.848852361 7.995024441 1 1 0 - 1071 1 1 1.575000 -11.519273565 -15.127856157 9.189872068 1 1 0 - 1072 1 2 2.100000 -7.121133356 -13.550645221 
6.510105475 1 1 0 - 1073 1 2 2.100000 -7.213798938 -16.497583970 6.503672592 1 1 0 - 1074 1 3 -1.050000 -6.927970793 -13.625701130 8.128441816 1 1 0 - 1075 1 3 -1.050000 -7.367309777 -16.370084990 8.125684876 1 1 0 - 1076 1 4 -0.950000 -9.927570310 -15.338622186 8.196446925 1 1 0 - 1077 1 3 -1.050000 -8.602302221 -17.053907091 5.891626886 1 1 0 - 1078 1 3 -1.050000 -6.915000821 -15.033101717 5.896222036 1 1 0 - 1079 1 3 -1.050000 -6.046379336 -17.538379106 6.134239507 1 1 0 - 1080 1 5 0.425000 -11.019120567 -15.331763672 7.995024441 1 1 0 - 1081 1 1 1.575000 -3.793785299 -16.616182789 9.189872068 1 1 0 - 1082 1 2 2.100000 0.418286851 -9.452909781 -6.523590700 1 1 1 - 1083 1 2 2.100000 0.454617670 -15.471793653 -6.517157818 1 1 1 - 1084 1 3 -1.050000 -0.194866999 17.519615583 -8.141927041 1 0 1 - 1085 1 3 -1.050000 0.608128509 -15.599292633 -8.139170101 1 1 1 - 1086 1 4 -0.950000 -1.991610806 -16.630755436 -8.209932150 1 1 1 - 1087 1 3 -1.050000 -3.316878894 -14.915470532 -5.905112111 1 1 1 - 1088 1 3 -1.050000 0.155819553 -16.936275906 -5.909707262 1 1 1 - 1089 1 3 -1.050000 -0.712801931 -14.430998517 -6.147724732 1 1 1 - 1090 1 5 0.425000 -0.900060548 -16.637613951 -8.008509666 1 1 1 - 1091 1 1 1.575000 -1.185617994 -12.133271479 9.189872068 1 1 0 - 1092 1 2 2.100000 -2.189880454 -13.935821091 -6.523590700 1 1 1 - 1093 1 2 2.100000 -2.097214872 -10.988882342 -6.517157818 1 1 1 - 1094 1 3 -1.050000 -2.383043018 -13.860765182 -8.141927041 1 1 1 - 1095 1 3 -1.050000 -1.943704034 -11.116381322 -8.139170101 1 1 1 - 1096 1 4 -0.950000 0.616556499 -12.147844126 -8.209932150 1 1 1 - 1097 1 3 -1.050000 -0.708711589 -10.432559221 -5.905112111 1 1 1 - 1098 1 3 -1.050000 -2.396012990 -12.453364595 -5.909707262 1 1 1 - 1099 1 3 -1.050000 -3.264634474 -9.948087206 -6.147724732 1 1 1 - 1100 1 5 0.425000 1.708106757 -12.154702640 -8.008509666 1 1 1 - 1101 1 1 1.575000 -3.751106413 -10.644944847 9.189872068 1 1 0 - 1102 1 2 2.100000 -4.932957337 17.829735544 6.510105475 1 0 0 - 
1103 1 2 2.100000 -4.605631634 -12.014672659 6.503672592 1 1 0 - 1104 1 3 -1.050000 -4.319803488 -9.142789819 8.128441816 1 1 0 - 1105 1 3 -1.050000 -4.759142472 -11.887173679 8.125684876 1 1 0 - 1106 1 4 -0.950000 -2.159403158 -10.855710876 8.196446925 1 1 0 - 1107 1 3 -1.050000 -0.834135069 -12.570995780 5.891626886 1 1 0 - 1108 1 3 -1.050000 -4.306833516 -10.550190406 5.896222036 1 1 0 - 1109 1 3 -1.050000 -3.438212032 -13.055467795 6.134239507 1 1 0 - 1110 1 5 0.425000 -3.250953415 -10.848852361 7.995024441 1 1 0 - 1111 1 1 1.575000 -6.359273718 -15.127856157 9.189872068 1 1 0 - 1112 1 2 2.100000 -1.961133509 -13.550645221 6.510105475 1 1 0 - 1113 1 2 2.100000 -2.053799091 -16.497583970 6.503672592 1 1 0 - 1114 1 3 -1.050000 -1.767970945 -13.625701130 8.128441816 1 1 0 - 1115 1 3 -1.050000 -2.207309929 -16.370084990 8.125684876 1 1 0 - 1116 1 4 -0.950000 -4.767570462 -15.338622186 8.196446925 1 1 0 - 1117 1 3 -1.050000 -3.442302374 -17.053907091 5.891626886 1 1 0 - 1118 1 3 -1.050000 -1.755000973 -15.033101717 5.896222036 1 1 0 - 1119 1 3 -1.050000 -0.886379489 -17.538379106 6.134239507 1 1 0 - 1120 1 5 0.425000 -5.859120720 -15.331763672 7.995024441 1 1 0 - 1121 1 1 1.575000 1.422549921 -7.650360168 9.189872068 0 1 0 - 1122 1 2 2.100000 5.634622070 -0.487087159 -6.523590700 0 1 1 - 1123 1 2 2.100000 5.670952890 -6.505971031 -6.517157818 0 1 1 - 1124 1 3 -1.050000 5.385124745 -9.377853872 -8.141927041 0 1 1 - 1125 1 3 -1.050000 5.824463729 -6.633470011 -8.139170101 0 1 1 - 1126 1 4 -0.950000 3.224724414 -7.664932815 -8.209932150 0 1 1 - 1127 1 3 -1.050000 1.899456326 -5.949647910 -5.905112111 0 1 1 - 1128 1 3 -1.050000 5.372154773 -7.970453284 -5.909707262 0 1 1 - 1129 1 3 -1.050000 4.503533288 -5.465175896 -6.147724732 0 1 1 - 1130 1 5 0.425000 4.316274672 -7.671791329 -8.008509666 0 1 1 - 1131 1 1 1.575000 4.030717226 -3.167448857 9.189872068 0 1 0 - 1132 1 2 2.100000 3.026454766 -4.969998470 -6.523590700 0 1 1 - 1133 1 2 2.100000 3.119120347 -2.023059720 
-6.517157818 0 1 1 - 1134 1 3 -1.050000 2.833292202 -4.894942561 -8.141927041 0 1 1 - 1135 1 3 -1.050000 3.272631186 -2.150558701 -8.139170101 0 1 1 - 1136 1 4 -0.950000 5.832891719 -3.182021504 -8.209932150 0 1 1 - 1137 1 3 -1.050000 4.507623630 -1.466736599 -5.905112111 0 1 1 - 1138 1 3 -1.050000 2.820322230 -3.487541973 -5.909707262 0 1 1 - 1139 1 3 -1.050000 1.951700746 -0.982264585 -6.147724732 0 1 1 - 1140 1 5 0.425000 6.924441976 -3.188880018 -8.008509666 0 1 1 - 1141 1 1 1.575000 1.465228806 -1.679122225 9.189872068 0 1 0 - 1142 1 2 2.100000 0.647034406 -9.067733910 6.510105475 0 1 0 - 1143 1 2 2.100000 0.610703586 -3.048850038 6.503672592 0 1 0 - 1144 1 3 -1.050000 0.896531732 -0.176967197 8.128441816 0 1 0 - 1145 1 3 -1.050000 0.457192747 -2.921351057 8.125684876 0 1 0 - 1146 1 4 -0.950000 3.056932062 -1.889888254 8.196446925 0 1 0 - 1147 1 3 -1.050000 4.382200150 -3.605173159 5.891626886 0 1 0 - 1148 1 3 -1.050000 0.909501703 -1.584367785 5.896222036 0 1 0 - 1149 1 3 -1.050000 1.778123188 -4.089645173 6.134239507 0 1 0 - 1150 1 5 0.425000 1.965381804 -1.883029740 7.995024441 0 1 0 - 1151 1 1 1.575000 -1.142938498 -6.162033536 9.189872068 0 1 0 - 1152 1 2 2.100000 3.255201710 -4.584822599 6.510105475 0 1 0 - 1153 1 2 2.100000 3.162536129 -7.531761349 6.503672592 0 1 0 - 1154 1 3 -1.050000 3.448364274 -4.659878508 8.128441816 0 1 0 - 1155 1 3 -1.050000 3.009025290 -7.404262368 8.125684876 0 1 0 - 1156 1 4 -0.950000 0.448764757 -6.372799565 8.196446925 0 1 0 - 1157 1 3 -1.050000 1.774032846 -8.088084470 5.891626886 0 1 0 - 1158 1 3 -1.050000 3.461334246 -6.067279095 5.896222036 0 1 0 - 1159 1 3 -1.050000 4.329955730 -8.572556484 6.134239507 0 1 0 - 1160 1 5 0.425000 -0.642785500 -6.365941050 7.995024441 0 1 0 - 1161 1 1 1.575000 6.582549768 -7.650360168 9.189872068 0 1 0 - 1162 1 2 2.100000 -9.845378082 -0.487087159 -6.523590700 1 1 1 - 1163 1 2 2.100000 -9.809047263 -6.505971031 -6.517157818 1 1 1 - 1164 1 3 -1.050000 -10.094875408 -9.377853872 
-8.141927041 1 1 1 - 1165 1 3 -1.050000 -9.655536424 -6.633470011 -8.139170101 1 1 1 - 1166 1 4 -0.950000 8.384724261 -7.664932815 -8.209932150 0 1 1 - 1167 1 3 -1.050000 7.059456173 -5.949647910 -5.905112111 0 1 1 - 1168 1 3 -1.050000 -10.107845380 -7.970453284 -5.909707262 1 1 1 - 1169 1 3 -1.050000 -10.976466864 -5.465175896 -6.147724732 1 1 1 - 1170 1 5 0.425000 9.476274519 -7.671791329 -8.008509666 0 1 1 - 1171 1 1 1.575000 -11.449282927 -3.167448857 9.189872068 1 1 0 - 1172 1 2 2.100000 8.186454613 -4.969998470 -6.523590700 0 1 1 - 1173 1 2 2.100000 8.279120195 -2.023059720 -6.517157818 0 1 1 - 1174 1 3 -1.050000 7.993292049 -4.894942561 -8.141927041 0 1 1 - 1175 1 3 -1.050000 8.432631033 -2.150558701 -8.139170101 0 1 1 - 1176 1 4 -0.950000 -9.647108434 -3.182021504 -8.209932150 1 1 1 - 1177 1 3 -1.050000 -10.972376522 -1.466736599 -5.905112111 1 1 1 - 1178 1 3 -1.050000 7.980322077 -3.487541973 -5.909707262 0 1 1 - 1179 1 3 -1.050000 7.111700593 -0.982264585 -6.147724732 0 1 1 - 1180 1 5 0.425000 -8.555558176 -3.188880018 -8.008509666 1 1 1 - 1181 1 1 1.575000 6.625228654 -1.679122225 9.189872068 0 1 0 - 1182 1 2 2.100000 5.807034253 -9.067733910 6.510105475 0 1 0 - 1183 1 2 2.100000 5.770703433 -3.048850038 6.503672592 0 1 0 - 1184 1 3 -1.050000 6.056531579 -0.176967197 8.128441816 0 1 0 - 1185 1 3 -1.050000 5.617192595 -2.921351057 8.125684876 0 1 0 - 1186 1 4 -0.950000 -12.423068091 -1.889888254 8.196446925 1 1 0 - 1187 1 3 -1.050000 -11.097800002 -3.605173159 5.891626886 1 1 0 - 1188 1 3 -1.050000 6.069501551 -1.584367785 5.896222036 0 1 0 - 1189 1 3 -1.050000 6.938123035 -4.089645173 6.134239507 0 1 0 - 1190 1 5 0.425000 -13.514618348 -1.883029740 7.995024441 1 1 0 - 1191 1 1 1.575000 4.017061349 -6.162033536 9.189872068 0 1 0 - 1192 1 2 2.100000 -12.224798442 -4.584822599 6.510105475 1 1 0 - 1193 1 2 2.100000 -12.317464024 -7.531761349 6.503672592 1 1 0 - 1194 1 3 -1.050000 -12.031635878 -4.659878508 8.128441816 1 1 0 - 1195 1 3 -1.050000 -12.470974862 
-7.404262368 8.125684876 1 1 0 - 1196 1 4 -0.950000 5.608764605 -6.372799565 8.196446925 0 1 0 - 1197 1 3 -1.050000 6.934032693 -8.088084470 5.891626886 0 1 0 - 1198 1 3 -1.050000 -12.018665906 -6.067279095 5.896222036 1 1 0 - 1199 1 3 -1.050000 -11.150044422 -8.572556484 6.134239507 1 1 0 - 1200 1 5 0.425000 4.517214347 -6.365941050 7.995024441 0 1 0 - 1201 1 1 1.575000 -8.897450384 -7.650360168 9.189872068 1 1 0 - 1202 1 2 2.100000 -4.685378235 -0.487087159 -6.523590700 1 1 1 - 1203 1 2 2.100000 -4.649047415 -6.505971031 -6.517157818 1 1 1 - 1204 1 3 -1.050000 -4.934875561 -9.377853872 -8.141927041 1 1 1 - 1205 1 3 -1.050000 -4.495536577 -6.633470011 -8.139170101 1 1 1 - 1206 1 4 -0.950000 -7.095275891 -7.664932815 -8.209932150 1 1 1 - 1207 1 3 -1.050000 -8.420543979 -5.949647910 -5.905112111 1 1 1 - 1208 1 3 -1.050000 -4.947845532 -7.970453284 -5.909707262 1 1 1 - 1209 1 3 -1.050000 -5.816467017 -5.465175896 -6.147724732 1 1 1 - 1210 1 5 0.425000 -6.003725634 -7.671791329 -8.008509666 1 1 1 - 1211 1 1 1.575000 -6.289283080 -3.167448857 9.189872068 1 1 0 - 1212 1 2 2.100000 -7.293545540 -4.969998470 -6.523590700 1 1 1 - 1213 1 2 2.100000 -7.200879958 -2.023059720 -6.517157818 1 1 1 - 1214 1 3 -1.050000 -7.486708103 -4.894942561 -8.141927041 1 1 1 - 1215 1 3 -1.050000 -7.047369119 -2.150558701 -8.139170101 1 1 1 - 1216 1 4 -0.950000 -4.487108587 -3.182021504 -8.209932150 1 1 1 - 1217 1 3 -1.050000 -5.812376675 -1.466736599 -5.905112111 1 1 1 - 1218 1 3 -1.050000 -7.499678075 -3.487541973 -5.909707262 1 1 1 - 1219 1 3 -1.050000 -8.368299560 -0.982264585 -6.147724732 1 1 1 - 1220 1 5 0.425000 -3.395558329 -3.188880018 -8.008509666 1 1 1 - 1221 1 1 1.575000 -8.854771499 -1.679122225 9.189872068 1 1 0 - 1222 1 2 2.100000 -9.672965899 -9.067733910 6.510105475 1 1 0 - 1223 1 2 2.100000 -9.709296719 -3.048850038 6.503672592 1 1 0 - 1224 1 3 -1.050000 -9.423468574 -0.176967197 8.128441816 1 1 0 - 1225 1 3 -1.050000 -9.862807558 -2.921351057 8.125684876 1 1 0 - 1226 1 4 
-0.950000 -7.263068243 -1.889888254 8.196446925 1 1 0 - 1227 1 3 -1.050000 -5.937800155 -3.605173159 5.891626886 1 1 0 - 1228 1 3 -1.050000 -9.410498602 -1.584367785 5.896222036 1 1 0 - 1229 1 3 -1.050000 -8.541877117 -4.089645173 6.134239507 1 1 0 - 1230 1 5 0.425000 -8.354618501 -1.883029740 7.995024441 1 1 0 - 1231 1 1 1.575000 -11.462938804 -6.162033536 9.189872068 1 1 0 - 1232 1 2 2.100000 -7.064798595 -4.584822599 6.510105475 1 1 0 - 1233 1 2 2.100000 -7.157464176 -7.531761349 6.503672592 1 1 0 - 1234 1 3 -1.050000 -6.871636031 -4.659878508 8.128441816 1 1 0 - 1235 1 3 -1.050000 -7.310975015 -7.404262368 8.125684876 1 1 0 - 1236 1 4 -0.950000 -9.871235548 -6.372799565 8.196446925 1 1 0 - 1237 1 3 -1.050000 -8.545967459 -8.088084470 5.891626886 1 1 0 - 1238 1 3 -1.050000 -6.858666059 -6.067279095 5.896222036 1 1 0 - 1239 1 3 -1.050000 -5.990044575 -8.572556484 6.134239507 1 1 0 - 1240 1 5 0.425000 -10.962785805 -6.365941050 7.995024441 1 1 0 - 1241 1 1 1.575000 -3.737450537 -7.650360168 9.189872068 1 1 0 - 1242 1 2 2.100000 0.474621612 -0.487087159 -6.523590700 1 1 1 - 1243 1 2 2.100000 0.510952432 -6.505971031 -6.517157818 1 1 1 - 1244 1 3 -1.050000 0.225124287 -9.377853872 -8.141927041 1 1 1 - 1245 1 3 -1.050000 0.664463271 -6.633470011 -8.139170101 1 1 1 - 1246 1 4 -0.950000 -1.935276044 -7.664932815 -8.209932150 1 1 1 - 1247 1 3 -1.050000 -3.260544132 -5.949647910 -5.905112111 1 1 1 - 1248 1 3 -1.050000 0.212154315 -7.970453284 -5.909707262 1 1 1 - 1249 1 3 -1.050000 -0.656467169 -5.465175896 -6.147724732 1 1 1 - 1250 1 5 0.425000 -0.843725786 -7.671791329 -8.008509666 1 1 1 - 1251 1 1 1.575000 -1.129283232 -3.167448857 9.189872068 1 1 0 - 1252 1 2 2.100000 -2.133545692 -4.969998470 -6.523590700 1 1 1 - 1253 1 2 2.100000 -2.040880111 -2.023059720 -6.517157818 1 1 1 - 1254 1 3 -1.050000 -2.326708256 -4.894942561 -8.141927041 1 1 1 - 1255 1 3 -1.050000 -1.887369272 -2.150558701 -8.139170101 1 1 1 - 1256 1 4 -0.950000 0.672891261 -3.182021504 -8.209932150 1 1 
1 - 1257 1 3 -1.050000 -0.652376827 -1.466736599 -5.905112111 1 1 1 - 1258 1 3 -1.050000 -2.339678228 -3.487541973 -5.909707262 1 1 1 - 1259 1 3 -1.050000 -3.208299712 -0.982264585 -6.147724732 1 1 1 - 1260 1 5 0.425000 1.764441519 -3.188880018 -8.008509666 1 1 1 - 1261 1 1 1.575000 -3.694771652 -1.679122225 9.189872068 1 1 0 - 1262 1 2 2.100000 -4.512966052 -9.067733910 6.510105475 1 1 0 - 1263 1 2 2.100000 -4.549296872 -3.048850038 6.503672592 1 1 0 - 1264 1 3 -1.050000 -4.263468726 -0.176967197 8.128441816 1 1 0 - 1265 1 3 -1.050000 -4.702807710 -2.921351057 8.125684876 1 1 0 - 1266 1 4 -0.950000 -2.103068396 -1.889888254 8.196446925 1 1 0 - 1267 1 3 -1.050000 -0.777800307 -3.605173159 5.891626886 1 1 0 - 1268 1 3 -1.050000 -4.250498754 -1.584367785 5.896222036 1 1 0 - 1269 1 3 -1.050000 -3.381877270 -4.089645173 6.134239507 1 1 0 - 1270 1 5 0.425000 -3.194618653 -1.883029740 7.995024441 1 1 0 - 1271 1 1 1.575000 -6.302938956 -6.162033536 9.189872068 1 1 0 - 1272 1 2 2.100000 -1.904798747 -4.584822599 6.510105475 1 1 0 - 1273 1 2 2.100000 -1.997464329 -7.531761349 6.503672592 1 1 0 - 1274 1 3 -1.050000 -1.711636183 -4.659878508 8.128441816 1 1 0 - 1275 1 3 -1.050000 -2.150975168 -7.404262368 8.125684876 1 1 0 - 1276 1 4 -0.950000 -4.711235700 -6.372799565 8.196446925 1 1 0 - 1277 1 3 -1.050000 -3.385967612 -8.088084470 5.891626886 1 1 0 - 1278 1 3 -1.050000 -1.698666212 -6.067279095 5.896222036 1 1 0 - 1279 1 3 -1.050000 -0.830044727 -8.572556484 6.134239507 1 1 0 - 1280 1 5 0.425000 -5.802785958 -6.365941050 7.995024441 1 1 0 + 961 1 1 1.575000 0.777219961 -16.616182789 9.189872068 0 1 0 + 962 1 2 2.100000 4.989292109 -8.863914582 -6.523590700 0 1 1 + 963 1 2 2.100000 5.025622929 -14.882798454 -6.517157818 0 1 1 + 964 1 3 -1.050000 4.739794784 -17.754681294 -8.141927041 0 1 1 + 965 1 3 -1.050000 5.179133768 -15.010297434 -8.139170101 0 1 1 + 966 1 4 -0.950000 2.579394453 -16.041760237 -8.209932150 0 1 1 + 967 1 3 -1.050000 1.254126365 -14.326475333 -5.905112111 
0 1 1 + 968 1 3 -1.050000 4.726824812 -16.347280707 -5.909707262 0 1 1 + 969 1 3 -1.050000 3.858203328 -13.842003318 -6.147724732 0 1 1 + 970 1 5 0.425000 3.670944711 -16.048618752 -8.008509666 0 1 1 + 971 1 1 1.575000 3.385387266 -12.133271479 9.189872068 0 1 0 + 972 1 2 2.100000 2.381124805 -13.346825892 -6.523590700 0 1 1 + 973 1 2 2.100000 2.473790386 -10.399887143 -6.517157818 0 1 1 + 974 1 3 -1.050000 2.187962241 -13.271769983 -8.141927041 0 1 1 + 975 1 3 -1.050000 2.627301225 -10.527386123 -8.139170101 0 1 1 + 976 1 4 -0.950000 5.187561758 -11.558848927 -8.209932150 0 1 1 + 977 1 3 -1.050000 3.862293670 -9.843564022 -5.905112111 0 1 1 + 978 1 3 -1.050000 2.174992269 -11.864369396 -5.909707262 0 1 1 + 979 1 3 -1.050000 1.306370785 -9.359092007 -6.147724732 0 1 1 + 980 1 5 0.425000 6.279112015 -11.565707441 -8.008509666 0 1 1 + 981 1 1 1.575000 0.819898846 -10.644944847 9.189872068 0 1 0 + 982 1 2 2.100000 0.001704446 -18.033556531 6.510105475 0 1 0 + 983 1 2 2.100000 -0.034626374 -12.014672659 6.503672592 0 1 0 + 984 1 3 -1.050000 0.251201772 -9.142789819 8.128441816 0 1 0 + 985 1 3 -1.050000 -0.188137212 -11.887173679 8.125684876 0 1 0 + 986 1 4 -0.950000 2.411602102 -10.855710876 8.196446925 0 1 0 + 987 1 3 -1.050000 3.736870190 -12.570995780 5.891626886 0 1 0 + 988 1 3 -1.050000 0.264171744 -10.550190406 5.896222036 0 1 0 + 989 1 3 -1.050000 1.132793228 -13.055467795 6.134239507 0 1 0 + 990 1 5 0.425000 1.320051845 -10.848852361 7.995024441 0 1 0 + 991 1 1 1.575000 -1.788268458 -15.127856157 9.189872068 0 1 0 + 992 1 2 2.100000 2.609871751 -13.550645221 6.510105475 0 1 0 + 993 1 2 2.100000 2.517206169 -16.497583970 6.503672592 0 1 0 + 994 1 3 -1.050000 2.803034315 -13.625701130 8.128441816 0 1 0 + 995 1 3 -1.050000 2.363695330 -16.370084990 8.125684876 0 1 0 + 996 1 4 -0.950000 -0.196565202 -15.338622186 8.196446925 0 1 0 + 997 1 3 -1.050000 1.128702886 -17.053907091 5.891626886 0 1 0 + 998 1 3 -1.050000 2.816004286 -15.033101717 5.896222036 0 1 0 + 999 1 
3 -1.050000 3.684625771 -17.538379106 6.134239507 0 1 0 + 1000 1 5 0.425000 -1.288115460 -15.331763672 7.995024441 0 1 0 + 1001 1 1 1.575000 5.937219808 -16.616182789 9.189872068 0 1 0 + 1002 1 2 2.100000 -10.490708043 -8.863914582 -6.523590700 1 1 1 + 1003 1 2 2.100000 -10.454377223 -14.882798454 -6.517157818 1 1 1 + 1004 1 3 -1.050000 9.899794631 -17.754681294 -8.141927041 0 1 1 + 1005 1 3 -1.050000 -10.300866385 -15.010297434 -8.139170101 1 1 1 + 1006 1 4 -0.950000 7.739394301 -16.041760237 -8.209932150 0 1 1 + 1007 1 3 -1.050000 6.414126212 -14.326475333 -5.905112111 0 1 1 + 1008 1 3 -1.050000 -10.753175341 -16.347280707 -5.909707262 1 1 1 + 1009 1 3 -1.050000 9.018203175 -13.842003318 -6.147724732 0 1 1 + 1010 1 5 0.425000 8.830944558 -16.048618752 -8.008509666 0 1 1 + 1011 1 1 1.575000 -12.094612887 -12.133271479 9.189872068 1 1 0 + 1012 1 2 2.100000 7.541124652 -13.346825892 -6.523590700 0 1 1 + 1013 1 2 2.100000 7.633790234 -10.399887143 -6.517157818 0 1 1 + 1014 1 3 -1.050000 7.347962088 -13.271769983 -8.141927041 0 1 1 + 1015 1 3 -1.050000 7.787301072 -10.527386123 -8.139170101 0 1 1 + 1016 1 4 -0.950000 -10.292438395 -11.558848927 -8.209932150 1 1 1 + 1017 1 3 -1.050000 9.022293517 -9.843564022 -5.905112111 0 1 1 + 1018 1 3 -1.050000 7.334992116 -11.864369396 -5.909707262 0 1 1 + 1019 1 3 -1.050000 6.466370632 -9.359092007 -6.147724732 0 1 1 + 1020 1 5 0.425000 -9.200888137 -11.565707441 -8.008509666 1 1 1 + 1021 1 1 1.575000 5.979898694 -10.644944847 9.189872068 0 1 0 + 1022 1 2 2.100000 5.161704293 -18.033556531 6.510105475 0 1 0 + 1023 1 2 2.100000 5.125373474 -12.014672659 6.503672592 0 1 0 + 1024 1 3 -1.050000 5.411201619 -9.142789819 8.128441816 0 1 0 + 1025 1 3 -1.050000 4.971862635 -11.887173679 8.125684876 0 1 0 + 1026 1 4 -0.950000 -13.068398050 -10.855710876 8.196446925 1 1 0 + 1027 1 3 -1.050000 -11.743129962 -12.570995780 5.891626886 1 1 0 + 1028 1 3 -1.050000 5.424171591 -10.550190406 5.896222036 0 1 0 + 1029 1 3 -1.050000 6.292793075 
-13.055467795 6.134239507 0 1 0 + 1030 1 5 0.425000 6.480051692 -10.848852361 7.995024441 0 1 0 + 1031 1 1 1.575000 3.371731389 -15.127856157 9.189872068 0 1 0 + 1032 1 2 2.100000 -12.870128402 -13.550645221 6.510105475 1 1 0 + 1033 1 2 2.100000 -12.962793984 -16.497583970 6.503672592 1 1 0 + 1034 1 3 -1.050000 -12.676965838 -13.625701130 8.128441816 1 1 0 + 1035 1 3 -1.050000 -13.116304822 -16.370084990 8.125684876 1 1 0 + 1036 1 4 -0.950000 4.963434645 -15.338622186 8.196446925 0 1 0 + 1037 1 3 -1.050000 6.288702733 -17.053907091 5.891626886 0 1 0 + 1038 1 3 -1.050000 -12.663995866 -15.033101717 5.896222036 1 1 0 + 1039 1 3 -1.050000 -11.795374382 -17.538379106 6.134239507 1 1 0 + 1040 1 5 0.425000 3.871884387 -15.331763672 7.995024441 0 1 0 + 1041 1 1 1.575000 -9.542780344 -16.616182789 9.189872068 1 1 0 + 1042 1 2 2.100000 -5.330708196 -8.863914582 -6.523590700 1 1 1 + 1043 1 2 2.100000 -5.294377376 -14.882798454 -6.517157818 1 1 1 + 1044 1 3 -1.050000 -5.580205521 -17.754681294 -8.141927041 1 1 1 + 1045 1 3 -1.050000 -5.140866537 -15.010297434 -8.139170101 1 1 1 + 1046 1 4 -0.950000 -7.740605852 -16.041760237 -8.209932150 1 1 1 + 1047 1 3 -1.050000 -9.065873940 -14.326475333 -5.905112111 1 1 1 + 1048 1 3 -1.050000 -5.593175493 -16.347280707 -5.909707262 1 1 1 + 1049 1 3 -1.050000 -6.461796978 -13.842003318 -6.147724732 1 1 1 + 1050 1 5 0.425000 -6.649055594 -16.048618752 -8.008509666 1 1 1 + 1051 1 1 1.575000 -6.934613039 -12.133271479 9.189872068 1 1 0 + 1052 1 2 2.100000 -7.938875500 -13.346825892 -6.523590700 1 1 1 + 1053 1 2 2.100000 -7.846209919 -10.399887143 -6.517157818 1 1 1 + 1054 1 3 -1.050000 -8.132038064 -13.271769983 -8.141927041 1 1 1 + 1055 1 3 -1.050000 -7.692699080 -10.527386123 -8.139170101 1 1 1 + 1056 1 4 -0.950000 -5.132438547 -11.558848927 -8.209932150 1 1 1 + 1057 1 3 -1.050000 -6.457706636 -9.843564022 -5.905112111 1 1 1 + 1058 1 3 -1.050000 -8.145008036 -11.864369396 -5.909707262 1 1 1 + 1059 1 3 -1.050000 -9.013629520 -9.359092007 
-6.147724732 1 1 1 + 1060 1 5 0.425000 -4.040888290 -11.565707441 -8.008509666 1 1 1 + 1061 1 1 1.575000 -9.500101459 -10.644944847 9.189872068 1 1 0 + 1062 1 2 2.100000 -10.318295859 -18.033556531 6.510105475 1 1 0 + 1063 1 2 2.100000 -10.354626679 -12.014672659 6.503672592 1 1 0 + 1064 1 3 -1.050000 -10.068798533 -9.142789819 8.128441816 1 1 0 + 1065 1 3 -1.050000 -10.508137518 -11.887173679 8.125684876 1 1 0 + 1066 1 4 -0.950000 -7.908398203 -10.855710876 8.196446925 1 1 0 + 1067 1 3 -1.050000 -6.583130115 -12.570995780 5.891626886 1 1 0 + 1068 1 3 -1.050000 -10.055828562 -10.550190406 5.896222036 1 1 0 + 1069 1 3 -1.050000 -9.187207077 -13.055467795 6.134239507 1 1 0 + 1070 1 5 0.425000 -8.999948461 -10.848852361 7.995024441 1 1 0 + 1071 1 1 1.575000 -12.108268763 -15.127856157 9.189872068 1 1 0 + 1072 1 2 2.100000 -7.710128554 -13.550645221 6.510105475 1 1 0 + 1073 1 2 2.100000 -7.802794136 -16.497583970 6.503672592 1 1 0 + 1074 1 3 -1.050000 -7.516965991 -13.625701130 8.128441816 1 1 0 + 1075 1 3 -1.050000 -7.956304975 -16.370084990 8.125684876 1 1 0 + 1076 1 4 -0.950000 -10.516565508 -15.338622186 8.196446925 1 1 0 + 1077 1 3 -1.050000 -9.191297419 -17.053907091 5.891626886 1 1 0 + 1078 1 3 -1.050000 -7.503996019 -15.033101717 5.896222036 1 1 0 + 1079 1 3 -1.050000 -6.635374534 -17.538379106 6.134239507 1 1 0 + 1080 1 5 0.425000 -11.608115765 -15.331763672 7.995024441 1 1 0 + 1081 1 1 1.575000 -4.382780497 -16.616182789 9.189872068 1 1 0 + 1082 1 2 2.100000 -0.170708348 -8.863914582 -6.523590700 1 1 1 + 1083 1 2 2.100000 -0.134377529 -14.882798454 -6.517157818 1 1 1 + 1084 1 3 -1.050000 -0.420205674 -17.754681294 -8.141927041 1 1 1 + 1085 1 3 -1.050000 0.019133310 -15.010297434 -8.139170101 1 1 1 + 1086 1 4 -0.950000 -2.580606005 -16.041760237 -8.209932150 1 1 1 + 1087 1 3 -1.050000 -3.905874093 -14.326475333 -5.905112111 1 1 1 + 1088 1 3 -1.050000 -0.433175646 -16.347280707 -5.909707262 1 1 1 + 1089 1 3 -1.050000 -1.301797130 -13.842003318 -6.147724732 1 1 
1 + 1090 1 5 0.425000 -1.489055747 -16.048618752 -8.008509666 1 1 1 + 1091 1 1 1.575000 -1.774613192 -12.133271479 9.189872068 1 1 0 + 1092 1 2 2.100000 -2.778875653 -13.346825892 -6.523590700 1 1 1 + 1093 1 2 2.100000 -2.686210071 -10.399887143 -6.517157818 1 1 1 + 1094 1 3 -1.050000 -2.972038217 -13.271769983 -8.141927041 1 1 1 + 1095 1 3 -1.050000 -2.532699233 -10.527386123 -8.139170101 1 1 1 + 1096 1 4 -0.950000 0.027561300 -11.558848927 -8.209932150 1 1 1 + 1097 1 3 -1.050000 -1.297706788 -9.843564022 -5.905112111 1 1 1 + 1098 1 3 -1.050000 -2.985008189 -11.864369396 -5.909707262 1 1 1 + 1099 1 3 -1.050000 -3.853629673 -9.359092007 -6.147724732 1 1 1 + 1100 1 5 0.425000 1.119111558 -11.565707441 -8.008509666 1 1 1 + 1101 1 1 1.575000 -4.340101611 -10.644944847 9.189872068 1 1 0 + 1102 1 2 2.100000 -5.158296012 -18.033556531 6.510105475 1 1 0 + 1103 1 2 2.100000 -5.194626832 -12.014672659 6.503672592 1 1 0 + 1104 1 3 -1.050000 -4.908798686 -9.142789819 8.128441816 1 1 0 + 1105 1 3 -1.050000 -5.348137670 -11.887173679 8.125684876 1 1 0 + 1106 1 4 -0.950000 -2.748398356 -10.855710876 8.196446925 1 1 0 + 1107 1 3 -1.050000 -1.423130267 -12.570995780 5.891626886 1 1 0 + 1108 1 3 -1.050000 -4.895828714 -10.550190406 5.896222036 1 1 0 + 1109 1 3 -1.050000 -4.027207230 -13.055467795 6.134239507 1 1 0 + 1110 1 5 0.425000 -3.839948613 -10.848852361 7.995024441 1 1 0 + 1111 1 1 1.575000 -6.948268916 -15.127856157 9.189872068 1 1 0 + 1112 1 2 2.100000 -2.550128707 -13.550645221 6.510105475 1 1 0 + 1113 1 2 2.100000 -2.642794289 -16.497583970 6.503672592 1 1 0 + 1114 1 3 -1.050000 -2.356966143 -13.625701130 8.128441816 1 1 0 + 1115 1 3 -1.050000 -2.796305127 -16.370084990 8.125684876 1 1 0 + 1116 1 4 -0.950000 -5.356565660 -15.338622186 8.196446925 1 1 0 + 1117 1 3 -1.050000 -4.031297572 -17.053907091 5.891626886 1 1 0 + 1118 1 3 -1.050000 -2.343996171 -15.033101717 5.896222036 1 1 0 + 1119 1 3 -1.050000 -1.475374687 -17.538379106 6.134239507 1 1 0 + 1120 1 5 0.425000 
-6.448115918 -15.331763672 7.995024441 1 1 0 + 1121 1 1 1.575000 0.833554723 -7.650360168 9.189872068 0 1 0 + 1122 1 2 2.100000 5.045626871 0.101908040 -6.523590700 0 1 1 + 1123 1 2 2.100000 5.081957691 -5.916975832 -6.517157818 0 1 1 + 1124 1 3 -1.050000 4.796129546 -8.788858673 -8.141927041 0 1 1 + 1125 1 3 -1.050000 5.235468530 -6.044474812 -8.139170101 0 1 1 + 1126 1 4 -0.950000 2.635729215 -7.075937616 -8.209932150 0 1 1 + 1127 1 3 -1.050000 1.310461127 -5.360652711 -5.905112111 0 1 1 + 1128 1 3 -1.050000 4.783159574 -7.381458085 -5.909707262 0 1 1 + 1129 1 3 -1.050000 3.914538089 -4.876180697 -6.147724732 0 1 1 + 1130 1 5 0.425000 3.727279473 -7.082796130 -8.008509666 0 1 1 + 1131 1 1 1.575000 3.441722028 -3.167448857 9.189872068 0 1 0 + 1132 1 2 2.100000 2.437459567 -4.381003271 -6.523590700 0 1 1 + 1133 1 2 2.100000 2.530125148 -1.434064521 -6.517157818 0 1 1 + 1134 1 3 -1.050000 2.244297003 -4.305947362 -8.141927041 0 1 1 + 1135 1 3 -1.050000 2.683635987 -1.561563502 -8.139170101 0 1 1 + 1136 1 4 -0.950000 5.243896520 -2.593026305 -8.209932150 0 1 1 + 1137 1 3 -1.050000 3.918628431 -0.877741400 -5.905112111 0 1 1 + 1138 1 3 -1.050000 2.231327031 -2.898546774 -5.909707262 0 1 1 + 1139 1 3 -1.050000 1.362705547 -0.393269386 -6.147724732 0 1 1 + 1140 1 5 0.425000 6.335446777 -2.599884819 -8.008509666 0 1 1 + 1141 1 1 1.575000 0.876233608 -1.679122225 9.189872068 0 1 0 + 1142 1 2 2.100000 0.058039208 -9.067733910 6.510105475 0 1 0 + 1143 1 2 2.100000 0.021708388 -3.048850038 6.503672592 0 1 0 + 1144 1 3 -1.050000 0.307536534 -0.176967197 8.128441816 0 1 0 + 1145 1 3 -1.050000 -0.131802451 -2.921351057 8.125684876 0 1 0 + 1146 1 4 -0.950000 2.467936864 -1.889888254 8.196446925 0 1 0 + 1147 1 3 -1.050000 3.793204952 -3.605173159 5.891626886 0 1 0 + 1148 1 3 -1.050000 0.320506505 -1.584367785 5.896222036 0 1 0 + 1149 1 3 -1.050000 1.189127990 -4.089645173 6.134239507 0 1 0 + 1150 1 5 0.425000 1.376386606 -1.883029740 7.995024441 0 1 0 + 1151 1 1 1.575000 
-1.731933696 -6.162033536 9.189872068 0 1 0 + 1152 1 2 2.100000 2.666206512 -4.584822599 6.510105475 0 1 0 + 1153 1 2 2.100000 2.573540931 -7.531761349 6.503672592 0 1 0 + 1154 1 3 -1.050000 2.859369076 -4.659878508 8.128441816 0 1 0 + 1155 1 3 -1.050000 2.420030092 -7.404262368 8.125684876 0 1 0 + 1156 1 4 -0.950000 -0.140230441 -6.372799565 8.196446925 0 1 0 + 1157 1 3 -1.050000 1.185037648 -8.088084470 5.891626886 0 1 0 + 1158 1 3 -1.050000 2.872339048 -6.067279095 5.896222036 0 1 0 + 1159 1 3 -1.050000 3.740960532 -8.572556484 6.134239507 0 1 0 + 1160 1 5 0.425000 -1.231780698 -6.365941050 7.995024441 0 1 0 + 1161 1 1 1.575000 5.993554570 -7.650360168 9.189872068 0 1 0 + 1162 1 2 2.100000 -10.434373281 0.101908040 -6.523590700 1 1 1 + 1163 1 2 2.100000 -10.398042462 -5.916975832 -6.517157818 1 1 1 + 1164 1 3 -1.050000 9.956129393 -8.788858673 -8.141927041 0 1 1 + 1165 1 3 -1.050000 -10.244531623 -6.044474812 -8.139170101 1 1 1 + 1166 1 4 -0.950000 7.795729062 -7.075937616 -8.209932150 0 1 1 + 1167 1 3 -1.050000 6.470460974 -5.360652711 -5.905112111 0 1 1 + 1168 1 3 -1.050000 -10.696840579 -7.381458085 -5.909707262 1 1 1 + 1169 1 3 -1.050000 9.074537937 -4.876180697 -6.147724732 0 1 1 + 1170 1 5 0.425000 8.887279320 -7.082796130 -8.008509666 0 1 1 + 1171 1 1 1.575000 -12.038278125 -3.167448857 9.189872068 1 1 0 + 1172 1 2 2.100000 7.597459414 -4.381003271 -6.523590700 0 1 1 + 1173 1 2 2.100000 7.690124996 -1.434064521 -6.517157818 0 1 1 + 1174 1 3 -1.050000 7.404296850 -4.305947362 -8.141927041 0 1 1 + 1175 1 3 -1.050000 7.843635834 -1.561563502 -8.139170101 0 1 1 + 1176 1 4 -0.950000 -10.236103633 -2.593026305 -8.209932150 1 1 1 + 1177 1 3 -1.050000 9.078628279 -0.877741400 -5.905112111 0 1 1 + 1178 1 3 -1.050000 7.391326878 -2.898546774 -5.909707262 0 1 1 + 1179 1 3 -1.050000 6.522705394 -0.393269386 -6.147724732 0 1 1 + 1180 1 5 0.425000 -9.144553375 -2.599884819 -8.008509666 1 1 1 + 1181 1 1 1.575000 6.036233456 -1.679122225 9.189872068 0 1 0 + 1182 1 2 
2.100000 5.218039055 -9.067733910 6.510105475 0 1 0 + 1183 1 2 2.100000 5.181708235 -3.048850038 6.503672592 0 1 0 + 1184 1 3 -1.050000 5.467536381 -0.176967197 8.128441816 0 1 0 + 1185 1 3 -1.050000 5.028197397 -2.921351057 8.125684876 0 1 0 + 1186 1 4 -0.950000 -13.012063289 -1.889888254 8.196446925 1 1 0 + 1187 1 3 -1.050000 -11.686795200 -3.605173159 5.891626886 1 1 0 + 1188 1 3 -1.050000 5.480506353 -1.584367785 5.896222036 0 1 0 + 1189 1 3 -1.050000 6.349127837 -4.089645173 6.134239507 0 1 0 + 1190 1 5 0.425000 6.536386454 -1.883029740 7.995024441 0 1 0 + 1191 1 1 1.575000 3.428066151 -6.162033536 9.189872068 0 1 0 + 1192 1 2 2.100000 -12.813793640 -4.584822599 6.510105475 1 1 0 + 1193 1 2 2.100000 -12.906459222 -7.531761349 6.503672592 1 1 0 + 1194 1 3 -1.050000 -12.620631076 -4.659878508 8.128441816 1 1 0 + 1195 1 3 -1.050000 -13.059970060 -7.404262368 8.125684876 1 1 0 + 1196 1 4 -0.950000 5.019769407 -6.372799565 8.196446925 0 1 0 + 1197 1 3 -1.050000 6.345037495 -8.088084470 5.891626886 0 1 0 + 1198 1 3 -1.050000 -12.607661104 -6.067279095 5.896222036 1 1 0 + 1199 1 3 -1.050000 -11.739039620 -8.572556484 6.134239507 1 1 0 + 1200 1 5 0.425000 3.928219149 -6.365941050 7.995024441 0 1 0 + 1201 1 1 1.575000 -9.486445582 -7.650360168 9.189872068 1 1 0 + 1202 1 2 2.100000 -5.274373434 0.101908040 -6.523590700 1 1 1 + 1203 1 2 2.100000 -5.238042614 -5.916975832 -6.517157818 1 1 1 + 1204 1 3 -1.050000 -5.523870760 -8.788858673 -8.141927041 1 1 1 + 1205 1 3 -1.050000 -5.084531776 -6.044474812 -8.139170101 1 1 1 + 1206 1 4 -0.950000 -7.684271090 -7.075937616 -8.209932150 1 1 1 + 1207 1 3 -1.050000 -9.009539178 -5.360652711 -5.905112111 1 1 1 + 1208 1 3 -1.050000 -5.536840731 -7.381458085 -5.909707262 1 1 1 + 1209 1 3 -1.050000 -6.405462216 -4.876180697 -6.147724732 1 1 1 + 1210 1 5 0.425000 -6.592720833 -7.082796130 -8.008509666 1 1 1 + 1211 1 1 1.575000 -6.878278278 -3.167448857 9.189872068 1 1 0 + 1212 1 2 2.100000 -7.882540739 -4.381003271 -6.523590700 1 1 1 + 
1213 1 2 2.100000 -7.789875157 -1.434064521 -6.517157818 1 1 1 + 1214 1 3 -1.050000 -8.075703302 -4.305947362 -8.141927041 1 1 1 + 1215 1 3 -1.050000 -7.636364318 -1.561563502 -8.139170101 1 1 1 + 1216 1 4 -0.950000 -5.076103786 -2.593026305 -8.209932150 1 1 1 + 1217 1 3 -1.050000 -6.401371874 -0.877741400 -5.905112111 1 1 1 + 1218 1 3 -1.050000 -8.088673274 -2.898546774 -5.909707262 1 1 1 + 1219 1 3 -1.050000 -8.957294759 -0.393269386 -6.147724732 1 1 1 + 1220 1 5 0.425000 -3.984553528 -2.599884819 -8.008509666 1 1 1 + 1221 1 1 1.575000 -9.443766697 -1.679122225 9.189872068 1 1 0 + 1222 1 2 2.100000 -10.261961097 -9.067733910 6.510105475 1 1 0 + 1223 1 2 2.100000 -10.298291917 -3.048850038 6.503672592 1 1 0 + 1224 1 3 -1.050000 -10.012463772 -0.176967197 8.128441816 1 1 0 + 1225 1 3 -1.050000 -10.451802756 -2.921351057 8.125684876 1 1 0 + 1226 1 4 -0.950000 -7.852063441 -1.889888254 8.196446925 1 1 0 + 1227 1 3 -1.050000 -6.526795353 -3.605173159 5.891626886 1 1 0 + 1228 1 3 -1.050000 -9.999493800 -1.584367785 5.896222036 1 1 0 + 1229 1 3 -1.050000 -9.130872315 -4.089645173 6.134239507 1 1 0 + 1230 1 5 0.425000 -8.943613699 -1.883029740 7.995024441 1 1 0 + 1231 1 1 1.575000 -12.051934002 -6.162033536 9.189872068 1 1 0 + 1232 1 2 2.100000 -7.653793793 -4.584822599 6.510105475 1 1 0 + 1233 1 2 2.100000 -7.746459374 -7.531761349 6.503672592 1 1 0 + 1234 1 3 -1.050000 -7.460631229 -4.659878508 8.128441816 1 1 0 + 1235 1 3 -1.050000 -7.899970213 -7.404262368 8.125684876 1 1 0 + 1236 1 4 -0.950000 -10.460230746 -6.372799565 8.196446925 1 1 0 + 1237 1 3 -1.050000 -9.134962657 -8.088084470 5.891626886 1 1 0 + 1238 1 3 -1.050000 -7.447661257 -6.067279095 5.896222036 1 1 0 + 1239 1 3 -1.050000 -6.579039773 -8.572556484 6.134239507 1 1 0 + 1240 1 5 0.425000 -11.551781003 -6.365941050 7.995024441 1 1 0 + 1241 1 1 1.575000 -4.326445735 -7.650360168 9.189872068 1 1 0 + 1242 1 2 2.100000 -0.114373587 0.101908040 -6.523590700 1 1 1 + 1243 1 2 2.100000 -0.078042767 -5.916975832 
-6.517157818 1 1 1 + 1244 1 3 -1.050000 -0.363870912 -8.788858673 -8.141927041 1 1 1 + 1245 1 3 -1.050000 0.075468072 -6.044474812 -8.139170101 1 1 1 + 1246 1 4 -0.950000 -2.524271243 -7.075937616 -8.209932150 1 1 1 + 1247 1 3 -1.050000 -3.849539331 -5.360652711 -5.905112111 1 1 1 + 1248 1 3 -1.050000 -0.376840884 -7.381458085 -5.909707262 1 1 1 + 1249 1 3 -1.050000 -1.245462368 -4.876180697 -6.147724732 1 1 1 + 1250 1 5 0.425000 -1.432720985 -7.082796130 -8.008509666 1 1 1 + 1251 1 1 1.575000 -1.718278430 -3.167448857 9.189872068 1 1 0 + 1252 1 2 2.100000 -2.722540891 -4.381003271 -6.523590700 1 1 1 + 1253 1 2 2.100000 -2.629875310 -1.434064521 -6.517157818 1 1 1 + 1254 1 3 -1.050000 -2.915703455 -4.305947362 -8.141927041 1 1 1 + 1255 1 3 -1.050000 -2.476364471 -1.561563502 -8.139170101 1 1 1 + 1256 1 4 -0.950000 0.083896062 -2.593026305 -8.209932150 1 1 1 + 1257 1 3 -1.050000 -1.241372026 -0.877741400 -5.905112111 1 1 1 + 1258 1 3 -1.050000 -2.928673427 -2.898546774 -5.909707262 1 1 1 + 1259 1 3 -1.050000 -3.797294911 -0.393269386 -6.147724732 1 1 1 + 1260 1 5 0.425000 1.175446320 -2.599884819 -8.008509666 1 1 1 + 1261 1 1 1.575000 -4.283766850 -1.679122225 9.189872068 1 1 0 + 1262 1 2 2.100000 -5.101961250 -9.067733910 6.510105475 1 1 0 + 1263 1 2 2.100000 -5.138292070 -3.048850038 6.503672592 1 1 0 + 1264 1 3 -1.050000 -4.852463924 -0.176967197 8.128441816 1 1 0 + 1265 1 3 -1.050000 -5.291802908 -2.921351057 8.125684876 1 1 0 + 1266 1 4 -0.950000 -2.692063594 -1.889888254 8.196446925 1 1 0 + 1267 1 3 -1.050000 -1.366795505 -3.605173159 5.891626886 1 1 0 + 1268 1 3 -1.050000 -4.839493952 -1.584367785 5.896222036 1 1 0 + 1269 1 3 -1.050000 -3.970872468 -4.089645173 6.134239507 1 1 0 + 1270 1 5 0.425000 -3.783613851 -1.883029740 7.995024441 1 1 0 + 1271 1 1 1.575000 -6.891934154 -6.162033536 9.189872068 1 1 0 + 1272 1 2 2.100000 -2.493793945 -4.584822599 6.510105475 1 1 0 + 1273 1 2 2.100000 -2.586459527 -7.531761349 6.503672592 1 1 0 + 1274 1 3 -1.050000 
-2.300631381 -4.659878508 8.128441816 1 1 0 + 1275 1 3 -1.050000 -2.739970366 -7.404262368 8.125684876 1 1 0 + 1276 1 4 -0.950000 -5.300230898 -6.372799565 8.196446925 1 1 0 + 1277 1 3 -1.050000 -3.974962810 -8.088084470 5.891626886 1 1 0 + 1278 1 3 -1.050000 -2.287661410 -6.067279095 5.896222036 1 1 0 + 1279 1 3 -1.050000 -1.419039925 -8.572556484 6.134239507 1 1 0 + 1280 1 5 0.425000 -6.391781156 -6.365941050 7.995024441 1 1 0 Bonds diff --git a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 index 742aa59587..1e755d6210 100644 --- a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 +++ b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 @@ -1,4 +1,4 @@ -LAMMPS data file via write_data, version 8 Jul 2013-ICMS, timestep = 100 +LAMMPS data file via write_data, version 4 Mar 2014, timestep = 100 1280 atoms 5 atom types @@ -32,1286 +32,1286 @@ Bond Coeffs Atoms -1 1 1 1.5750000000000000e+00 2.9181843045640097e+00 1.3940498960847059e+00 9.2340967653878891e-03 0 0 0 -2 1 2 2.1000000000000001e+00 3.7073513941069294e+00 8.7577386345739576e+00 2.7737258238835487e+00 0 0 0 -3 1 2 2.1000000000000001e+00 3.7295387542643610e+00 2.7514817922552126e+00 2.7754883709415132e+00 0 0 0 -4 1 3 -1.0500000000000000e+00 3.5099247198465129e+00 -1.2549820598252026e-01 1.1455245288613796e+00 0 0 0 -5 1 3 -1.0500000000000000e+00 3.8641459288626336e+00 2.6606338286250626e+00 1.1515490570111524e+00 0 0 0 -6 1 4 -9.4999999999999996e-01 1.2257831605194038e+00 1.4682034965493607e+00 1.0173047259698347e+00 0 0 0 -7 1 3 -1.0500000000000000e+00 -1.2371834622510569e-01 3.4500622213920131e+00 3.2505942162571255e+00 0 0 0 -8 1 3 -1.0500000000000000e+00 3.6134514555627302e+00 1.2774235654227972e+00 3.2503790568677005e+00 0 0 0 -9 1 3 -1.0500000000000000e+00 2.4668324640692454e+00 3.6068322258659897e+00 3.0625972692880978e+00 0 0 0 -10 1 5 4.2499999999999999e-01 7.1502360807393472e-01 5.9168397672752704e-01 1.1550714134040199e+00 0 0 0 -11 1 
1 1.5750000000000000e+00 5.5287803573232814e+00 5.8777628276084535e+00 1.5818558759413293e-02 0 0 0 -12 1 2 2.1000000000000001e+00 1.0982632776410952e+00 4.2776698897313032e+00 2.7687836138940334e+00 0 0 0 -13 1 2 2.1000000000000001e+00 1.1793807246604047e+00 7.2313579056133115e+00 2.7814240681785058e+00 0 0 0 -14 1 3 -1.0500000000000000e+00 9.5858684032157804e-01 4.3507830467182309e+00 1.1529783450605535e+00 0 0 0 -15 1 3 -1.0500000000000000e+00 1.3151092622291145e+00 7.1397445667227899e+00 1.1600283120641173e+00 0 0 0 -16 1 4 -9.4999999999999996e-01 3.8414815014337531e+00 5.9562970318948807e+00 1.0308706572416639e+00 0 0 0 -17 1 3 -1.0500000000000000e+00 2.4870332642081223e+00 7.9282412354621101e+00 3.2591748232874060e+00 0 0 0 -18 1 3 -1.0500000000000000e+00 1.0603410295172733e+00 5.7574985699545884e+00 3.2570920262590857e+00 0 0 0 -19 1 3 -1.0500000000000000e+00 -8.1286792635351546e-02 8.0923795800776510e+00 3.0683190788551666e+00 0 0 0 -20 1 5 4.2499999999999999e-01 3.3348353656920935e+00 5.0822010840192107e+00 1.1943947080225996e+00 0 0 0 -21 1 1 1.5750000000000000e+00 2.9393536672515719e+00 7.3804881375233045e+00 1.5838507742252261e-02 0 0 0 -22 1 2 2.1000000000000001e+00 2.1496285432378848e+00 8.5746434203954891e-03 -2.7504981414008842e+00 0 0 0 -23 1 2 2.1000000000000001e+00 2.1254678497731980e+00 6.0184503373647615e+00 -2.7475528826831859e+00 0 0 0 -24 1 3 -1.0500000000000000e+00 2.3580919423094198e+00 8.8949831871341516e+00 -1.1236671837246828e+00 0 0 0 -25 1 3 -1.0500000000000000e+00 1.9840721366565788e+00 6.1124703193525001e+00 -1.1260675124463422e+00 0 0 0 -26 1 4 -9.4999999999999996e-01 4.6159387449941516e+00 7.2751601759450502e+00 -1.0223401924299189e+00 0 0 0 -27 1 3 -1.0500000000000000e+00 5.9794462716790733e+00 5.3164707099933146e+00 -3.2244722322227304e+00 0 0 0 -28 1 3 -1.0500000000000000e+00 2.2399583197725050e+00 7.4922069379996366e+00 -3.2264936141302707e+00 0 0 0 -29 1 3 -1.0500000000000000e+00 3.3884528032433323e+00 5.1626134100698202e+00 
-3.0295950264996439e+00 0 0 0 -30 1 5 4.2499999999999999e-01 5.1031273830721577e+00 8.1185536590698604e+00 -1.3314430838681233e+00 0 0 0 -31 1 1 1.5750000000000000e+00 3.2846504706283852e-01 2.8988361277322028e+00 8.8183866668440913e-03 0 0 0 -32 1 2 2.1000000000000001e+00 4.7576783051225942e+00 4.4895526366419816e+00 -2.7425066314812634e+00 0 0 0 -33 1 2 2.1000000000000001e+00 4.6783183817373200e+00 1.5354330327670951e+00 -2.7551492847444621e+00 0 0 0 -34 1 3 -1.0500000000000000e+00 4.9013036267823971e+00 4.4108681127969582e+00 -1.1266212975805683e+00 0 0 0 -35 1 3 -1.0500000000000000e+00 4.5393998046955915e+00 1.6266484909844152e+00 -1.1320991832911744e+00 0 0 0 -36 1 4 -9.4999999999999996e-01 2.0152440438407631e+00 2.8097969815934825e+00 -1.0152029311013973e+00 0 0 0 -37 1 3 -1.0500000000000000e+00 3.3736351876767703e+00 8.3256378957671373e-01 -3.2315257474119896e+00 0 0 0 -38 1 3 -1.0500000000000000e+00 4.7888478578463296e+00 3.0099718008256282e+00 -3.2317549292159882e+00 0 0 0 -39 1 3 -1.0500000000000000e+00 5.9445206570517364e+00 6.8681326575416080e-01 -3.0420279397414287e+00 0 0 0 -40 1 5 4.2499999999999999e-01 2.5169698186996960e+00 3.6735773254810269e+00 -1.2354457711152893e+00 0 0 0 -41 1 1 1.5750000000000000e+00 8.0781895657408569e+00 1.3940468229074483e+00 9.2361555151203589e-03 0 0 0 -42 1 2 2.1000000000000001e+00 -1.1772646971508982e+01 8.7577405536920274e+00 2.7737281083448622e+00 1 0 0 -43 1 2 2.1000000000000001e+00 -1.1750460337451164e+01 2.7514815564821156e+00 2.7754884219953055e+00 1 0 0 -44 1 3 -1.0500000000000000e+00 -1.1970075430711471e+01 -1.2549763511069756e-01 1.1455243943334121e+00 1 0 0 -45 1 3 -1.0500000000000000e+00 -1.1615849369310848e+01 2.6606336128145749e+00 1.1515484018176458e+00 1 0 0 -46 1 4 -9.4999999999999996e-01 6.3857830750754125e+00 1.4682093342990079e+00 1.0173137533521235e+00 0 0 0 -47 1 3 -1.0500000000000000e+00 5.0362809862511320e+00 3.4500599757949004e+00 3.2505953992179180e+00 0 0 0 -48 1 3 -1.0500000000000000e+00 
-1.1866548408412738e+01 1.2774249873109973e+00 3.2503792901090698e+00 1 0 0 -49 1 3 -1.0500000000000000e+00 7.6268338070717832e+00 3.6068351104669105e+00 3.0625928215330944e+00 0 0 0 -50 1 5 4.2499999999999999e-01 5.8750442223033303e+00 5.9168671979982435e-01 1.1551294404129369e+00 0 0 0 -51 1 1 1.5750000000000000e+00 -9.9512284322095397e+00 5.8777650953334337e+00 1.5817217107313652e-02 1 0 0 -52 1 2 2.1000000000000001e+00 6.2582624746836295e+00 4.2776692843107149e+00 2.7687857006526020e+00 0 0 0 -53 1 2 2.1000000000000001e+00 6.3393827858479526e+00 7.2313559094803104e+00 2.7814260197871423e+00 0 0 0 -54 1 3 -1.0500000000000000e+00 6.1185815666729972e+00 4.3507849444963291e+00 1.1529802875402311e+00 0 0 0 -55 1 3 -1.0500000000000000e+00 6.4751093766765173e+00 7.1397395345991512e+00 1.1600299881376017e+00 0 0 0 -56 1 4 -9.4999999999999996e-01 -1.1638509783727779e+01 5.9563055887117642e+00 1.0308802681714724e+00 1 0 0 -57 1 3 -1.0500000000000000e+00 7.6470325371171128e+00 7.9282431370321795e+00 3.2591737328207824e+00 0 0 0 -58 1 3 -1.0500000000000000e+00 6.2203384582398371e+00 5.7574978624101867e+00 3.2570952709813383e+00 0 0 0 -59 1 3 -1.0500000000000000e+00 5.0787139693767251e+00 8.0923800371900967e+00 3.0683158645681008e+00 0 0 0 -60 1 5 4.2499999999999999e-01 8.4948503598131495e+00 5.0822210438605957e+00 1.1944782647191694e+00 0 0 0 -61 1 1 1.5750000000000000e+00 8.0993576857564378e+00 7.3804875358899587e+00 1.5840494012762107e-02 0 0 0 -62 1 2 2.1000000000000001e+00 7.3096283952920835e+00 8.5744292033496095e-03 -2.7504961665351892e+00 0 0 0 -63 1 2 2.1000000000000001e+00 7.2854695889394101e+00 6.0184492959381934e+00 -2.7475512604452366e+00 0 0 0 -64 1 3 -1.0500000000000000e+00 7.5180889968676183e+00 8.8949859947902752e+00 -1.1236653178517138e+00 0 0 0 -65 1 3 -1.0500000000000000e+00 7.1440732178329540e+00 6.1124677551670281e+00 -1.1260659323614917e+00 0 0 0 -66 1 4 -9.4999999999999996e-01 -1.0864063656127900e+01 7.2751613852182473e+00 -1.0223394833825719e+00 1 0 
0 -67 1 3 -1.0500000000000000e+00 -9.5005529248024025e+00 5.3164682422424576e+00 -3.2244718566537820e+00 1 0 0 -68 1 3 -1.0500000000000000e+00 7.3999552275758660e+00 7.4922075333439615e+00 -3.2264905770272998e+00 0 0 0 -69 1 3 -1.0500000000000000e+00 8.5484538103238989e+00 5.1626159124099331e+00 -3.0295986933437327e+00 0 0 0 -70 1 5 4.2499999999999999e-01 -1.0376885999858491e+01 8.1185570528737649e+00 -1.3314536045877965e+00 1 0 0 -71 1 1 1.5750000000000000e+00 5.4884728848472744e+00 2.8988335733956632e+00 8.8201821199298536e-03 0 0 0 -72 1 2 2.1000000000000001e+00 -1.0722324379445324e+01 4.4895519320603832e+00 -2.7425082732406754e+00 1 0 0 -73 1 2 2.1000000000000001e+00 -1.0801683278311836e+01 1.5354328954119474e+00 -2.7551497880755766e+00 1 0 0 -74 1 3 -1.0500000000000000e+00 -1.0578698158457764e+01 4.4108681250488111e+00 -1.1266209820685091e+00 1 0 0 -75 1 3 -1.0500000000000000e+00 -1.0940602894455520e+01 1.6266467007604959e+00 -1.1320989503609731e+00 1 0 0 -76 1 4 -9.4999999999999996e-01 7.1752475064374508e+00 2.8098060229146071e+00 -1.0151906589766995e+00 0 0 0 -77 1 3 -1.0500000000000000e+00 8.5336345700919480e+00 8.3256136942025805e-01 -3.2315256914186872e+00 0 0 0 -78 1 3 -1.0500000000000000e+00 -1.0691151880887796e+01 3.0099709082197244e+00 -3.2317547822419375e+00 1 0 0 -79 1 3 -1.0500000000000000e+00 -9.5354801454304390e+00 6.8681165495986463e-01 -3.0420254175386541e+00 1 0 0 -80 1 5 4.2499999999999999e-01 7.6769953270217641e+00 3.6735936136308780e+00 -1.2353597799009401e+00 0 0 0 -81 1 1 1.5750000000000000e+00 -7.4018156834157516e+00 1.3940498267845207e+00 9.2341172191829912e-03 1 0 0 -82 1 2 2.1000000000000001e+00 -6.6126486139241258e+00 8.7577386564423421e+00 2.7737258262471336e+00 1 0 0 -83 1 2 2.1000000000000001e+00 -6.5904613283607496e+00 2.7514816785118619e+00 2.7754883822192458e+00 1 0 0 -84 1 3 -1.0500000000000000e+00 -6.8100751799104264e+00 -1.2549822203802918e-01 1.1455245250037489e+00 1 0 0 -85 1 3 -1.0500000000000000e+00 
-6.4558540038809511e+00 2.6606338686734112e+00 1.1515490428830653e+00 1 0 0 -86 1 4 -9.4999999999999996e-01 -9.0942168786093571e+00 1.4682033564725145e+00 1.0173048353315082e+00 1 0 0 -87 1 3 -1.0500000000000000e+00 -1.0443718281141235e+01 3.4500623943480377e+00 3.2505941748888905e+00 1 0 0 -88 1 3 -1.0500000000000000e+00 -6.7065484694691486e+00 1.2774237217274482e+00 3.2503789830048451e+00 1 0 0 -89 1 3 -1.0500000000000000e+00 -7.8531676029576225e+00 3.6068321305210063e+00 3.0625972745673877e+00 1 0 0 -90 1 5 4.2499999999999999e-01 -9.6049763952466609e+00 5.9168402807765119e-01 1.1550717463805391e+00 1 0 0 -91 1 1 1.5750000000000000e+00 -4.7912196555084909e+00 5.8777628185963628e+00 1.5818637392785106e-02 1 0 0 -92 1 2 2.1000000000000001e+00 -9.2217367423489147e+00 4.2776698061977569e+00 2.7687836693612979e+00 1 0 0 -93 1 2 2.1000000000000001e+00 -9.1406192690064536e+00 7.2313579369426684e+00 2.7814240346917387e+00 1 0 0 -94 1 3 -1.0500000000000000e+00 -9.3614131129886093e+00 4.3507830694144864e+00 1.1529784059432853e+00 1 0 0 -95 1 3 -1.0500000000000000e+00 -9.0048906852551376e+00 7.1397446087555245e+00 1.1600283019177606e+00 1 0 0 -96 1 4 -9.4999999999999996e-01 -6.4785184509382354e+00 5.9562970434063125e+00 1.0308706332329152e+00 1 0 0 -97 1 3 -1.0500000000000000e+00 -7.8329667310252340e+00 7.9282413242120136e+00 3.2591748265528295e+00 1 0 0 -98 1 3 -1.0500000000000000e+00 -9.2596588585334061e+00 5.7574987313866437e+00 3.2570919723466822e+00 1 0 0 -99 1 3 -1.0500000000000000e+00 -1.0401286862581250e+01 8.0923795374929135e+00 3.0683190457546186e+00 1 0 0 -100 1 5 4.2499999999999999e-01 -6.9851646544916939e+00 5.0822011697243248e+00 1.1943944362131411e+00 1 0 0 -101 1 1 1.5750000000000000e+00 -7.3806463357299190e+00 7.3804880885211404e+00 1.5838429298753454e-02 1 0 0 -102 1 2 2.1000000000000001e+00 -8.1703713310771988e+00 8.5746608785690626e-03 -2.7504980424842866e+00 1 0 0 -103 1 2 2.1000000000000001e+00 -8.1945323275444899e+00 6.0184502500917283e+00 
-2.7475528837539054e+00 1 0 0 -104 1 3 -1.0500000000000000e+00 -7.9619080637885542e+00 8.8949831933859862e+00 -1.1236671500565745e+00 1 0 0 -105 1 3 -1.0500000000000000e+00 -8.3359278558246022e+00 6.1124703111367253e+00 -1.1260675054861888e+00 1 0 0 -106 1 4 -9.4999999999999996e-01 -5.7040612194432985e+00 7.2751602466710921e+00 -1.0223403226134700e+00 1 0 0 -107 1 3 -1.0500000000000000e+00 -4.3405537538255263e+00 5.3164706883910391e+00 -3.2244722490595308e+00 1 0 0 -108 1 3 -1.0500000000000000e+00 -8.0800417151952999e+00 7.4922069547101877e+00 -3.2264935905892305e+00 1 0 0 -109 1 3 -1.0500000000000000e+00 -6.9315471505023067e+00 5.1626133176122515e+00 -3.0295950390391173e+00 1 0 0 -110 1 5 4.2499999999999999e-01 -5.2168726148730524e+00 8.1185535511438154e+00 -1.3314430702931386e+00 1 0 0 -111 1 1 1.5750000000000000e+00 -9.9915348614344328e+00 2.8988361677063885e+00 8.8184517237905879e-03 1 0 0 -112 1 2 2.1000000000000001e+00 -5.5623217993451153e+00 4.4895527215617719e+00 -2.7425065919058378e+00 1 0 0 -113 1 2 2.1000000000000001e+00 -5.6416816379433659e+00 1.5354330960486955e+00 -2.7551492648452243e+00 1 0 0 -114 1 3 -1.0500000000000000e+00 -5.4186964164041349e+00 4.4108681390809927e+00 -1.1266212245683622e+00 1 0 0 -115 1 3 -1.0500000000000000e+00 -5.7806001318650173e+00 1.6266485007847322e+00 -1.1320991357765973e+00 1 0 0 -116 1 4 -9.4999999999999996e-01 -8.3047558725651225e+00 2.8097969608772360e+00 -1.0152028096430641e+00 1 0 0 -117 1 3 -1.0500000000000000e+00 -6.9463648941270124e+00 8.3256378373028284e-01 -3.2315257455775122e+00 1 0 0 -118 1 3 -1.0500000000000000e+00 -5.5311521520922371e+00 3.0099717297467308e+00 -3.2317549067275442e+00 1 0 0 -119 1 3 -1.0500000000000000e+00 -4.3754792787657371e+00 6.8681320804201818e-01 -3.0420279836711339e+00 1 0 0 -120 1 5 4.2499999999999999e-01 -7.8030301078286080e+00 3.6735772188252938e+00 -1.2354456232096709e+00 1 0 0 -121 1 1 1.5750000000000000e+00 -2.2418105330661700e+00 1.3940467442253244e+00 9.2362127554714846e-03 1 0 
0 -122 1 2 2.1000000000000001e+00 -1.4526470583814692e+00 8.7577405549317682e+00 2.7737280503457864e+00 1 0 0 -123 1 2 2.1000000000000001e+00 -1.4304603016429134e+00 2.7514816082304527e+00 2.7754884447533730e+00 1 0 0 -124 1 3 -1.0500000000000000e+00 -1.6500755227706385e+00 -1.2549770273042071e-01 1.1455244330478109e+00 1 0 0 -125 1 3 -1.0500000000000000e+00 -1.2958494055542360e+00 2.6606336034784768e+00 1.1515484397802584e+00 1 0 0 -126 1 4 -9.4999999999999996e-01 -3.9342168915633415e+00 1.4682094214141266e+00 1.0173138250219012e+00 1 0 0 -127 1 3 -1.0500000000000000e+00 -5.2837189522970025e+00 3.4500600861308470e+00 3.2505954302319502e+00 1 0 0 -128 1 3 -1.0500000000000000e+00 -1.5465484917570702e+00 1.2774248326576476e+00 3.2503792898076469e+00 1 0 0 -129 1 3 -1.0500000000000000e+00 -2.6931660935902819e+00 3.6068350139432646e+00 3.0625928773896689e+00 1 0 0 -130 1 5 4.2499999999999999e-01 -4.4449558633498123e+00 5.9168668395037827e-01 1.1551293896194927e+00 1 0 0 -131 1 1 1.5750000000000000e+00 3.6877164782193006e-01 5.8777650912165385e+00 1.5817150747189146e-02 1 0 0 -132 1 2 2.1000000000000001e+00 -4.0617375772762188e+00 4.2776693106083989e+00 2.7687857850208601e+00 1 0 0 -133 1 2 2.1000000000000001e+00 -3.9806172582495076e+00 7.2313558938290434e+00 2.7814259950804896e+00 1 0 0 -134 1 3 -1.0500000000000000e+00 -4.2014184402319890e+00 4.3507849103013854e+00 1.1529802947660208e+00 1 0 0 -135 1 3 -1.0500000000000000e+00 -3.8448906073918021e+00 7.1397395282490983e+00 1.1600299603315243e+00 1 0 0 -136 1 4 -9.4999999999999996e-01 -1.3185098561518060e+00 5.9563056682421198e+00 1.0308802485197628e+00 1 0 0 -137 1 3 -1.0500000000000000e+00 -2.6729672971842531e+00 7.9282432283389461e+00 3.2591736523341588e+00 1 0 0 -138 1 3 -1.0500000000000000e+00 -4.0996614283272201e+00 5.7574979032638858e+00 3.2570953323553891e+00 1 0 0 -139 1 3 -1.0500000000000000e+00 -5.2412859022758642e+00 8.0923799801955703e+00 3.0683158558257126e+00 1 0 0 -140 1 5 4.2499999999999999e-01 
-1.8251496447516047e+00 5.0822210510157575e+00 1.1944783318261365e+00 1 0 0 -141 1 1 1.5750000000000000e+00 -2.2206422623948683e+00 7.3804874661035456e+00 1.5840568424078327e-02 1 0 0 -142 1 2 2.1000000000000001e+00 -3.0103717366590503e+00 8.5744582599929231e-03 -2.7504962140350369e+00 1 0 0 -143 1 2 2.1000000000000001e+00 -3.0345303616566373e+00 6.0184491524444859e+00 -2.7475512257680279e+00 1 0 0 -144 1 3 -1.0500000000000000e+00 -2.8019109978936916e+00 8.8949859869856240e+00 -1.1236652992525631e+00 1 0 0 -145 1 3 -1.0500000000000000e+00 -3.1759267152560131e+00 6.1124676460171585e+00 -1.1260659219048126e+00 1 0 0 -146 1 4 -9.4999999999999996e-01 -5.4406366880684587e-01 7.2751613330570528e+00 -1.0223395364677970e+00 1 0 0 -147 1 3 -1.0500000000000000e+00 8.1944706179112003e-01 5.3164681505885234e+00 -3.2244718846913507e+00 1 0 0 -148 1 3 -1.0500000000000000e+00 -2.9200448159008241e+00 7.4922076620364635e+00 -3.2264905243236663e+00 1 0 0 -149 1 3 -1.0500000000000000e+00 -1.7715461358892455e+00 5.1626160314823899e+00 -3.0295988156649276e+00 1 0 0 -150 1 5 4.2499999999999999e-01 -5.6886054816962073e-02 8.1185569279399452e+00 -1.3314539098197047e+00 1 0 0 -151 1 1 1.5750000000000000e+00 -4.8315271785617604e+00 2.8988336154917995e+00 8.8202724600794369e-03 1 0 0 -152 1 2 2.1000000000000001e+00 -4.0232442311636873e-01 4.4895519600030376e+00 -2.7425083189174675e+00 1 0 0 -153 1 2 2.1000000000000001e+00 -4.8168326179287391e-01 1.5354329215305640e+00 -2.7551498218354213e+00 1 0 0 -154 1 3 -1.0500000000000000e+00 -2.5869819069960442e-01 4.4108681978982212e+00 -1.1266210916639228e+00 1 0 0 -155 1 3 -1.0500000000000000e+00 -6.2060283466819754e-01 1.6266466941235151e+00 -1.1320990169920631e+00 1 0 0 -156 1 4 -9.4999999999999996e-01 -3.1447525156123621e+00 2.8098060903938169e+00 -1.0151905064714946e+00 1 0 0 -157 1 3 -1.0500000000000000e+00 -1.7863654685868706e+00 8.3256151520433264e-01 -3.2315256544495776e+00 1 0 0 -158 1 3 -1.0500000000000000e+00 -3.7115181573242850e-01 
3.0099709263728371e+00 -3.2317547966740801e+00 1 0 0 -159 1 3 -1.0500000000000000e+00 7.8451981401563486e-01 6.8681159879902154e-01 -3.0420255052720684e+00 1 0 0 -160 1 5 4.2499999999999999e-01 -2.6430045345789743e+00 3.6735938310135730e+00 -1.2353588053241564e+00 1 0 0 -161 1 1 1.5750000000000000e+00 2.9783608836912538e+00 1.0361137149912420e+01 1.7818011148255763e-02 0 0 0 -162 1 2 2.1000000000000001e+00 3.5360610925203950e+00 -1.8139331519186836e+01 2.7666566992251962e+00 0 1 0 -163 1 2 2.1000000000000001e+00 3.7878536835413374e+00 1.1711311679533136e+01 2.7826602133250642e+00 0 0 0 -164 1 3 -1.0500000000000000e+00 3.5653428391143507e+00 8.8297483956056233e+00 1.1605414908315286e+00 0 0 0 -165 1 3 -1.0500000000000000e+00 3.9263695506288627e+00 1.1616561478078168e+01 1.1647557742292012e+00 0 0 0 -166 1 4 -9.4999999999999996e-01 1.2974878727728569e+00 1.0447737594475388e+01 1.0416496805015107e+00 0 0 0 -167 1 3 -1.0500000000000000e+00 -6.3021639232545112e-02 1.2409056591381603e+01 3.2633807761805826e+00 0 0 0 -168 1 3 -1.0500000000000000e+00 3.6664195690611177e+00 1.0239152625121701e+01 3.2629017848636845e+00 0 0 0 -169 1 3 -1.0500000000000000e+00 2.5302999201623724e+00 1.2579024164378968e+01 3.0668956329912849e+00 0 0 0 -170 1 5 4.2499999999999999e-01 7.9409053633778370e-01 9.5801425518505248e+00 1.2421975274295267e+00 0 0 0 -171 1 1 1.5750000000000000e+00 5.5839590050957000e+00 1.4846810910182977e+01 1.4343205784191682e-02 0 0 0 -172 1 2 2.1000000000000001e+00 1.1542891896852669e+00 1.3239508636212776e+01 2.7731733135274830e+00 0 0 0 -173 1 2 2.1000000000000001e+00 1.2358945477303358e+00 1.6194044048123825e+01 2.7772564879657722e+00 0 0 0 -174 1 3 -1.0500000000000000e+00 1.0083238392628431e+00 1.3310815603498916e+01 1.1627306398612589e+00 0 0 0 -175 1 3 -1.0500000000000000e+00 1.3784083725434364e+00 1.6094083912736924e+01 1.1623932033251911e+00 0 0 0 -176 1 4 -9.4999999999999996e-01 3.9147696520963926e+00 1.4947007806925168e+01 1.0534403808015398e+00 0 0 0 -177 
1 3 -1.0500000000000000e+00 2.5450693612275508e+00 1.6892539796118843e+01 3.2594483860460262e+00 0 0 0 -178 1 3 -1.0500000000000000e+00 1.1133022063366802e+00 1.4721821021247077e+01 3.2622862006760229e+00 0 0 0 -179 1 3 -1.0500000000000000e+00 -1.9530947538630272e-02 1.7067101007112267e+01 3.0570089938348861e+00 0 0 0 -180 1 5 4.2499999999999999e-01 3.4193086019084191e+00 1.4098682230405988e+01 1.3335380343095551e+00 0 0 0 -181 1 1 1.5750000000000000e+00 2.9944564334951682e+00 1.6348111086997815e+01 1.3243770296220703e-02 0 0 0 -182 1 2 2.1000000000000001e+00 2.2089867936558889e+00 8.9733111365550648e+00 -2.7376759799791408e+00 0 0 0 -183 1 2 2.1000000000000001e+00 2.1822712630895698e+00 1.4988637755244564e+01 -2.7464837418945329e+00 0 0 0 -184 1 3 -1.0500000000000000e+00 2.1917626419814873e+00 -1.7993560546134169e+01 -1.1315987307430788e+00 0 1 0 -185 1 3 -1.0500000000000000e+00 2.0372877927078044e+00 1.5089547257540911e+01 -1.1295753873849819e+00 0 0 0 -186 1 4 -9.4999999999999996e-01 4.6591257762935818e+00 1.6226168454664073e+01 -1.0381632629831472e+00 0 0 0 -187 1 3 -1.0500000000000000e+00 6.0349970632070047e+00 1.4289632110234994e+01 -3.2262701712642237e+00 0 0 0 -188 1 3 -1.0500000000000000e+00 2.3012251400693380e+00 1.6462007761365609e+01 -3.2306427498990100e+00 0 0 0 -189 1 3 -1.0500000000000000e+00 3.4412176059525983e+00 1.4121357475468901e+01 -3.0230792894126743e+00 0 0 0 -190 1 5 4.2499999999999999e-01 5.1287875255231530e+00 1.7040736546186022e+01 -1.4353361578853887e+00 0 0 0 -191 1 1 1.5750000000000000e+00 3.8878642736347047e-01 1.1864775961711629e+01 1.6890098528769215e-02 0 0 0 -192 1 2 2.1000000000000001e+00 4.8162900109864335e+00 1.3458296905412244e+01 -2.7383273178478280e+00 0 0 0 -193 1 2 2.1000000000000001e+00 4.7356045818963715e+00 1.0503926464188318e+01 -2.7445029950718407e+00 0 0 0 -194 1 3 -1.0500000000000000e+00 4.9690367294199547e+00 1.3381511896730270e+01 -1.1261950334109994e+00 0 0 0 -195 1 3 -1.0500000000000000e+00 
4.5917570390922791e+00 1.0602165922805003e+01 -1.1254358992485987e+00 0 0 0 -196 1 4 -9.4999999999999996e-01 2.0602496638742895e+00 1.1751304296580837e+01 -1.0268356169791080e+00 0 0 0 -197 1 3 -1.0500000000000000e+00 3.4291100571305080e+00 9.8011865768196849e+00 -3.2224222876191853e+00 0 0 0 -198 1 3 -1.0500000000000000e+00 4.8519057468596785e+00 1.1976547366140775e+01 -3.2274542143700788e+00 0 0 0 -199 1 3 -1.0500000000000000e+00 5.9959651536241303e+00 9.6414241198837090e+00 -3.0221895713126656e+00 0 0 0 -200 1 5 4.2499999999999999e-01 2.5405620157524282e+00 1.2583249715526708e+01 -1.3733151940910142e+00 0 0 0 -201 1 1 1.5750000000000000e+00 8.1383646073800620e+00 1.0361133237349737e+01 1.7820341906062964e-02 0 0 0 -202 1 2 2.1000000000000001e+00 -1.1943935795427686e+01 -1.8139330176232271e+01 2.7666600362438487e+00 1 1 0 -203 1 2 2.1000000000000001e+00 -1.1692145556148489e+01 1.1711312097895455e+01 2.7826590535000033e+00 1 0 0 -204 1 3 -1.0500000000000000e+00 -1.1914655824563102e+01 8.8297512948342494e+00 1.1605416188708837e+00 1 0 0 -205 1 3 -1.0500000000000000e+00 -1.1553624791340591e+01 1.1616561682139807e+01 1.1647545028390489e+00 1 0 0 -206 1 4 -9.4999999999999996e-01 6.4574907972067734e+00 1.0447749907864232e+01 1.0416636025249151e+00 0 0 0 -207 1 3 -1.0500000000000000e+00 5.0969781928512070e+00 1.2409054402093162e+01 3.2633812888135836e+00 0 0 0 -208 1 3 -1.0500000000000000e+00 -1.1813582114821591e+01 1.0239154834512600e+01 3.2629021125861790e+00 1 0 0 -209 1 3 -1.0500000000000000e+00 7.6903015872308416e+00 1.2579029120613026e+01 3.0668902965460401e+00 0 0 0 -210 1 5 4.2499999999999999e-01 5.9541190163049258e+00 9.5801610836046720e+00 1.2422970363998846e+00 0 0 0 -211 1 1 1.5750000000000000e+00 -9.8960492593139016e+00 1.4846815234832970e+01 1.4342402300192347e-02 1 0 0 -212 1 2 2.1000000000000001e+00 6.3142883211295739e+00 1.3239509020309772e+01 2.7731744408659829e+00 0 0 0 -213 1 2 2.1000000000000001e+00 6.3958956240524572e+00 1.6194042666892908e+01 
2.7772537088161755e+00 0 0 0 -214 1 3 -1.0500000000000000e+00 6.1683178030456389e+00 1.3310817780517400e+01 1.1627316331062758e+00 0 0 0 -215 1 3 -1.0500000000000000e+00 6.5384104922080866e+00 1.6094076947664920e+01 1.1623916653934216e+00 0 0 0 -216 1 4 -9.4999999999999996e-01 -1.1565221641268149e+01 1.4947015828332521e+01 1.0534477535062798e+00 1 0 0 -217 1 3 -1.0500000000000000e+00 7.7050674326143174e+00 1.6892542633948342e+01 3.2594442131284342e+00 0 0 0 -218 1 3 -1.0500000000000000e+00 6.2732982406463762e+00 1.4721821920681034e+01 3.2622872594924104e+00 0 0 0 -219 1 3 -1.0500000000000000e+00 5.1404708770320617e+00 1.7067105110981739e+01 3.0570014877775762e+00 0 0 0 -220 1 5 4.2499999999999999e-01 8.5793270204319043e+00 1.4098710011064792e+01 1.3336198322446364e+00 0 0 0 -221 1 1 1.5750000000000000e+00 8.1544599341941719e+00 1.6348110060895163e+01 1.3244573881346255e-02 0 0 0 -222 1 2 2.1000000000000001e+00 7.3689873813956410e+00 8.9733117458578242e+00 -2.7376738266526139e+00 0 0 0 -223 1 2 2.1000000000000001e+00 7.3422724488335263e+00 1.4988636115878901e+01 -2.7464857209241362e+00 0 0 0 -224 1 3 -1.0500000000000000e+00 7.3517583195579768e+00 -1.7993559185319651e+01 -1.1315966915466529e+00 0 1 0 -225 1 3 -1.0500000000000000e+00 7.1972896415131373e+00 1.5089540814203456e+01 -1.1295765246506893e+00 0 0 0 -226 1 4 -9.4999999999999996e-01 -1.0820878876636492e+01 1.6226165745145710e+01 -1.0381670693918519e+00 1 0 0 -227 1 3 -1.0500000000000000e+00 -9.4450013339900689e+00 1.4289629181644845e+01 -3.2262664911026713e+00 1 0 0 -228 1 3 -1.0500000000000000e+00 7.4612223758556979e+00 1.6462008397402204e+01 -3.2306407173588498e+00 0 0 0 -229 1 3 -1.0500000000000000e+00 8.6012195616269409e+00 1.4121361092212663e+01 -3.0230862187392962e+00 0 0 0 -230 1 5 4.2499999999999999e-01 -1.0351233930438994e+01 1.7040720778333384e+01 -1.4353856967800827e+00 1 0 0 -231 1 1 1.5750000000000000e+00 5.5487940997396823e+00 1.1864772524178154e+01 1.6891514908358829e-02 0 0 0 -232 1 2 
2.1000000000000001e+00 -1.0663713283684089e+01 1.3458295804738260e+01 -2.7383296765061846e+00 1 0 0 -233 1 2 2.1000000000000001e+00 -1.0744397339388300e+01 1.0503925770550204e+01 -2.7445039325704652e+00 1 0 0 -234 1 3 -1.0500000000000000e+00 -1.0510968018230542e+01 1.3381510416261797e+01 -1.1261944296159641e+00 1 0 0 -235 1 3 -1.0500000000000000e+00 -1.0888244637065185e+01 1.0602162516304642e+01 -1.1254361280344156e+00 1 0 0 -236 1 4 -9.4999999999999996e-01 7.2202556898806094e+00 1.1751316569368914e+01 -1.0268202541754157e+00 0 0 0 -237 1 3 -1.0500000000000000e+00 8.5891098165858928e+00 9.8011848490458569e+00 -3.2224231985062648e+00 0 0 0 -238 1 3 -1.0500000000000000e+00 -1.0628092120351399e+01 1.1976546125541244e+01 -3.2274542345776522e+00 1 0 0 -239 1 3 -1.0500000000000000e+00 -9.4840346265497324e+00 9.6414229547445096e+00 -3.0221893387967729e+00 1 0 0 -240 1 5 4.2499999999999999e-01 7.7006026473903084e+00 1.2583293714327670e+01 -1.3731810034861018e+00 0 0 0 -241 1 1 1.5750000000000000e+00 -7.3416390884732872e+00 1.0361137065989368e+01 1.7818044228196328e-02 1 0 0 -242 1 2 2.1000000000000001e+00 -6.7839389957330116e+00 -1.8139331612392382e+01 2.7666566472304908e+00 1 1 0 -243 1 2 2.1000000000000001e+00 -6.5321463458777478e+00 1.1711311673787431e+01 2.7826601743259527e+00 1 0 0 -244 1 3 -1.0500000000000000e+00 -6.7546571183761301e+00 8.8297483950964128e+00 1.1605414494384583e+00 1 0 0 -245 1 3 -1.0500000000000000e+00 -6.3936304346746065e+00 1.1616561473590366e+01 1.1647557084996514e+00 1 0 0 -246 1 4 -9.4999999999999996e-01 -9.0225120723155499e+00 1.0447737640560419e+01 1.0416496415487835e+00 1 0 0 -247 1 3 -1.0500000000000000e+00 -1.0383021731794118e+01 1.2409056546665678e+01 3.2633806080194816e+00 1 0 0 -248 1 3 -1.0500000000000000e+00 -6.6535803904754705e+00 1.0239152753178164e+01 3.2629017007907830e+00 1 0 0 -249 1 3 -1.0500000000000000e+00 -7.7897001346039225e+00 1.2579024161975589e+01 3.0668955391428803e+00 1 0 0 -250 1 5 4.2499999999999999e-01 
-9.5259095312066897e+00 9.5801425441314869e+00 1.2421975380421504e+00 1 0 0 -251 1 1 1.5750000000000000e+00 -4.7360410604967615e+00 1.4846810946995458e+01 1.4343197521535345e-02 1 0 0 -252 1 2 2.1000000000000001e+00 -9.1657107948645198e+00 1.3239508713140442e+01 2.7731732396027429e+00 1 0 0 -253 1 2 2.1000000000000001e+00 -9.0841053821491382e+00 1.6194044122541168e+01 2.7772564773437409e+00 1 0 0 -254 1 3 -1.0500000000000000e+00 -9.3116761462039026e+00 1.3310815730221659e+01 1.1627305869096034e+00 1 0 0 -255 1 3 -1.0500000000000000e+00 -8.9415916988525144e+00 1.6094083872865529e+01 1.1623931384500175e+00 1 0 0 -256 1 4 -9.4999999999999996e-01 -6.4052303030821998e+00 1.4947007772814278e+01 1.0534403819067641e+00 1 0 0 -257 1 3 -1.0500000000000000e+00 -7.7749306647931391e+00 1.6892539787918214e+01 3.2594482676079277e+00 1 0 0 -258 1 3 -1.0500000000000000e+00 -9.2066978664561550e+00 1.4721820916556890e+01 3.2622862354455329e+00 1 0 0 -259 1 3 -1.0500000000000000e+00 -1.0339530895637068e+01 1.7067100919316967e+01 3.0570089831301797e+00 1 0 0 -260 1 5 4.2499999999999999e-01 -6.9006913216819488e+00 1.4098682134598018e+01 1.3335380803066581e+00 1 0 0 -261 1 1 1.5750000000000000e+00 -7.3255435688894259e+00 1.6348111117637242e+01 1.3243724919478339e-02 1 0 0 -262 1 2 2.1000000000000001e+00 -8.1110131978153994e+00 8.9733111402449914e+00 -2.7376759896976850e+00 1 0 0 -263 1 2 2.1000000000000001e+00 -8.1377286895226764e+00 1.4988637839851020e+01 -2.7464838529056834e+00 1 0 0 -264 1 3 -1.0500000000000000e+00 -8.1282373960741925e+00 -1.7993560481920024e+01 -1.1315987781637968e+00 1 1 0 -265 1 3 -1.0500000000000000e+00 -8.2827121352983788e+00 1.5089547304532989e+01 -1.1295755161565779e+00 1 0 0 -266 1 4 -9.4999999999999996e-01 -5.6608742375976195e+00 1.6226168365016267e+01 -1.0381632709338700e+00 1 0 0 -267 1 3 -1.0500000000000000e+00 -4.2850029133757808e+00 1.4289632163447674e+01 -3.2262701127718385e+00 1 0 0 -268 1 3 -1.0500000000000000e+00 -8.0187746834260079e+00 
1.6462007756316670e+01 -3.2306428284705229e+00 1 0 0 -269 1 3 -1.0500000000000000e+00 -6.8787824161432223e+00 1.4121357439479663e+01 -3.0230794038774143e+00 1 0 0 -270 1 5 4.2499999999999999e-01 -5.1912124921061569e+00 1.7040736557995412e+01 -1.4353359589520860e+00 1 0 0 -271 1 1 1.5750000000000000e+00 -9.9312134439261275e+00 1.1864776107213540e+01 1.6889961970017353e-02 1 0 0 -272 1 2 2.1000000000000001e+00 -5.5037099545752319e+00 1.3458296829340515e+01 -2.7383273234632401e+00 1 0 0 -273 1 2 2.1000000000000001e+00 -5.5843954631697512e+00 1.0503926417690209e+01 -2.7445029816461339e+00 1 0 0 -274 1 3 -1.0500000000000000e+00 -5.3509633400226182e+00 1.3381511934338040e+01 -1.1261949802481368e+00 1 0 0 -275 1 3 -1.0500000000000000e+00 -5.7282429505785490e+00 1.0602165885013910e+01 -1.1254358372746882e+00 1 0 0 -276 1 4 -9.4999999999999996e-01 -8.2597503387018136e+00 1.1751304311672609e+01 -1.0268355832229581e+00 1 0 0 -277 1 3 -1.0500000000000000e+00 -6.8908898552417188e+00 9.8011865897218264e+00 -3.2224222577606803e+00 1 0 0 -278 1 3 -1.0500000000000000e+00 -5.4680942474966816e+00 1.1976547387974261e+01 -3.2274541762679272e+00 1 0 0 -279 1 3 -1.0500000000000000e+00 -4.3240347508573436e+00 9.6414240753549194e+00 -3.0221895475394076e+00 1 0 0 -280 1 5 4.2499999999999999e-01 -7.7794379461355554e+00 1.2583249716404847e+01 -1.3733153657013855e+00 1 0 0 -281 1 1 1.5750000000000000e+00 -2.1816354804815212e+00 1.0361133264641335e+01 1.7820299757785207e-02 1 0 0 -282 1 2 2.1000000000000001e+00 -1.6239357820564635e+00 -1.8139330312581368e+01 2.7666600889781048e+00 1 1 0 -283 1 2 2.1000000000000001e+00 -1.3721455806852845e+00 1.1711312133838398e+01 2.7826592479177599e+00 1 0 0 -284 1 3 -1.0500000000000000e+00 -1.5946557698805499e+00 8.8297513752261843e+00 1.1605416325453923e+00 1 0 0 -285 1 3 -1.0500000000000000e+00 -1.2336248014253357e+00 1.1616561712732413e+01 1.1647546387968895e+00 1 0 0 -286 1 4 -9.4999999999999996e-01 -3.8625092722720638e+00 1.0447749842346159e+01 
1.0416635804961789e+00 1 0 0 -287 1 3 -1.0500000000000000e+00 -5.2230218409656857e+00 1.2409054487429284e+01 3.2633812328956928e+00 1 0 0 -288 1 3 -1.0500000000000000e+00 -1.4935820970772600e+00 1.0239154957403851e+01 3.2629022261826304e+00 1 0 0 -289 1 3 -1.0500000000000000e+00 -2.6296984723462185e+00 1.2579029210380060e+01 3.0668904634831335e+00 1 0 0 -290 1 5 4.2499999999999999e-01 -4.3658809471884599e+00 9.5801611429280236e+00 1.2422968259449103e+00 1 0 0 -291 1 1 1.5750000000000000e+00 4.2395062155204499e-01 1.4846815208449417e+01 1.4342442680247203e-02 1 0 0 -292 1 2 2.1000000000000001e+00 -4.0057117106339035e+00 1.3239509048836165e+01 2.7731744555134803e+00 1 0 0 -293 1 2 2.1000000000000001e+00 -3.9241044450840779e+00 1.6194042619677628e+01 2.7772537502354808e+00 1 0 0 -294 1 3 -1.0500000000000000e+00 -4.1516821305405607e+00 1.3310817757062328e+01 1.1627316744860607e+00 1 0 0 -295 1 3 -1.0500000000000000e+00 -3.7815895368732058e+00 1.6094076991815665e+01 1.1623916594768318e+00 1 0 0 -296 1 4 -9.4999999999999996e-01 -1.2452216331992716e+00 1.4947015785387560e+01 1.0534477329531811e+00 1 0 0 -297 1 3 -1.0500000000000000e+00 -2.6149324871456923e+00 1.6892542649662897e+01 3.2594442544917541e+00 1 0 0 -298 1 3 -1.0500000000000000e+00 -4.0467018355476867e+00 1.4721821997443417e+01 3.2622872657567754e+00 1 0 0 -299 1 3 -1.0500000000000000e+00 -5.1795290969235008e+00 1.7067105168626295e+01 3.0570015074520072e+00 1 0 0 -300 1 5 4.2499999999999999e-01 -1.7406731322250319e+00 1.4098709882835809e+01 1.3336193028175387e+00 1 0 0 -301 1 1 1.5750000000000000e+00 -2.1655400965340394e+00 1.6348110067873076e+01 1.3244662229093151e-02 1 0 0 -302 1 2 2.1000000000000001e+00 -2.9510126745799319e+00 8.9733117229205810e+00 -2.7376738194117731e+00 1 0 0 -303 1 2 2.1000000000000001e+00 -2.9777275693141476e+00 1.4988636013270860e+01 -2.7464856618644049e+00 1 0 0 -304 1 3 -1.0500000000000000e+00 -2.9682416502755427e+00 -1.7993559294887365e+01 -1.1315967064056114e+00 1 1 0 -305 1 3 
-1.0500000000000000e+00 -3.1227103462612993e+00 1.5089540847557057e+01 -1.1295764664880981e+00 1 0 0 -306 1 4 -9.4999999999999996e-01 -5.0087882779926041e-01 1.6226165844261129e+01 -1.0381670064799788e+00 1 0 0 -307 1 3 -1.0500000000000000e+00 8.7499864611971212e-01 1.4289629149313253e+01 -3.2262664021874876e+00 1 0 0 -308 1 3 -1.0500000000000000e+00 -2.8587775825628583e+00 1.6462008354063695e+01 -3.2306406895817350e+00 1 0 0 -309 1 3 -1.0500000000000000e+00 -1.7187804151292063e+00 1.4121361014263147e+01 -3.0230861457704785e+00 1 0 0 -310 1 5 4.2499999999999999e-01 -3.1233987057749601e-02 1.7040720785402922e+01 -1.4353858185744199e+00 1 0 0 -311 1 1 1.5750000000000000e+00 -4.7712059983323876e+00 1.1864772496514952e+01 1.6891591949507401e-02 1 0 0 -312 1 2 2.1000000000000001e+00 -3.4371342323993481e-01 1.3458295708251566e+01 -2.7383296072719734e+00 1 0 0 -313 1 2 2.1000000000000001e+00 -4.2439733982349281e-01 1.0503925742302489e+01 -2.7445039577644152e+00 1 0 0 -314 1 3 -1.0500000000000000e+00 -1.9096813598586060e-01 1.3381510378030519e+01 -1.1261944033250657e+00 1 0 0 -315 1 3 -1.0500000000000000e+00 -5.6824466394843398e-01 1.0602162567800910e+01 -1.1254361371532529e+00 1 0 0 -316 1 4 -9.4999999999999996e-01 -3.0997442311207068e+00 1.1751316693230283e+01 -1.0268202781218996e+00 1 0 0 -317 1 3 -1.0500000000000000e+00 -1.7308901666201866e+00 9.8011847941787771e+00 -3.2224231969123718e+00 1 0 0 -318 1 3 -1.0500000000000000e+00 -3.0809210268383325e-01 1.1976546141754817e+01 -3.2274542574120408e+00 1 0 0 -319 1 3 -1.0500000000000000e+00 8.3596533722183430e-01 9.6414229289338067e+00 -3.0221893296920745e+00 1 0 0 -320 1 5 4.2499999999999999e-01 -2.6193972685914382e+00 1.2583293760554227e+01 -1.3731806944183900e+00 1 0 0 -321 1 1 1.5750000000000000e+00 2.8035417368538429e+00 -1.6537982879158850e+01 1.0861000867066650e-02 0 1 0 -322 1 2 2.1000000000000001e+00 3.5913916262839525e+00 -9.1666927457410079e+00 2.7565515232154461e+00 0 1 0 -323 1 2 2.1000000000000001e+00 
3.6185602449039163e+00 -1.5183069058217326e+01 2.7719727941173922e+00 0 1 0 -324 1 3 -1.0500000000000000e+00 3.3836611183527374e+00 -1.8065973571422482e+01 1.1583930090747501e+00 0 1 0 -325 1 3 -1.0500000000000000e+00 3.7632612006337673e+00 -1.5283302577408046e+01 1.1569187513321015e+00 0 1 0 -326 1 4 -9.4999999999999996e-01 1.1423547490218233e+00 -1.6419634068784653e+01 1.0628982310147546e+00 0 1 0 -327 1 3 -1.0500000000000000e+00 -2.3178677590017394e-01 -1.4485993416323359e+01 3.2535077720514600e+00 0 1 0 -328 1 3 -1.0500000000000000e+00 3.4961954974443188e+00 -1.6656288134428006e+01 3.2559774579987355e+00 0 1 0 -329 1 3 -1.0500000000000000e+00 2.3621964325164448e+00 -1.4310883199106490e+01 3.0494317291176873e+00 0 1 0 -330 1 5 4.2499999999999999e-01 6.6609937717503165e-01 -1.7236904751244520e+01 1.4463800551316499e+00 0 1 0 -331 1 1 1.5750000000000000e+00 5.4100700043761787e+00 -1.2056581080048922e+01 5.0975347717674424e-03 0 1 0 -332 1 2 2.1000000000000001e+00 9.8389406230439747e-01 -1.3653039114538107e+01 2.7609674858555469e+00 0 1 0 -333 1 2 2.1000000000000001e+00 1.0652326595752797e+00 -1.0697389795316624e+01 2.7665073225582830e+00 0 1 0 -334 1 3 -1.0500000000000000e+00 8.2987152884171245e-01 -1.3577514807597375e+01 1.1516104809467667e+00 0 1 0 -335 1 3 -1.0500000000000000e+00 1.2087182576362636e+00 -1.0795861279397045e+01 1.1493538131982906e+00 0 1 0 -336 1 4 -9.4999999999999996e-01 3.7428364367179690e+00 -1.1940185527233320e+01 1.0532702975800543e+00 0 1 0 -337 1 3 -1.0500000000000000e+00 2.3736848179941585e+00 -9.9979403097008142e+00 3.2454469469157274e+00 0 1 0 -338 1 3 -1.0500000000000000e+00 9.4487044379564900e-01 -1.2170071055125963e+01 3.2494166689806310e+00 0 1 0 -339 1 3 -1.0500000000000000e+00 -1.9319249303027775e-01 -9.8293853999859309e+00 3.0442951408407453e+00 0 1 0 -340 1 5 4.2499999999999999e-01 3.2639939901037547e+00 -1.2765666228176649e+01 1.4169249443439025e+00 0 1 0 -341 1 1 1.5750000000000000e+00 2.8208170125484369e+00 
-1.0553631118799327e+01 2.7700461709159185e-03 0 1 0 -342 1 2 2.1000000000000001e+00 2.0379971986881014e+00 -1.7918765667375254e+01 -2.7425506087004763e+00 0 1 0 -343 1 2 2.1000000000000001e+00 2.0122889533798958e+00 -1.1904329355514136e+01 -2.7591014745747344e+00 0 1 0 -344 1 3 -1.0500000000000000e+00 2.2355149868721433e+00 -9.0219226979020295e+00 -1.1405054872148526e+00 0 1 0 -345 1 3 -1.0500000000000000e+00 1.8725086202648047e+00 -1.1806668454703988e+01 -1.1408477798643215e+00 0 1 0 -346 1 4 -9.4999999999999996e-01 4.5004971473287672e+00 -1.0643331024988472e+01 -1.0215529473496332e+00 0 1 0 -347 1 3 -1.0500000000000000e+00 5.8651982779431897e+00 -1.2605535770568759e+01 -3.2382289960303794e+00 0 1 0 -348 1 3 -1.0500000000000000e+00 2.1303446032151836e+00 -1.0433415957946796e+01 -3.2425234131092129e+00 0 1 0 -349 1 3 -1.0500000000000000e+00 3.2711365235323981e+00 -1.2770486597484602e+01 -3.0397146953396295e+00 0 1 0 -350 1 5 4.2499999999999999e-01 5.0016708770787996e+00 -9.7794943568853245e+00 -1.2415420265536019e+00 0 1 0 -351 1 1 1.5750000000000000e+00 2.1416495799036284e-01 -1.5037371023730078e+01 8.6249359688022054e-03 0 1 0 -352 1 2 2.1000000000000001e+00 4.6461784652719320e+00 -1.3433774699063715e+01 -2.7510697899339895e+00 0 1 0 -353 1 2 2.1000000000000001e+00 4.5636391700325838e+00 -1.6388054307527849e+01 -2.7510830955963357e+00 0 1 0 -354 1 3 -1.0500000000000000e+00 4.7940552974542623e+00 -1.3505566041132930e+01 -1.1382520254699955e+00 0 1 0 -355 1 3 -1.0500000000000000e+00 4.4206594118739417e+00 -1.6285917952453438e+01 -1.1354753747171138e+00 0 1 0 -356 1 4 -9.4999999999999996e-01 1.8844819420040135e+00 -1.5143367884789036e+01 -1.0315344036044110e+00 0 1 0 -357 1 3 -1.0500000000000000e+00 3.2568134687617594e+00 -1.7088385772847932e+01 -3.2316073173979287e+00 0 1 0 -358 1 3 -1.0500000000000000e+00 4.6841185987755694e+00 -1.4916690475264419e+01 -3.2383595256204902e+00 0 1 0 -359 1 3 -1.0500000000000000e+00 5.8205727804059286e+00 -1.7259299315259803e+01 
-3.0281858385254505e+00 0 1 0 -360 1 5 4.2499999999999999e-01 2.3720004699708870e+00 -1.4301321744401438e+01 -1.3415407614855956e+00 0 1 0 -361 1 1 1.5750000000000000e+00 7.9635454366178564e+00 -1.6537986461608114e+01 1.0864102526010555e-02 0 1 0 -362 1 2 2.1000000000000001e+00 -1.1888605724203680e+01 -9.1666925218890043e+00 2.7565538542885868e+00 1 1 0 -363 1 2 2.1000000000000001e+00 -1.1861438154673010e+01 -1.5183068046445340e+01 2.7719756063464516e+00 1 1 0 -364 1 3 -1.0500000000000000e+00 -1.2096332353224783e+01 -1.8065969991994077e+01 1.1583930153726136e+00 1 1 0 -365 1 3 -1.0500000000000000e+00 -1.1716737009061450e+01 -1.5283298453899418e+01 1.1569197624113574e+00 1 1 0 -366 1 4 -9.4999999999999996e-01 6.3023607071234444e+00 -1.6419621008290999e+01 1.0629151910980106e+00 0 1 0 -367 1 3 -1.0500000000000000e+00 4.9282117105796104e+00 -1.4485994079497049e+01 3.2535052195712542e+00 0 1 0 -368 1 3 -1.0500000000000000e+00 -1.1983805963569750e+01 -1.6656287261976907e+01 3.2559787673627447e+00 1 1 0 -369 1 3 -1.0500000000000000e+00 7.5221956579608431e+00 -1.4310883697915727e+01 3.0494339560615522e+00 0 1 0 -370 1 5 4.2499999999999999e-01 5.8261487540977868e+00 -1.7236852265476834e+01 1.4465311319484471e+00 0 1 0 -371 1 1 1.5750000000000000e+00 -1.0069936268348172e+01 -1.2056578002089982e+01 5.0960682666492119e-03 1 1 0 -372 1 2 2.1000000000000001e+00 6.1438963466828795e+00 -1.3653038123063755e+01 2.7609702592912182e+00 0 1 0 -373 1 2 2.1000000000000001e+00 6.2252352163652347e+00 -1.0697391251481111e+01 2.7665065641588882e+00 0 1 0 -374 1 3 -1.0500000000000000e+00 5.9898709887273114e+00 -1.3577511859049283e+01 1.1516120616970404e+00 0 1 0 -375 1 3 -1.0500000000000000e+00 6.3687212983680190e+00 -1.0795865068132670e+01 1.1493537946416073e+00 0 1 0 -376 1 4 -9.4999999999999996e-01 -1.1737163387208664e+01 -1.1940191502882348e+01 1.0532636661659307e+00 1 1 0 -377 1 3 -1.0500000000000000e+00 7.5336833945915842e+00 -9.9979369738023145e+00 3.2454456074830009e+00 0 1 0 -378 1 
3 -1.0500000000000000e+00 6.1048658576948007e+00 -1.2170070358228521e+01 3.2494191365586289e+00 0 1 0 -379 1 3 -1.0500000000000000e+00 4.9668093599128156e+00 -9.8293819532612083e+00 3.0442888963630725e+00 0 1 0 -380 1 5 4.2499999999999999e-01 -1.2216010150654379e+01 -1.2765685805679464e+01 1.4168833175310791e+00 1 1 0 -381 1 1 1.5750000000000000e+00 7.9808229670606110e+00 -1.0553632328720617e+01 2.7706534967109064e-03 0 1 0 -382 1 2 2.1000000000000001e+00 7.1979970406151459e+00 -1.7918765502196223e+01 -2.7425488717605635e+00 0 1 0 -383 1 2 2.1000000000000001e+00 7.1722899941630693e+00 -1.1904331087379228e+01 -2.7591014242919147e+00 0 1 0 -384 1 3 -1.0500000000000000e+00 7.3955087442215657e+00 -9.0219223073082269e+00 -1.1405034843944239e+00 0 1 0 -385 1 3 -1.0500000000000000e+00 7.0325084338985491e+00 -1.1806675645351344e+01 -1.1408474064476106e+00 0 1 0 -386 1 4 -9.4999999999999996e-01 -1.0979513290344796e+01 -1.0643344977939535e+01 -1.0215683215419578e+00 1 1 0 -387 1 3 -1.0500000000000000e+00 -9.6147999400661828e+00 -1.2605537468705439e+01 -3.2382259489353320e+00 1 1 0 -388 1 3 -1.0500000000000000e+00 7.2903427342981750e+00 -1.0433417118884407e+01 -3.2425211279893427e+00 0 1 0 -389 1 3 -1.0500000000000000e+00 8.4311376482619771e+00 -1.2770485196706371e+01 -3.0397195266833013e+00 0 1 0 -390 1 5 4.2499999999999999e-01 -1.0478357565056537e+01 -9.7795286912054493e+00 -1.2416710086669758e+00 1 1 0 -391 1 1 1.5750000000000000e+00 5.3741703975846384e+00 -1.5037376670483933e+01 8.6273113018648928e-03 0 1 0 -392 1 2 2.1000000000000001e+00 -1.0833824308490069e+01 -1.3433776814119870e+01 -2.7510723737451785e+00 1 1 0 -393 1 2 2.1000000000000001e+00 -1.0916362154030157e+01 -1.6388054683739004e+01 -2.7510824291843630e+00 1 1 0 -394 1 3 -1.0500000000000000e+00 -1.0685947609368585e+01 -1.3505570226040346e+01 -1.1382524916673020e+00 1 1 0 -395 1 3 -1.0500000000000000e+00 -1.1059343576921858e+01 -1.6285918049216274e+01 -1.1354743735525474e+00 1 1 0 -396 1 4 
-9.4999999999999996e-01 7.0444851693846608e+00 -1.5143356778269499e+01 -1.0315193364818942e+00 0 1 0 -397 1 3 -1.0500000000000000e+00 8.4168134021028749e+00 -1.7088387647016091e+01 -3.2316063631752368e+00 0 1 0 -398 1 3 -1.0500000000000000e+00 -1.0795878788491111e+01 -1.4916691833427853e+01 -3.2383609866319993e+00 1 1 0 -399 1 3 -1.0500000000000000e+00 -9.6594281729635725e+00 -1.7259301944155037e+01 -3.0281833634335884e+00 1 1 0 -400 1 5 4.2499999999999999e-01 7.5320329606824998e+00 -1.4301290939775029e+01 -1.3414306260430369e+00 0 1 0 -401 1 1 1.5750000000000000e+00 -7.5164583015877202e+00 -1.6537982709986142e+01 1.0860948651142621e-02 1 1 0 -402 1 2 2.1000000000000001e+00 -6.7286083634253586e+00 -9.1666928913841410e+00 2.7565515166727224e+00 1 1 0 -403 1 2 2.1000000000000001e+00 -6.7014398917982394e+00 -1.5183069044838401e+01 2.7719727468163260e+00 1 1 0 -404 1 3 -1.0500000000000000e+00 -6.9363387829149410e+00 -1.8065973628987432e+01 1.1583928921739748e+00 1 1 0 -405 1 3 -1.0500000000000000e+00 -6.5567387653515556e+00 -1.5283302479111441e+01 1.1569187182190461e+00 1 1 0 -406 1 4 -9.4999999999999996e-01 -9.1776452405777107e+00 -1.6419634110676846e+01 1.0628982908730098e+00 1 1 0 -407 1 3 -1.0500000000000000e+00 -1.0551787278729160e+01 -1.4485993663065766e+01 3.2535077997632147e+00 1 1 0 -408 1 3 -1.0500000000000000e+00 -6.8238045171904682e+00 -1.6656288031657951e+01 3.2559772960176740e+00 1 1 0 -409 1 3 -1.0500000000000000e+00 -7.9578035894078436e+00 -1.4310883259763630e+01 3.0494317519961296e+00 1 1 0 -410 1 5 4.2499999999999999e-01 -9.6539006203503224e+00 -1.7236904887802019e+01 1.4463799018908148e+00 1 1 0 -411 1 1 1.5750000000000000e+00 -4.9099299269112269e+00 -1.2056581046762455e+01 5.0975152389245437e-03 1 1 0 -412 1 2 2.1000000000000001e+00 -9.3361057677534074e+00 -1.3653038872334317e+01 2.7609674353505014e+00 1 1 0 -413 1 2 2.1000000000000001e+00 -9.2547673051165322e+00 -1.0697389567014685e+01 2.7665073550029007e+00 1 1 0 -414 1 3 -1.0500000000000000e+00 
-9.4901284949936624e+00 -1.3577514789464921e+01 1.1516105013225868e+00 1 1 0 -415 1 3 -1.0500000000000000e+00 -9.1112818269409068e+00 -1.0795861204525032e+01 1.1493537912334766e+00 1 1 0 -416 1 4 -9.4999999999999996e-01 -6.5771636763792856e+00 -1.1940185638786705e+01 1.0532702934787856e+00 1 1 0 -417 1 3 -1.0500000000000000e+00 -7.9463151252650128e+00 -9.9979403366565087e+00 3.2454469094271428e+00 1 1 0 -418 1 3 -1.0500000000000000e+00 -9.3751296080744542e+00 -1.2170071639000168e+01 3.2494167066829664e+00 1 1 0 -419 1 3 -1.0500000000000000e+00 -1.0513192247684479e+01 -9.8293854547066442e+00 3.0442952314422005e+00 1 1 0 -420 1 5 4.2499999999999999e-01 -7.0560061273997992e+00 -1.2765666390852058e+01 1.4169247811593966e+00 1 1 0 -421 1 1 1.5750000000000000e+00 -7.4991828524594517e+00 -1.0553631098195316e+01 2.7700964255910066e-03 1 1 0 -422 1 2 2.1000000000000001e+00 -8.2820027974883335e+00 -1.7918765675707210e+01 -2.7425505995784007e+00 1 1 0 -423 1 2 2.1000000000000001e+00 -8.3077111017359488e+00 -1.1904329247022861e+01 -2.7591013184925881e+00 1 1 0 -424 1 3 -1.0500000000000000e+00 -8.0844849442805558e+00 -9.0219227001353470e+00 -1.1405054549989710e+00 1 1 0 -425 1 3 -1.0500000000000000e+00 -8.4474912953105505e+00 -1.1806668497127227e+01 -1.1408476470566562e+00 1 1 0 -426 1 4 -9.4999999999999996e-01 -5.8195028037343697e+00 -1.0643331194556545e+01 -1.0215529747337371e+00 1 1 0 -427 1 3 -1.0500000000000000e+00 -4.4548015609946150e+00 -1.2605535685293054e+01 -3.2382289958675141e+00 1 1 0 -428 1 3 -1.0500000000000000e+00 -8.1896554520154794e+00 -1.0433416248987673e+01 -3.2425233054080724e+00 1 1 0 -429 1 3 -1.0500000000000000e+00 -7.0488633548145589e+00 -1.2770486635921547e+01 -3.0397145986447542e+00 1 1 0 -430 1 5 4.2499999999999999e-01 -5.3183292387545782e+00 -9.7794945817055883e+00 -1.2415425085026754e+00 1 1 0 -431 1 1 1.5750000000000000e+00 -1.0105835090698806e+01 -1.5037370868222572e+01 8.6248079639830166e-03 1 1 0 -432 1 2 2.1000000000000001e+00 
-5.6738215427864862e+00 -1.3433774695776268e+01 -2.7510697137374738e+00 1 1 0 -433 1 2 2.1000000000000001e+00 -5.7563607108853736e+00 -1.6388054247274404e+01 -2.7510830308454945e+00 1 1 0 -434 1 3 -1.0500000000000000e+00 -5.5259445895493595e+00 -1.3505566006970168e+01 -1.1382519723371303e+00 1 1 0 -435 1 3 -1.0500000000000000e+00 -5.8993405609649923e+00 -1.6285917903739904e+01 -1.1354753462416856e+00 1 1 0 -436 1 4 -9.4999999999999996e-01 -8.4355180533864225e+00 -1.5143367944990963e+01 -1.0315343692263674e+00 1 1 0 -437 1 3 -1.0500000000000000e+00 -7.0631865660776212e+00 -1.7088385729027223e+01 -3.2316072745379776e+00 1 1 0 -438 1 3 -1.0500000000000000e+00 -5.6358813505116583e+00 -1.4916690438080733e+01 -3.2383594851694859e+00 1 1 0 -439 1 3 -1.0500000000000000e+00 -4.4994271788135016e+00 -1.7259299322441073e+01 -3.0281858039009517e+00 1 1 0 -440 1 5 4.2499999999999999e-01 -7.9479995295496879e+00 -1.4301321770032208e+01 -1.3415406304368469e+00 1 1 0 -441 1 1 1.5750000000000000e+00 -2.3564545862570396e+00 -1.6537986593946776e+01 1.0864141713367914e-02 1 1 0 -442 1 2 2.1000000000000001e+00 -1.5686055779484072e+00 -9.1666925393845418e+00 2.7565538477471172e+00 1 1 0 -443 1 2 2.1000000000000001e+00 -1.5414383801959186e+00 -1.5183068190505880e+01 2.7719756932149409e+00 1 1 0 -444 1 3 -1.0500000000000000e+00 -1.7763323965331601e+00 -1.8065970006853526e+01 1.1583930713025961e+00 1 1 0 -445 1 3 -1.0500000000000000e+00 -1.3967369717832252e+00 -1.5283298403160133e+01 1.1569198386126143e+00 1 1 0 -446 1 4 -9.4999999999999996e-01 -4.0176392706114594e+00 -1.6419621202043608e+01 1.0629151264212435e+00 1 1 0 -447 1 3 -1.0500000000000000e+00 -5.3917882121163290e+00 -1.4485994073643855e+01 3.2535052665375535e+00 1 1 0 -448 1 3 -1.0500000000000000e+00 -1.6638059365665931e+00 -1.6656286984116278e+01 3.2559787621821119e+00 1 1 0 -449 1 3 -1.0500000000000000e+00 -2.7978041167465291e+00 -1.4310883773619036e+01 3.0494340565535119e+00 1 1 0 -450 1 5 4.2499999999999999e-01 
-4.4938513675873697e+00 -1.7236852374538739e+01 1.4465311379137802e+00 1 1 0 -451 1 1 1.5750000000000000e+00 2.5006363383229946e-01 -1.2056578091250655e+01 5.0959012523481562e-03 1 1 0 -452 1 2 2.1000000000000001e+00 -4.1761037074413672e+00 -1.3653037987967030e+01 2.7609703034089783e+00 1 1 0 -453 1 2 2.1000000000000001e+00 -4.0947647325183976e+00 -1.0697391142448371e+01 2.7665065520992389e+00 1 1 0 -454 1 3 -1.0500000000000000e+00 -4.3301289055550285e+00 -1.3577511864032029e+01 1.1516121200275826e+00 1 1 0 -455 1 3 -1.0500000000000000e+00 -3.9512787735859156e+00 -1.0795865090888144e+01 1.1493537704630086e+00 1 1 0 -456 1 4 -9.4999999999999996e-01 -1.4171634467778009e+00 -1.1940191561208989e+01 1.0532635016197283e+00 1 1 0 -457 1 3 -1.0500000000000000e+00 -2.7863169232387399e+00 -9.9979370067912843e+00 3.2454455895018555e+00 1 1 0 -458 1 3 -1.0500000000000000e+00 -4.2151341815886649e+00 -1.2170070525378776e+01 3.2494191352366055e+00 1 1 0 -459 1 3 -1.0500000000000000e+00 -5.3531907390878031e+00 -9.8293818683168404e+00 3.0442888609016912e+00 1 1 0 -460 1 5 4.2499999999999999e-01 -1.8960102758457644e+00 -1.2765686137869810e+01 1.4168825493464219e+00 1 1 0 -461 1 1 1.5750000000000000e+00 -2.3391770450650400e+00 -1.0553632197834641e+01 2.7705977710557050e-03 1 1 0 -462 1 2 2.1000000000000001e+00 -3.1220029184157889e+00 -1.7918765505761495e+01 -2.7425488851699082e+00 1 1 0 -463 1 2 2.1000000000000001e+00 -3.1477100424297220e+00 -1.1904331201622901e+01 -2.7591014673602565e+00 1 1 0 -464 1 3 -1.0500000000000000e+00 -2.9244911890018193e+00 -9.0219223406330507e+00 -1.1405035462049451e+00 1 1 0 -465 1 3 -1.0500000000000000e+00 -3.2874915242805303e+00 -1.1806675643126630e+01 -1.1408474507211430e+00 1 1 0 -466 1 4 -9.4999999999999996e-01 -6.5951340776267919e-01 -1.0643344929270292e+01 -1.0215683718625339e+00 1 1 0 -467 1 3 -1.0500000000000000e+00 7.0520008845003090e-01 -1.2605537454924868e+01 -3.2382261613755015e+00 1 1 0 -468 1 3 -1.0500000000000000e+00 
-3.0296573228770560e+00 -1.0433416912854424e+01 -3.2425211536109906e+00 1 1 0 -469 1 3 -1.0500000000000000e+00 -1.8888623156236797e+00 -1.2770485126405639e+01 -3.0397196098633845e+00 1 1 0 -470 1 5 4.2499999999999999e-01 -1.5835753987611589e-01 -9.7795286587877950e+00 -1.2416709956126635e+00 1 1 0 -471 1 1 1.5750000000000000e+00 -4.9458295062320037e+00 -1.5037376610482283e+01 8.6273026011980392e-03 1 1 0 -472 1 2 2.1000000000000001e+00 -5.1382437786611845e-01 -1.3433776821851868e+01 -2.7510724046921284e+00 1 1 0 -473 1 2 2.1000000000000001e+00 -5.9636217551245707e-01 -1.6388054687239428e+01 -2.7510822618911233e+00 1 1 0 -474 1 3 -1.0500000000000000e+00 -3.6594755058336403e-01 -1.3505570061033037e+01 -1.1382525898483742e+00 1 1 0 -475 1 3 -1.0500000000000000e+00 -7.3934365234228494e-01 -1.6285918093959150e+01 -1.1354743073955618e+00 1 1 0 -476 1 4 -9.4999999999999996e-01 -3.2755148314143581e+00 -1.5143356828385484e+01 -1.0315193712919282e+00 1 1 0 -477 1 3 -1.0500000000000000e+00 -1.9031865846917722e+00 -1.7088387723588216e+01 -3.2316063607183416e+00 1 1 0 -478 1 3 -1.0500000000000000e+00 -4.7587887460540657e-01 -1.4916691750791164e+01 -3.2383609010873373e+00 1 1 0 -479 1 3 -1.0500000000000000e+00 6.6057179954975176e-01 -1.7259301882353789e+01 -3.0281832339508528e+00 1 1 0 -480 1 5 4.2499999999999999e-01 -2.7879671788753608e+00 -1.4301291062856247e+01 -1.3414309843034626e+00 1 1 0 -481 1 1 1.5750000000000000e+00 2.8579762879649273e+00 -7.5728252904140003e+00 2.5509249011470558e-03 0 1 0 -482 1 2 2.1000000000000001e+00 3.6501914526752000e+00 -2.0142566154856567e-01 2.7623916440212657e+00 0 1 0 -483 1 2 2.1000000000000001e+00 3.6738868943392688e+00 -6.2110960529670844e+00 2.7655007847467132e+00 0 1 0 -484 1 3 -1.0500000000000000e+00 3.4402694546938299e+00 -9.0893531417367281e+00 1.1451063311536220e+00 0 1 0 -485 1 3 -1.0500000000000000e+00 3.8144585621038498e+00 -6.3061322151310240e+00 1.1447400150672262e+00 0 1 0 -486 1 4 -9.4999999999999996e-01 
1.1843820437445629e+00 -7.4646531850243605e+00 1.0430100025069979e+00 0 1 0 -487 1 3 -1.0500000000000000e+00 -1.7811834108327140e-01 -5.5129573118250921e+00 3.2417786174013958e+00 0 1 0 -488 1 3 -1.0500000000000000e+00 3.5554479941314376e+00 -7.6849122082701946e+00 3.2442094498771166e+00 0 1 0 -489 1 3 -1.0500000000000000e+00 2.4127904555945108e+00 -5.3504081707150828e+00 3.0463937175838591e+00 0 1 0 -490 1 5 4.2499999999999999e-01 6.9865664300930241e-01 -8.3028304512569342e+00 1.3676485367194822e+00 0 1 0 -491 1 1 1.5750000000000000e+00 5.4691335276720210e+00 -3.0913866453500631e+00 5.3623125957997786e-03 0 1 0 -492 1 2 2.1000000000000001e+00 1.0413811798928077e+00 -4.6822322851143525e+00 2.7570054548179996e+00 0 1 0 -493 1 2 2.1000000000000001e+00 1.1214909654529901e+00 -1.7278611441714133e+00 2.7692250845468216e+00 0 1 0 -494 1 3 -1.0500000000000000e+00 8.9594382123554972e-01 -4.6050075864508848e+00 1.1423674104802934e+00 0 1 0 -495 1 3 -1.0500000000000000e+00 1.2594156889842107e+00 -1.8197700876716993e+00 1.1454565899014320e+00 0 1 0 -496 1 4 -9.4999999999999996e-01 3.7839951755514889e+00 -3.0002809518433331e+00 1.0292600146457325e+00 0 1 0 -497 1 3 -1.0500000000000000e+00 2.4288955695376711e+00 -1.0297351082812547e+00 3.2441105415884497e+00 0 1 0 -498 1 3 -1.0500000000000000e+00 1.0054012957711169e+00 -3.2024146376977267e+00 3.2442767880289498e+00 0 1 0 -499 1 3 -1.0500000000000000e+00 -1.4206485854321649e-01 -8.7428910072043919e-01 3.0544830544669797e+00 0 1 0 -500 1 5 4.2499999999999999e-01 3.2824116300736907e+00 -3.8617657205625093e+00 1.2574884075973127e+00 0 1 0 -501 1 1 1.5750000000000000e+00 2.8796544512547690e+00 -1.5864684533134472e+00 3.8496726901779965e-03 0 1 0 -502 1 2 2.1000000000000001e+00 2.0928423053383547e+00 -8.9511195736643785e+00 -2.7557264781083211e+00 0 1 0 -503 1 2 2.1000000000000001e+00 2.0706402973847275e+00 -2.9438976451570102e+00 -2.7611073302053804e+00 0 1 0 -504 1 3 -1.0500000000000000e+00 2.2885190100959534e+00 
-6.7363396993872726e-02 -1.1334884106485070e+00 0 1 0 -505 1 3 -1.0500000000000000e+00 1.9349280280725605e+00 -2.8519854981012038e+00 -1.1385470376972755e+00 0 1 0 -506 1 4 -9.4999999999999996e-01 4.5728365673716063e+00 -1.6595817991953581e+00 -1.0043031257923012e+00 0 1 0 -507 1 3 -1.0500000000000000e+00 5.9264932830663639e+00 -3.6474348922194793e+00 -3.2374584108865090e+00 0 1 0 -508 1 3 -1.0500000000000000e+00 2.1813742996212540e+00 -1.4704087609181435e+00 -3.2399129954866126e+00 0 1 0 -509 1 3 -1.0500000000000000e+00 3.3359850714815700e+00 -3.7947931447448529e+00 -3.0483298883225629e+00 0 1 0 -510 1 5 4.2499999999999999e-01 5.0835223616429968e+00 -7.8213293737469058e-01 -1.1382575582881014e+00 0 1 0 -511 1 1 1.5750000000000000e+00 2.6924878738895508e-01 -6.0706793349898973e+00 8.0167511365480948e-04 0 1 0 -512 1 2 2.1000000000000001e+00 4.7023968082310450e+00 -4.4706501738271243e+00 -2.7557456523064978e+00 0 1 0 -513 1 2 2.1000000000000001e+00 4.6202291256510648e+00 -7.4238326224119042e+00 -2.7625784198604322e+00 0 1 0 -514 1 3 -1.0500000000000000e+00 4.8415123216481657e+00 -4.5424679957613492e+00 -1.1390727257864111e+00 0 1 0 -515 1 3 -1.0500000000000000e+00 4.4835955933211054e+00 -7.3292401396217279e+00 -1.1416945953616455e+00 0 1 0 -516 1 4 -9.4999999999999996e-01 1.9560386762304240e+00 -6.1501695089400332e+00 -1.0146779074202783e+00 0 1 0 -517 1 3 -1.0500000000000000e+00 3.3146734768695438e+00 -8.1246074320744750e+00 -3.2401003227564544e+00 0 1 0 -518 1 3 -1.0500000000000000e+00 4.7352353432803671e+00 -5.9511366217083221e+00 -3.2430291327097081e+00 0 1 0 -519 1 3 -1.0500000000000000e+00 5.8828740640937731e+00 -8.2820520646230484e+00 -3.0474815656004104e+00 0 1 0 -520 1 5 4.2499999999999999e-01 2.4617802670391278e+00 -5.2774722702156094e+00 -1.1880676486919004e+00 0 1 0 -521 1 1 1.5750000000000000e+00 8.0179815797416758e+00 -7.5728278489069254e+00 2.5529833230653054e-03 0 1 0 -522 1 2 2.1000000000000001e+00 -1.1829807535410978e+01 -2.0142479401242497e-01 
2.7623931696793118e+00 1 1 0 -523 1 2 2.1000000000000001e+00 -1.1806111939384355e+01 -6.2110957215882614e+00 2.7655016468189295e+00 1 1 0 -524 1 3 -1.0500000000000000e+00 -1.2039725741150827e+01 -9.0893538417248667e+00 1.1451058121310815e+00 1 1 0 -525 1 3 -1.0500000000000000e+00 -1.1665538945778664e+01 -6.3061298037699824e+00 1.1447398955172048e+00 1 1 0 -526 1 4 -9.4999999999999996e-01 6.3443877315714730e+00 -7.4646422403867323e+00 1.0430239385824134e+00 0 1 0 -527 1 3 -1.0500000000000000e+00 4.9818809511307176e+00 -5.5129582651659472e+00 3.2417776485688208e+00 0 1 0 -528 1 3 -1.0500000000000000e+00 -1.1924553120114746e+01 -7.6849115839103384e+00 3.2442089879952665e+00 1 1 0 -529 1 3 -1.0500000000000000e+00 7.5727905137683535e+00 -5.3504077364477496e+00 3.0463929573158008e+00 0 1 0 -530 1 5 4.2499999999999999e-01 5.8586972993145281e+00 -8.3027960599174335e+00 1.3677711561974668e+00 0 1 0 -531 1 1 1.5750000000000000e+00 -1.0010874047557001e+01 -3.0913848217188153e+00 5.3598138056081268e-03 1 1 0 -532 1 2 2.1000000000000001e+00 6.2013826051921086e+00 -4.6822317933986159e+00 2.7570069006010378e+00 0 1 0 -533 1 2 2.1000000000000001e+00 6.2814932668508412e+00 -1.7278618859161767e+00 2.7692282250412461e+00 0 1 0 -534 1 3 -1.0500000000000000e+00 6.0559408936262251e+00 -4.6050048370396066e+00 1.1423684194567798e+00 0 1 0 -535 1 3 -1.0500000000000000e+00 6.4194156020707460e+00 -1.8197706776071527e+00 1.1454590936630336e+00 0 1 0 -536 1 4 -9.4999999999999996e-01 -1.1696001928258351e+01 -3.0002808347035383e+00 1.0292590333675573e+00 1 1 0 -537 1 3 -1.0500000000000000e+00 7.5888953481225485e+00 -1.0297331488138575e+00 3.2441116260617644e+00 0 1 0 -538 1 3 -1.0500000000000000e+00 6.1653989384036194e+00 -3.2024146808373715e+00 3.2442795238862292e+00 0 1 0 -539 1 3 -1.0500000000000000e+00 5.0179353702996128e+00 -8.7428878813011579e-01 3.0544823862504167e+00 0 1 0 -540 1 5 4.2499999999999999e-01 8.4424186174965605e+00 -3.8617653162725158e+00 1.2574981176351390e+00 0 1 0 -541 1 1 
1.5750000000000000e+00 8.0396596425029756e+00 -1.5864685665940463e+00 3.8518859417138174e-03 0 1 0 -542 1 2 2.1000000000000001e+00 7.2528400067980030e+00 -8.9511202643500738e+00 -2.7557249977654177e+00 0 1 0 -543 1 2 2.1000000000000001e+00 7.2306418585295624e+00 -2.9438985842837262e+00 -2.7611037975280901e+00 0 1 0 -544 1 3 -1.0500000000000000e+00 7.4485149971431319e+00 -6.7361353887147146e-02 -1.1334867930496415e+00 0 1 0 -545 1 3 -1.0500000000000000e+00 7.0949264762885988e+00 -2.8519875489725575e+00 -1.1385442896889941e+00 0 1 0 -546 1 4 -9.4999999999999996e-01 -1.0907168191628390e+01 -1.6595867350948836e+00 -1.0043099201030330e+00 1 1 0 -547 1 3 -1.0500000000000000e+00 -9.5535063953498085e+00 -3.6474360351771136e+00 -3.2374592604081087e+00 1 1 0 -548 1 3 -1.0500000000000000e+00 7.3413721075789304e+00 -1.4704096620537968e+00 -3.2399099597021994e+00 0 1 0 -549 1 3 -1.0500000000000000e+00 8.4959850972702569e+00 -3.7947934031758024e+00 -3.0483296689972921e+00 0 1 0 -550 1 5 4.2499999999999999e-01 -1.0396493430356603e+01 -7.8213974884010895e-01 -1.1383144742577613e+00 1 1 0 -551 1 1 1.5750000000000000e+00 5.4292562954781758e+00 -6.0706834179896454e+00 8.0304793805119346e-04 0 1 0 -552 1 2 2.1000000000000001e+00 -1.0777604139245819e+01 -4.4706514109339750e+00 -2.7557474311308923e+00 1 1 0 -553 1 2 2.1000000000000001e+00 -1.0859772403827316e+01 -7.4238319871439664e+00 -2.7625777341350330e+00 1 1 0 -554 1 3 -1.0500000000000000e+00 -1.0638486257434380e+01 -4.5424699280617133e+00 -1.1390731058750738e+00 1 1 0 -555 1 3 -1.0500000000000000e+00 -1.0996409164267124e+01 -7.3292383684650382e+00 -1.1416941131260039e+00 1 1 0 -556 1 4 -9.4999999999999996e-01 7.1160371847441404e+00 -6.1501644150176080e+00 -1.0146702266915781e+00 0 1 0 -557 1 3 -1.0500000000000000e+00 8.4746733682503077e+00 -8.1246107745132647e+00 -3.2400982587548217e+00 0 1 0 -558 1 3 -1.0500000000000000e+00 -1.0744763598205394e+01 -5.9511379136323690e+00 -3.2430302733109038e+00 1 1 0 -559 1 3 
-1.0500000000000000e+00 -9.5971282323576546e+00 -8.2820564399613925e+00 -3.0474755677701841e+00 1 1 0 -560 1 5 4.2499999999999999e-01 7.6217971982909702e+00 -5.2774697204014789e+00 -1.1880237198010946e+00 0 1 0 -561 1 1 1.5750000000000000e+00 -7.4620238051391024e+00 -7.5728252386857093e+00 2.5509910226624299e-03 1 1 0 -562 1 2 2.1000000000000001e+00 -6.6698085276533998e+00 -2.0142572104126444e-01 2.7623916516067091e+00 1 1 0 -563 1 2 2.1000000000000001e+00 -6.6461129766623888e+00 -6.2110960114036544e+00 2.7655008528157552e+00 1 1 0 -564 1 3 -1.0500000000000000e+00 -6.8797305145918308e+00 -9.0893531210439544e+00 1.1451063816790850e+00 1 1 0 -565 1 3 -1.0500000000000000e+00 -6.5055414858338407e+00 -6.3061322110945479e+00 1.1447400754486665e+00 1 1 0 -566 1 4 -9.4999999999999996e-01 -9.1356180044504196e+00 -7.4646532668590613e+00 1.0430100172190766e+00 1 1 0 -567 1 3 -1.0500000000000000e+00 -1.0498118355439564e+01 -5.5129574360895806e+00 3.2417786003108500e+00 1 1 0 -568 1 3 -1.0500000000000000e+00 -6.7645520307749800e+00 -7.6849121740649835e+00 3.2442094863119628e+00 1 1 0 -569 1 3 -1.0500000000000000e+00 -7.9072096204926661e+00 -5.3504081251227653e+00 3.0463937987666085e+00 1 1 0 -570 1 5 4.2499999999999999e-01 -9.6213434010407504e+00 -8.3028305254396848e+00 1.3676483388533125e+00 1 1 0 -571 1 1 1.5750000000000000e+00 -4.8508665100432289e+00 -3.0913866590570027e+00 5.3622863656777042e-03 1 1 0 -572 1 2 2.1000000000000001e+00 -9.2786186931489016e+00 -4.6822322876050126e+00 2.7570054308053500e+00 1 1 0 -573 1 2 2.1000000000000001e+00 -9.1985090512562753e+00 -1.7278612526071484e+00 2.7692251161144732e+00 1 1 0 -574 1 3 -1.0500000000000000e+00 -9.4240560849308448e+00 -4.6050076018035764e+00 1.1423674288041994e+00 1 1 0 -575 1 3 -1.0500000000000000e+00 -9.0605843309322562e+00 -1.8197700648234800e+00 1.1454565705577924e+00 1 1 0 -576 1 4 -9.4999999999999996e-01 -6.5360049148092720e+00 -3.0002808140551949e+00 1.0292600826451430e+00 1 1 0 -577 1 3 -1.0500000000000000e+00 
-7.8911043609157137e+00 -1.0297350368835474e+00 3.2441104869236419e+00 1 1 0 -578 1 3 -1.0500000000000000e+00 -9.3145986620772643e+00 -3.2024146564087808e+00 3.2442767520874209e+00 1 1 0 -579 1 3 -1.0500000000000000e+00 -1.0462064910734954e+01 -8.7428913799177721e-01 3.0544830609219087e+00 1 1 0 -580 1 5 4.2499999999999999e-01 -7.0375884152331309e+00 -3.8617656055414766e+00 1.2574886852588172e+00 1 1 0 -581 1 1 1.5750000000000000e+00 -7.4403455804764791e+00 -1.5864684864316523e+00 3.8497869949676300e-03 1 1 0 -582 1 2 2.1000000000000001e+00 -8.2271577010716257e+00 -8.9511194253847695e+00 -2.7557264410039561e+00 1 1 0 -583 1 2 2.1000000000000001e+00 -8.2493596706777161e+00 -2.9438976080963570e+00 -2.7611073160394257e+00 1 1 0 -584 1 3 -1.0500000000000000e+00 -8.0314809066519022e+00 -6.7363393780894398e-02 -1.1334883134546114e+00 1 1 0 -585 1 3 -1.0500000000000000e+00 -8.3850720404267030e+00 -2.8519855510711611e+00 -1.1385470385732503e+00 1 1 0 -586 1 4 -9.4999999999999996e-01 -5.7471633830521700e+00 -1.6595818760350767e+00 -1.0043030468145293e+00 1 1 0 -587 1 3 -1.0500000000000000e+00 -4.3935068666009238e+00 -3.6474348687539795e+00 -3.2374584186385587e+00 1 1 0 -588 1 3 -1.0500000000000000e+00 -8.1386257292904105e+00 -1.4704087680783076e+00 -3.2399129325807046e+00 1 1 0 -589 1 3 -1.0500000000000000e+00 -6.9840150068117222e+00 -3.7947931658217371e+00 -3.0483298891716677e+00 1 1 0 -590 1 5 4.2499999999999999e-01 -5.2364777045552824e+00 -7.8213293828070007e-01 -1.1382574609017624e+00 1 1 0 -591 1 1 1.5750000000000000e+00 -1.0050751348075135e+01 -6.0706792322617247e+00 8.0168631165733473e-04 1 1 0 -592 1 2 2.1000000000000001e+00 -5.6176031632905161e+00 -4.4706501969322900e+00 -2.7557456914388245e+00 1 1 0 -593 1 2 2.1000000000000001e+00 -5.6997708417635211e+00 -7.4238326848277154e+00 -2.7625784406379745e+00 1 1 0 -594 1 3 -1.0500000000000000e+00 -5.4784876689296738e+00 -4.5424680573607450e+00 -1.1390726897933874e+00 1 1 0 -595 1 3 -1.0500000000000000e+00 
-5.8364044191370255e+00 -7.3292401211120151e+00 -1.1416946489172730e+00 1 1 0 -596 1 4 -9.4999999999999996e-01 -8.3639613762878664e+00 -6.1501694738902586e+00 -1.0146778669269132e+00 1 1 0 -597 1 3 -1.0500000000000000e+00 -7.0053265782881136e+00 -8.1246075531191924e+00 -3.2401003281787872e+00 1 1 0 -598 1 3 -1.0500000000000000e+00 -5.5847646461292024e+00 -5.9511365484457244e+00 -3.2430291585670830e+00 1 1 0 -599 1 3 -1.0500000000000000e+00 -4.4371259724731145e+00 -8.2820520000459510e+00 -3.0474815102095665e+00 1 1 0 -600 1 5 4.2499999999999999e-01 -7.8582196859492122e+00 -5.2774722159275420e+00 -1.1880674304732235e+00 1 1 0 -601 1 1 1.5750000000000000e+00 -2.3020183781100894e+00 -7.5728278232892112e+00 2.5529845371998761e-03 1 1 0 -602 1 2 2.1000000000000001e+00 -1.5098074509344350e+00 -2.0142470929206624e-01 2.7623931935558872e+00 1 1 0 -603 1 2 2.1000000000000001e+00 -1.4861119613838358e+00 -6.2110957547256156e+00 2.7655016862043240e+00 1 1 0 -604 1 3 -1.0500000000000000e+00 -1.7197258144231284e+00 -9.0893537657233114e+00 1.1451058227458670e+00 1 1 0 -605 1 3 -1.0500000000000000e+00 -1.3455389741778507e+00 -6.3061299048187074e+00 1.1447399002991308e+00 1 1 0 -606 1 4 -9.4999999999999996e-01 -3.9756122803284573e+00 -7.4646421830238037e+00 1.0430238223775667e+00 1 1 0 -607 1 3 -1.0500000000000000e+00 -5.3381190714352069e+00 -5.5129583082631122e+00 3.2417776522169905e+00 1 1 0 -608 1 3 -1.0500000000000000e+00 -1.6045530808867614e+00 -7.6849114491275010e+00 3.2442091125137438e+00 1 1 0 -609 1 3 -1.0500000000000000e+00 -2.7472094562838016e+00 -5.3504077496497509e+00 3.0463929161931667e+00 1 1 0 -610 1 5 4.2499999999999999e-01 -4.4613026455189715e+00 -8.3027959622235965e+00 1.3677713806854559e+00 1 1 0 -611 1 1 1.5750000000000000e+00 3.0912589486300845e-01 -3.0913848425438921e+00 5.3598596223931594e-03 1 1 0 -612 1 2 2.1000000000000001e+00 -4.1186173966004134e+00 -4.6822317382957657e+00 2.7570069556378680e+00 1 1 0 -613 1 2 2.1000000000000001e+00 
-4.0385067451637218e+00 -1.7278618514344402e+00 2.7692283046885215e+00 1 1 0 -614 1 3 -1.0500000000000000e+00 -4.2640591560616237e+00 -4.6050047632386075e+00 1.1423684580241389e+00 1 1 0 -615 1 3 -1.0500000000000000e+00 -3.9005843533876661e+00 -1.8197707223832715e+00 1.1454591479339715e+00 1 1 0 -616 1 4 -9.4999999999999996e-01 -1.3760019343556529e+00 -3.0002808585551701e+00 1.0292589652233630e+00 1 1 0 -617 1 3 -1.0500000000000000e+00 -2.7311046911460419e+00 -1.0297332102435170e+00 3.2441117594112168e+00 1 1 0 -618 1 3 -1.0500000000000000e+00 -4.1546010614102968e+00 -3.2024147708397983e+00 3.2442796046460121e+00 1 1 0 -619 1 3 -1.0500000000000000e+00 -5.3020646113913141e+00 -8.7428883007132896e-01 3.0544824056822968e+00 1 1 0 -620 1 5 4.2499999999999999e-01 -1.8775813499744594e+00 -3.8617652601028691e+00 1.2574979871588283e+00 1 1 0 -621 1 1 1.5750000000000000e+00 -2.2803404225566020e+00 -1.5864686066072871e+00 3.8518536994391894e-03 1 1 0 -622 1 2 2.1000000000000001e+00 -3.0671598052473215e+00 -8.9511202869001796e+00 -2.7557249648415336e+00 1 1 0 -623 1 2 2.1000000000000001e+00 -3.0893582402262725e+00 -2.9438984500370804e+00 -2.7611038593016612e+00 1 1 0 -624 1 3 -1.0500000000000000e+00 -2.8714850709791317e+00 -6.7361320920927170e-02 -1.1334868392260038e+00 1 1 0 -625 1 3 -1.0500000000000000e+00 -3.2250735683220251e+00 -2.8519874877291347e+00 -1.1385442657909550e+00 1 1 0 -626 1 4 -9.4999999999999996e-01 -5.8716822071637864e-01 -1.6595867142136242e+00 -1.0043100492250261e+00 1 1 0 -627 1 3 -1.0500000000000000e+00 7.6649360710437797e-01 -3.6474359902480114e+00 -3.2374591482750077e+00 1 1 0 -628 1 3 -1.0500000000000000e+00 -2.9786278063952061e+00 -1.4704096530414432e+00 -3.2399099461552190e+00 1 1 0 -629 1 3 -1.0500000000000000e+00 -1.8240148784156958e+00 -3.7947934832736081e+00 -3.0483296255513057e+00 1 1 0 -630 1 5 4.2499999999999999e-01 -7.6493471071549024e-02 -7.8213987752477010e-01 -1.1383145348412551e+00 1 1 0 -631 1 1 1.5750000000000000e+00 
-4.8907437309204376e+00 -6.0706834881352734e+00 8.0298073607565357e-04 1 1 0 -632 1 2 2.1000000000000001e+00 -4.5760413642809183e-01 -4.4706514213149759e+00 -2.7557474231313694e+00 1 1 0 -633 1 2 2.1000000000000001e+00 -5.3977240818138661e-01 -7.4238320200344070e+00 -2.7625778432626920e+00 1 1 0 -634 1 3 -1.0500000000000000e+00 -3.1848626824941739e-01 -4.5424699901676249e+00 -1.1390731294877057e+00 1 1 0 -635 1 3 -1.0500000000000000e+00 -6.7640904217469000e-01 -7.3292383397478211e+00 -1.1416941919961197e+00 1 1 0 -636 1 4 -9.4999999999999996e-01 -3.2039627843292253e+00 -6.1501644366493018e+00 -1.0146703056020741e+00 1 1 0 -637 1 3 -1.0500000000000000e+00 -1.8453264957454394e+00 -8.1246107934229581e+00 -3.2400982422800482e+00 1 1 0 -638 1 3 -1.0500000000000000e+00 -4.2476357784045682e-01 -5.9511378171462663e+00 -3.2430303304121573e+00 1 1 0 -639 1 3 -1.0500000000000000e+00 7.2287180950103291e-01 -8.2820563729161005e+00 -3.0474757316959042e+00 1 1 0 -640 1 5 4.2499999999999999e-01 -2.6982027688223678e+00 -5.2774696728243740e+00 -1.1880236588853865e+00 1 1 0 -641 1 1 1.5750000000000000e+00 1.1619653017597216e+00 1.1410397746683358e+00 9.1918628908884870e+00 0 0 0 -642 1 2 2.1000000000000001e+00 5.3482120140535887e+00 8.8785278860666175e+00 -6.4521175757047988e+00 0 0 1 -643 1 2 2.1000000000000001e+00 5.3756161067166044e+00 2.8704051174475218e+00 -6.4440596879171785e+00 0 0 1 -644 1 3 -1.0500000000000000e+00 5.1513824811129894e+00 -1.2483323209846020e-02 -8.0699180714227818e+00 0 0 1 -645 1 3 -1.0500000000000000e+00 5.5125063426240626e+00 2.7779921989870822e+00 -8.0689170104942836e+00 0 0 1 -646 1 4 -9.4999999999999996e-01 2.8707824074952111e+00 1.5776832321695906e+00 -8.1949923896510004e+00 0 0 1 -647 1 3 -1.0500000000000000e+00 1.5183448281546745e+00 3.5735858867446630e+00 -5.9676617293586913e+00 0 0 1 -648 1 3 -1.0500000000000000e+00 5.2636629480887009e+00 1.3956934813352184e+00 -5.9680409902933684e+00 0 0 1 -649 1 3 -1.0500000000000000e+00 4.1072776640863022e+00 
3.7186667794164769e+00 -6.1625912616753471e+00 0 0 1 -650 1 5 4.2499999999999999e-01 2.3618815780483793e+00 6.9993703811269370e-01 -8.0548023620137119e+00 0 0 1 -651 1 1 1.5750000000000000e+00 3.7720873820172987e+00 5.6213400023662139e+00 9.1935525128423343e+00 0 0 0 -652 1 2 2.1000000000000001e+00 2.7425373193003235e+00 4.3976711435195490e+00 -6.4504365503983987e+00 0 0 1 -653 1 2 2.1000000000000001e+00 2.8207009956787434e+00 7.3515370916477103e+00 -6.4430847277355952e+00 0 0 1 -654 1 3 -1.0500000000000000e+00 2.5992565408495842e+00 4.4721677488282907e+00 -8.0688120651190864e+00 0 0 1 -655 1 3 -1.0500000000000000e+00 2.9570677395783225e+00 7.2579703948586243e+00 -8.0675985813153499e+00 0 0 1 -656 1 4 -9.4999999999999996e-01 5.4750360928108588e+00 6.0547214596860002e+00 -8.2007964802884032e+00 0 0 1 -657 1 3 -1.0500000000000000e+00 4.1219223544438357e+00 8.0581779431143303e+00 -5.9676364601265730e+00 0 0 1 -658 1 3 -1.0500000000000000e+00 2.7125672713879840e+00 5.8775405788520558e+00 -5.9639955286706536e+00 0 0 1 -659 1 3 -1.0500000000000000e+00 1.5501673725519289e+00 8.1950766543449802e+00 -6.1605955196254083e+00 0 0 1 -660 1 5 4.2499999999999999e-01 4.9665529277089835e+00 5.1716053945125253e+00 -8.0925139065458236e+00 0 0 1 -661 1 1 1.5750000000000000e+00 1.1780823509209686e+00 7.1278830106107129e+00 9.1794557644119550e+00 0 0 0 -662 1 2 2.1000000000000001e+00 3.8889022172935661e-01 -2.4961732932459668e-01 6.4302158092980584e+00 0 0 0 -663 1 2 2.1000000000000001e+00 3.6197352537882566e-01 5.7595949541908276e+00 6.4204291396255115e+00 0 0 0 -664 1 3 -1.0500000000000000e+00 5.8774490476124264e-01 8.6366165518115672e+00 8.0464291478225078e+00 0 0 0 -665 1 3 -1.0500000000000000e+00 2.2541633029667096e-01 5.8546514202919830e+00 8.0453149381947711e+00 0 0 0 -666 1 4 -9.4999999999999996e-01 2.8649518385571877e+00 7.0414705396206436e+00 8.1670660423171952e+00 0 0 0 -667 1 3 -1.0500000000000000e+00 4.2167757389700569e+00 5.0583935920416749e+00 5.9473206668998486e+00 0 0 0 
-668 1 3 -1.0500000000000000e+00 4.7475954047286173e-01 7.2347183117084590e+00 5.9437722567568052e+00 0 0 0 -669 1 3 -1.0500000000000000e+00 1.6281216656912587e+00 4.9070694798622227e+00 6.1416918235048161e+00 0 0 0 -670 1 5 4.2499999999999999e-01 3.3702642009780064e+00 7.9087008791570490e+00 7.9654949234581061e+00 0 0 0 -671 1 1 1.5750000000000000e+00 -1.4313372458792628e+00 2.6468514725993337e+00 9.1792579600199531e+00 0 0 0 -672 1 2 2.1000000000000001e+00 2.9951130295318418e+00 4.2311987071544586e+00 6.4301644557642277e+00 0 0 0 -673 1 2 2.1000000000000001e+00 2.9179140842435096e+00 1.2768701006788881e+00 6.4203463158624281e+00 0 0 0 -674 1 3 -1.0500000000000000e+00 3.1382814524649731e+00 4.1560094414318982e+00 8.0483097968687503e+00 0 0 0 -675 1 3 -1.0500000000000000e+00 2.7844907315329710e+00 1.3702502170217983e+00 8.0465500877165432e+00 0 0 0 -676 1 4 -9.4999999999999996e-01 2.6515480135039837e-01 2.5711904904263143e+00 8.1785856650471587e+00 0 0 0 -677 1 3 -1.0500000000000000e+00 1.6126931539633631e+00 5.7621432647207627e-01 5.9494856028376670e+00 0 0 0 -678 1 3 -1.0500000000000000e+00 3.0295489901588049e+00 2.7521218732731114e+00 5.9450341490346013e+00 0 0 0 -679 1 3 -1.0500000000000000e+00 4.1839509599675040e+00 4.2743274941575393e-01 6.1368174382011684e+00 0 0 0 -680 1 5 4.2499999999999999e-01 7.7501198062341459e-01 3.4495347076940632e+00 8.0416481051605047e+00 0 0 0 -681 1 1 1.5750000000000000e+00 6.3219716216801309e+00 1.1410373510774789e+00 9.1918643783338965e+00 0 0 0 -682 1 2 2.1000000000000001e+00 -1.0131788239632902e+01 8.8785278357562056e+00 -6.4521165834469247e+00 1 0 1 -683 1 2 2.1000000000000001e+00 -1.0104383385435193e+01 2.8704043887222177e+00 -6.4440588134660706e+00 1 0 1 -684 1 3 -1.0500000000000000e+00 -1.0328618899312357e+01 -1.2482927518092879e-02 -8.0699179687262284e+00 1 0 1 -685 1 3 -1.0500000000000000e+00 -9.9674921714269313e+00 2.7779913324784928e+00 -8.0689168759828274e+00 1 0 1 -686 1 4 -9.4999999999999996e-01 
8.0307779176995382e+00 1.5776835914964877e+00 -8.1949892817862988e+00 0 0 1 -687 1 3 -1.0500000000000000e+00 6.6783440369410876e+00 3.5735858738359525e+00 -5.9676617411755108e+00 0 0 1 -688 1 3 -1.0500000000000000e+00 -1.0216336690371481e+01 1.3956937383188581e+00 -5.9680402070754761e+00 1 0 1 -689 1 3 -1.0500000000000000e+00 9.2672778657800094e+00 3.7186670694972150e+00 -6.1625926106661444e+00 0 0 1 -690 1 5 4.2499999999999999e-01 7.5218938878248451e+00 6.9992914332407352e-01 -8.0547916809438949e+00 0 0 1 -691 1 1 1.5750000000000000e+00 -1.1707917404057641e+01 5.6213390017706928e+00 9.1935505818605883e+00 1 0 0 -692 1 2 2.1000000000000001e+00 7.9025382379131166e+00 4.3976712881134681e+00 -6.4504342967107533e+00 0 0 1 -693 1 2 2.1000000000000001e+00 7.9807021527956650e+00 7.3515373618461162e+00 -6.4430821481306735e+00 0 0 1 -694 1 3 -1.0500000000000000e+00 7.7592544353633706e+00 4.4721686212574419e+00 -8.0688109551124541e+00 0 0 1 -695 1 3 -1.0500000000000000e+00 8.1170648572959507e+00 7.2579709818705247e+00 -8.0675974692353218e+00 0 0 1 -696 1 4 -9.4999999999999996e-01 -1.0004962284329546e+01 6.0547235831110164e+00 -8.2007938886564311e+00 1 0 1 -697 1 3 -1.0500000000000000e+00 9.2819223520646474e+00 8.0581780644489562e+00 -5.9676359092506726e+00 0 0 1 -698 1 3 -1.0500000000000000e+00 7.8725655048664294e+00 5.8775408887777125e+00 -5.9639945786136659e+00 0 0 1 -699 1 3 -1.0500000000000000e+00 6.7101664880056973e+00 8.1950764117394037e+00 -6.1605934556647206e+00 0 0 1 -700 1 5 4.2499999999999999e-01 1.0126560972312859e+01 5.1716067730330728e+00 -8.0924892322925537e+00 0 0 1 -701 1 1 1.5750000000000000e+00 6.3380890551619835e+00 7.1278820924118449e+00 9.1794568247133768e+00 0 0 0 -702 1 2 2.1000000000000001e+00 5.5488909133454545e+00 -2.4961712179031892e-01 6.4302174413264499e+00 0 0 0 -703 1 2 2.1000000000000001e+00 5.5219751931240264e+00 5.7595947918292367e+00 6.4204302903978263e+00 0 0 0 -704 1 3 -1.0500000000000000e+00 5.7477422288524451e+00 8.6366187123915950e+00 
8.0464294588614749e+00 0 0 0 -705 1 3 -1.0500000000000000e+00 5.3854158667145668e+00 5.8546522158454977e+00 8.0453157533778565e+00 0 0 0 -706 1 4 -9.4999999999999996e-01 -1.2615048407561728e+01 7.0414742204401541e+00 8.1670698103044472e+00 1 0 0 -707 1 3 -1.0500000000000000e+00 -1.1263223647398949e+01 5.0583921238930998e+00 5.9473203127071343e+00 1 0 0 -708 1 3 -1.0500000000000000e+00 5.6347554696665227e+00 7.2347193424397354e+00 5.9437729671344570e+00 0 0 0 -709 1 3 -1.0500000000000000e+00 6.7881217602424861e+00 4.9070714345696160e+00 6.1416899736831425e+00 0 0 0 -710 1 5 4.2499999999999999e-01 -1.2109738888915201e+01 7.9087103057462862e+00 7.9655145894109900e+00 1 0 0 -711 1 1 1.5750000000000000e+00 3.7286660766631314e+00 2.6468497544138820e+00 9.1792617844902722e+00 0 0 0 -712 1 2 2.1000000000000001e+00 -1.2484888381054626e+01 4.2311986767793606e+00 6.4301626543881074e+00 1 0 0 -713 1 2 2.1000000000000001e+00 -1.2562086164763274e+01 1.2768702711800870e+00 6.4203454232835657e+00 1 0 0 -714 1 3 -1.0500000000000000e+00 -1.2341719856751300e+01 4.1560099014284795e+00 8.0483097925880998e+00 1 0 0 -715 1 3 -1.0500000000000000e+00 -1.2695510322868529e+01 1.3702498395932317e+00 8.0465506442101500e+00 1 0 0 -716 1 4 -9.4999999999999996e-01 5.4251558501708992e+00 2.5711938106787109e+00 8.1785935841135213e+00 0 0 0 -717 1 3 -1.0500000000000000e+00 6.7726925328232710e+00 5.7621426238668505e-01 5.9494861086459743e+00 0 0 0 -718 1 3 -1.0500000000000000e+00 -1.2450451848110724e+01 2.7521219957727290e+00 5.9450339763644990e+00 1 0 0 -719 1 3 -1.0500000000000000e+00 -1.1296049224848582e+01 4.2743232356497529e-01 6.1368187720233625e+00 1 0 0 -720 1 5 4.2499999999999999e-01 5.9350316957814258e+00 3.4495340265009595e+00 8.0416948991733790e+00 0 0 0 -721 1 1 1.5750000000000000e+00 -9.1580347227906209e+00 1.1410397798499794e+00 9.1918626272116661e+00 1 0 0 -722 1 2 2.1000000000000001e+00 -4.9717880189991721e+00 8.8785278201231783e+00 -6.4521175686831196e+00 1 0 1 -723 1 2 
2.1000000000000001e+00 -4.9443840032931767e+00 2.8704050938020096e+00 -6.4440597689439816e+00 1 0 1 -724 1 3 -1.0500000000000000e+00 -5.1686174520233639e+00 -1.2483480882405473e-02 -8.0699180947438318e+00 1 0 1 -725 1 3 -1.0500000000000000e+00 -4.8074936175278511e+00 2.7779922774163808e+00 -8.0689170718296666e+00 1 0 1 -726 1 4 -9.4999999999999996e-01 -7.4492175991400735e+00 1.5776831572608536e+00 -8.1949924919480921e+00 1 0 1 -727 1 3 -1.0500000000000000e+00 -8.8016551771884011e+00 3.5735860641514527e+00 -5.9676617030199273e+00 1 0 1 -728 1 3 -1.0500000000000000e+00 -5.0563370392538198e+00 1.3956935709363769e+00 -5.9680411398900644e+00 1 0 1 -729 1 3 -1.0500000000000000e+00 -6.2127223137861858e+00 3.7186665845338389e+00 -6.1625913527749985e+00 1 0 1 -730 1 5 4.2499999999999999e-01 -7.9581185117646847e+00 6.9993698697291507e-01 -8.0548023115157541e+00 1 0 1 -731 1 1 1.5750000000000000e+00 -6.5479126145972710e+00 5.6213400149017012e+00 9.1935524115213845e+00 1 0 0 -732 1 2 2.1000000000000001e+00 -7.5774628478888308e+00 4.3976711263961050e+00 -6.4504365563071975e+00 1 0 1 -733 1 2 2.1000000000000001e+00 -7.4992988909106950e+00 7.3515370681573700e+00 -6.4430847045445683e+00 1 0 1 -734 1 3 -1.0500000000000000e+00 -7.7207435424701583e+00 4.4721677213608118e+00 -8.0688120850430014e+00 1 0 1 -735 1 3 -1.0500000000000000e+00 -7.3629322045265049e+00 7.2579704287181848e+00 -8.0675985506078494e+00 1 0 1 -736 1 4 -9.4999999999999996e-01 -4.8449639360506920e+00 6.0547214987994096e+00 -8.2007965449287141e+00 1 0 1 -737 1 3 -1.0500000000000000e+00 -6.1980776921711218e+00 8.0581778891847513e+00 -5.9676363926129730e+00 1 0 1 -738 1 3 -1.0500000000000000e+00 -7.6074326998544528e+00 5.8775406785414894e+00 -5.9639955482894935e+00 1 0 1 -739 1 3 -1.0500000000000000e+00 -8.7698325795395640e+00 8.1950767571874437e+00 -6.1605955458705317e+00 1 0 1 -740 1 5 4.2499999999999999e-01 -5.3534470791472577e+00 5.1716052307200862e+00 -8.0925139229449030e+00 1 0 1 -741 1 1 1.5750000000000000e+00 
-9.1419176491890060e+00 7.1278829736280436e+00 9.1794557746441399e+00 1 0 0 -742 1 2 2.1000000000000001e+00 -9.9311097929927428e+00 -2.4961748404046347e-01 6.4302158103889901e+00 1 0 0 -743 1 2 2.1000000000000001e+00 -9.9580264639996461e+00 5.7595949482413538e+00 6.4204290711408305e+00 1 0 0 -744 1 3 -1.0500000000000000e+00 -9.7322550718260761e+00 8.6366164883267658e+00 8.0464292422039811e+00 1 0 0 -745 1 3 -1.0500000000000000e+00 -1.0094583625953563e+01 5.8546513704827170e+00 8.0453149495769054e+00 1 0 0 -746 1 4 -9.4999999999999996e-01 -7.4550481753199449e+00 7.0414705028671030e+00 8.1670660825583425e+00 1 0 0 -747 1 3 -1.0500000000000000e+00 -6.1032242432072978e+00 5.0583936701665522e+00 5.9473206547742059e+00 1 0 0 -748 1 3 -1.0500000000000000e+00 -9.8452404225728625e+00 7.2347183307571576e+00 5.9437722178602801e+00 1 0 0 -749 1 3 -1.0500000000000000e+00 -8.6918782477832082e+00 4.9070694966111468e+00 6.1416918078354001e+00 1 0 0 -750 1 5 4.2499999999999999e-01 -6.9497357564641584e+00 7.9087010545917416e+00 7.9654952679455882e+00 1 0 0 -751 1 1 1.5750000000000000e+00 -1.1751337311047225e+01 2.6468513447896100e+00 9.1792580773032242e+00 1 0 0 -752 1 2 2.1000000000000001e+00 -7.3248869041654459e+00 4.2311987913820701e+00 6.4301644191631269e+00 1 0 0 -753 1 2 2.1000000000000001e+00 -7.4020860214690067e+00 1.2768701250122341e+00 6.4203463163303596e+00 1 0 0 -754 1 3 -1.0500000000000000e+00 -7.1817184613676339e+00 4.1560095157768409e+00 8.0483098100061383e+00 1 0 0 -755 1 3 -1.0500000000000000e+00 -7.5355093448539057e+00 1.3702502198088133e+00 8.0465500892182433e+00 1 0 0 -756 1 4 -9.4999999999999996e-01 -1.0054845155370645e+01 2.5711905205222791e+00 8.1785856053139199e+00 1 0 0 -757 1 3 -1.0500000000000000e+00 -8.7073068789184980e+00 5.7621447876162435e-01 5.9494856166972010e+00 1 0 0 -758 1 3 -1.0500000000000000e+00 -7.2904509788921148e+00 2.7521218811317745e+00 5.9450342020159770e+00 1 0 0 -759 1 3 -1.0500000000000000e+00 -6.1360488973737075e+00 
4.2743265659879626e-01 6.1368175216136223e+00 1 0 0 -760 1 5 4.2499999999999999e-01 -9.5449881570252071e+00 3.4495347829749932e+00 8.0416480162169961e+00 1 0 0 -761 1 1 1.5750000000000000e+00 -3.9980283838608042e+00 1.1410374222607160e+00 9.1918643704566918e+00 1 0 0 -762 1 2 2.1000000000000001e+00 1.8821174679660579e-01 8.8785278967404935e+00 -6.4521165494653818e+00 1 0 1 -763 1 2 2.1000000000000001e+00 2.1561667669842954e-01 2.8704045589712663e+00 -6.4440588386536399e+00 1 0 1 -764 1 3 -1.0500000000000000e+00 -8.6188729718585932e-03 -1.2482920396955421e-02 -8.0699179546331727e+00 1 0 1 -765 1 3 -1.0500000000000000e+00 3.5250787080465251e-01 2.7779913853191438e+00 -8.0689168596637728e+00 1 0 1 -766 1 4 -9.4999999999999996e-01 -2.2892220877298080e+00 1.5776836917392316e+00 -8.1949892434186324e+00 1 0 1 -767 1 3 -1.0500000000000000e+00 -3.6416558775967207e+00 3.5735859470648457e+00 -5.9676618077622781e+00 1 0 1 -768 1 3 -1.0500000000000000e+00 1.0366324864525822e-01 1.3956935322625732e+00 -5.9680402824844023e+00 1 0 1 -769 1 3 -1.0500000000000000e+00 -1.0527219254536782e+00 3.7186671156132682e+00 -6.1625926129833219e+00 1 0 1 -770 1 5 4.2499999999999999e-01 -2.7981063006237710e+00 6.9992930292593769e-01 -8.0547914410968673e+00 1 0 1 -771 1 1 1.5750000000000000e+00 -1.3879174610500087e+00 5.6213390517670483e+00 9.1935507059286543e+00 1 0 0 -772 1 2 2.1000000000000001e+00 -2.4174618429239194e+00 4.3976712422938320e+00 -6.4504342537992549e+00 1 0 1 -773 1 2 2.1000000000000001e+00 -2.3392980010594711e+00 7.3515373610344845e+00 -6.4430821311845046e+00 1 0 1 -774 1 3 -1.0500000000000000e+00 -2.5607455140022584e+00 4.4721686901021300e+00 -8.0688109422235907e+00 1 0 1 -775 1 3 -1.0500000000000000e+00 -2.2029351636538657e+00 7.2579709988917536e+00 -8.0675974116014899e+00 1 0 1 -776 1 4 -9.4999999999999996e-01 3.1503774820546226e-01 6.0547234917768264e+00 -8.2007938602838450e+00 1 0 1 -777 1 3 -1.0500000000000000e+00 -1.0380776881650196e+00 8.0581781116326354e+00 
-5.9676358842015311e+00 1 0 1 -778 1 3 -1.0500000000000000e+00 -2.4474344798197656e+00 5.8775410837946609e+00 -5.9639945099507505e+00 1 0 1 -779 1 3 -1.0500000000000000e+00 -3.6098334082402692e+00 8.1950763808497662e+00 -6.1605934725849432e+00 1 0 1 -780 1 5 4.2499999999999999e-01 -1.9343898578893537e-01 5.1716067072862586e+00 -8.0924891469214746e+00 1 0 1 -781 1 1 1.5750000000000000e+00 -3.9819109860090673e+00 7.1278821645832515e+00 9.1794568283563684e+00 1 0 0 -782 1 2 2.1000000000000001e+00 -4.7711092505534243e+00 -2.4961708534817717e-01 6.4302175314427075e+00 1 0 0 -783 1 2 2.1000000000000001e+00 -4.7980248252941404e+00 5.7595948817686633e+00 6.4204302612729869e+00 1 0 0 -784 1 3 -1.0500000000000000e+00 -4.5722577456560627e+00 8.6366188227387966e+00 8.0464294691392837e+00 1 0 0 -785 1 3 -1.0500000000000000e+00 -4.9345841040231235e+00 5.8546522534532315e+00 8.0453157444338856e+00 1 0 0 -786 1 4 -9.4999999999999996e-01 -2.2950485136112402e+00 7.0414742449253289e+00 8.1670698432269262e+00 1 0 0 -787 1 3 -1.0500000000000000e+00 -9.4322368519229727e-01 5.0583922754040493e+00 5.9473202684856226e+00 1 0 0 -788 1 3 -1.0500000000000000e+00 -4.6852444590308222e+00 7.2347193310481188e+00 5.9437730334392516e+00 1 0 0 -789 1 3 -1.0500000000000000e+00 -3.5318782888059550e+00 4.9070714154512309e+00 6.1416899186114549e+00 1 0 0 -790 1 5 4.2499999999999999e-01 -1.7897389106805264e+00 7.9087102208661939e+00 7.9655144193934220e+00 1 0 0 -791 1 1 1.5750000000000000e+00 -6.5913339062097513e+00 2.6468497975807033e+00 9.1792619619430162e+00 1 0 0 -792 1 2 2.1000000000000001e+00 -2.1648883857568695e+00 4.2311985874890006e+00 6.4301625979673851e+00 1 0 0 -793 1 2 2.1000000000000001e+00 -2.2420861443113900e+00 1.2768702610370006e+00 6.4203454133313507e+00 1 0 0 -794 1 3 -1.0500000000000000e+00 -2.0217198603553559e+00 4.1560099426044417e+00 8.0483097791455265e+00 1 0 0 -795 1 3 -1.0500000000000000e+00 -2.3755103752685907e+00 1.3702499337738310e+00 8.0465506462058656e+00 1 0 0 -796 1 4 
-9.4999999999999996e-01 -4.8948441051275200e+00 2.5711939331276632e+00 8.1785936809047612e+00 1 0 0 -797 1 3 -1.0500000000000000e+00 -3.5473074090830519e+00 5.7621423668433280e-01 5.9494861719257255e+00 1 0 0 -798 1 3 -1.0500000000000000e+00 -2.1304518566866264e+00 2.7521222219861841e+00 5.9450338731066523e+00 1 0 0 -799 1 3 -1.0500000000000000e+00 -9.7604917996146234e-01 4.2743242093933631e-01 6.1368188122788609e+00 1 0 0 -800 1 5 4.2499999999999999e-01 -4.3849682689579170e+00 3.4495340448098268e+00 8.0416949277317578e+00 1 0 0 -801 1 1 1.5750000000000000e+00 1.2177479422747144e+00 1.0105671982265516e+01 9.1911717556069377e+00 0 0 0 -802 1 2 2.1000000000000001e+00 5.4028732967001947e+00 1.7841047479342119e+01 -6.4510393709496974e+00 0 0 1 -803 1 2 2.1000000000000001e+00 5.4240075308673585e+00 1.1832888504345338e+01 -6.4443769214046185e+00 0 0 1 -804 1 3 -1.0500000000000000e+00 5.2059743495624975e+00 8.9516977180176447e+00 -8.0703328770473846e+00 0 0 1 -805 1 3 -1.0500000000000000e+00 5.5588840586373252e+00 1.1741352776226226e+01 -8.0690858772941780e+00 0 0 1 -806 1 4 -9.4999999999999996e-01 2.9168985270330836e+00 1.0536201188631363e+01 -8.2052670067045472e+00 0 0 1 -807 1 3 -1.0500000000000000e+00 1.5636297606357399e+00 1.2542080073557504e+01 -5.9685732483531311e+00 0 0 1 -808 1 3 -1.0500000000000000e+00 5.3210914018509428e+00 1.0357894065310216e+01 -5.9658433896772838e+00 0 0 1 -809 1 3 -1.0500000000000000e+00 4.1514335827847475e+00 1.2671741145310673e+01 -6.1589981909362441e+00 0 0 1 -810 1 5 4.2499999999999999e-01 2.4094977983389114e+00 9.6488375258475223e+00 -8.1258035863780300e+00 0 0 1 -811 1 1 1.5750000000000000e+00 3.8290813183674253e+00 1.4588094908150456e+01 9.1904822717577943e+00 0 0 0 -812 1 2 2.1000000000000001e+00 2.7925866280054308e+00 1.3359115074091658e+01 -6.4525623492692255e+00 0 0 1 -813 1 2 2.1000000000000001e+00 2.8750881413978675e+00 1.6313213288169710e+01 -6.4426205302474635e+00 0 0 1 -814 1 3 -1.0500000000000000e+00 2.6503049259009615e+00 
1.3431602346148491e+01 -8.0707858145347249e+00 0 0 1 -815 1 3 -1.0500000000000000e+00 3.0081475094120620e+00 1.6222378818231615e+01 -8.0677887963907740e+00 0 0 1 -816 1 4 -9.4999999999999996e-01 5.5243143317932333e+00 1.5016770772702397e+01 -8.2123886955905689e+00 0 0 1 -817 1 3 -1.0500000000000000e+00 4.1753179887890006e+00 1.7022074121304907e+01 -5.9672615985572071e+00 0 0 1 -818 1 3 -1.0500000000000000e+00 2.7691833463288749e+00 1.4838613111571629e+01 -5.9658055828753511e+00 0 0 1 -819 1 3 -1.0500000000000000e+00 1.6036794980598117e+00 1.7155802884504208e+01 -6.1591251766682813e+00 0 0 1 -820 1 5 4.2499999999999999e-01 5.0221685222691796e+00 1.4123863643323855e+01 -8.1629756003407365e+00 0 0 1 -821 1 1 1.5750000000000000e+00 1.2331770395400934e+00 1.6095014138862521e+01 9.1822428741097468e+00 0 0 0 -822 1 2 2.1000000000000001e+00 4.4129718258878015e-01 8.7138958244715710e+00 6.4301474980568685e+00 0 0 0 -823 1 2 2.1000000000000001e+00 4.1752319809116045e-01 1.4734898981721944e+01 6.4178631158609107e+00 0 0 0 -824 1 3 -1.0500000000000000e+00 4.1883489566335541e-01 -1.8247870537506458e+01 8.0424107817384680e+00 0 1 0 -825 1 3 -1.0500000000000000e+00 2.7622323541307736e-01 1.4831722361351208e+01 8.0398140014218953e+00 0 0 0 -826 1 4 -9.4999999999999996e-01 2.9156944733144918e+00 1.6010679331655705e+01 8.1611745701771810e+00 0 0 0 -827 1 3 -1.0500000000000000e+00 4.2728540700450157e+00 1.4029989056461162e+01 5.9413170693799291e+00 0 0 0 -828 1 3 -1.0500000000000000e+00 5.3535017273589602e-01 1.6208054197848018e+01 5.9392693380392956e+00 0 0 0 -829 1 3 -1.0500000000000000e+00 1.6796749377377100e+00 1.3871487853290073e+01 6.1449486054624938e+00 0 0 0 -830 1 5 4.2499999999999999e-01 3.4211623366195258e+00 1.6876786250321953e+01 7.9560565011701314e+00 0 0 0 -831 1 1 1.5750000000000000e+00 -1.3759843249390880e+00 1.1611079999170819e+01 9.1830980569222831e+00 0 0 0 -832 1 2 2.1000000000000001e+00 3.0527880800955636e+00 1.3202144761399200e+01 6.4273498600193371e+00 0 0 0 
-833 1 2 2.1000000000000001e+00 2.9673215878209103e+00 1.0245189832019808e+01 6.4204800418978696e+00 0 0 0 -834 1 3 -1.0500000000000000e+00 3.2015499780975034e+00 1.3127091416882063e+01 8.0423603495187947e+00 0 0 0 -835 1 3 -1.0500000000000000e+00 2.8259789742977919e+00 1.0342454044816325e+01 8.0433863917706638e+00 0 0 0 -836 1 4 -9.4999999999999996e-01 3.0064366955206090e-01 1.1508815470503929e+01 8.1505562026208125e+00 0 0 0 -837 1 3 -1.0500000000000000e+00 1.6614027425467199e+00 9.5439376114872481e+00 5.9459937739279116e+00 0 0 0 -838 1 3 -1.0500000000000000e+00 3.0830644367038236e+00 1.1720322911391666e+01 5.9430531474743198e+00 0 0 0 -839 1 3 -1.0500000000000000e+00 4.2313377728908055e+00 9.3875250744465077e+00 6.1457452491624043e+00 0 0 0 -840 1 5 4.2499999999999999e-01 7.9198352355202317e-01 1.2351589306998509e+01 7.8457453500151573e+00 0 0 0 -841 1 1 1.5750000000000000e+00 6.3777545701125469e+00 1.0105667684315804e+01 9.1911738439653590e+00 0 0 0 -842 1 2 2.1000000000000001e+00 -1.0077126973066095e+01 1.7841048066988467e+01 -6.4510384915070969e+00 1 0 1 -843 1 2 2.1000000000000001e+00 -1.0055991379073761e+01 1.1832887913573085e+01 -6.4443753288950987e+00 1 0 1 -844 1 3 -1.0500000000000000e+00 -1.0274027201203799e+01 8.9516993311558757e+00 -8.0703328474822609e+00 1 0 1 -845 1 3 -1.0500000000000000e+00 -9.9211141217048002e+00 1.1741352341838013e+01 -8.0690851998921449e+00 1 0 1 -846 1 4 -9.4999999999999996e-01 8.0768934848220262e+00 1.0536198386355188e+01 -8.2052670955592522e+00 0 0 1 -847 1 3 -1.0500000000000000e+00 6.7236288850805721e+00 1.2542079749007890e+01 -5.9685734436631508e+00 0 0 1 -848 1 3 -1.0500000000000000e+00 -1.0158907477232066e+01 1.0357893926340395e+01 -5.9658422920099223e+00 1 0 1 -849 1 3 -1.0500000000000000e+00 9.3114341872216642e+00 1.2671741271744100e+01 -6.1589985867465780e+00 0 0 1 -850 1 5 4.2499999999999999e-01 7.5695071947652650e+00 9.6488258439900001e+00 -8.1258126492240628e+00 0 0 1 -851 1 1 1.5750000000000000e+00 
-1.1650923159202367e+01 1.4588095950758198e+01 9.1904798536644492e+00 1 0 0 -852 1 2 2.1000000000000001e+00 7.9525876163438554e+00 1.3359114679994811e+01 -6.4525587899973962e+00 0 0 1 -853 1 2 2.1000000000000001e+00 8.0350897211099301e+00 1.6313212540633796e+01 -6.4426182337725733e+00 0 0 1 -854 1 3 -1.0500000000000000e+00 7.8103042231324800e+00 1.3431603351201368e+01 -8.0707837017577955e+00 0 0 1 -855 1 3 -1.0500000000000000e+00 8.1681456916438364e+00 1.6222378030788409e+01 -8.0677883236843471e+00 0 0 1 -856 1 4 -9.4999999999999996e-01 -9.9556833949247512e+00 1.5016774461714871e+01 -8.2123851998623305e+00 1 0 1 -857 1 3 -1.0500000000000000e+00 9.3353181196517667e+00 1.7022074594246558e+01 -5.9672613121392217e+00 0 0 1 -858 1 3 -1.0500000000000000e+00 7.9291821647262068e+00 1.4838613342065170e+01 -5.9658039276162684e+00 0 0 1 -859 1 3 -1.0500000000000000e+00 6.7636790678813732e+00 1.7155801838896334e+01 -6.1591246566687987e+00 0 0 1 -860 1 5 4.2499999999999999e-01 1.0182177599163175e+01 1.4123865654589640e+01 -8.1629432107445403e+00 0 0 1 -861 1 1 1.5750000000000000e+00 6.3931842783486914e+00 1.6095012713377930e+01 9.1822434729967597e+00 0 0 0 -862 1 2 2.1000000000000001e+00 5.6012990331574937e+00 8.7138966397079116e+00 6.4301486124124203e+00 0 0 0 -863 1 2 2.1000000000000001e+00 5.5775249199936638e+00 1.4734897619063954e+01 6.4178617364374944e+00 0 0 0 -864 1 3 -1.0500000000000000e+00 5.5788295301888660e+00 -1.8247870165251108e+01 8.0424119568045853e+00 0 1 0 -865 1 3 -1.0500000000000000e+00 5.4362249098911484e+00 1.4831717288237943e+01 8.0398140223076879e+00 0 0 0 -866 1 4 -9.4999999999999996e-01 -1.2564310693829743e+01 1.6010676769129201e+01 8.1611713440261440e+00 1 0 0 -867 1 3 -1.0500000000000000e+00 -1.1207143950092348e+01 1.4029986278460989e+01 5.9413195179982061e+00 1 0 0 -868 1 3 -1.0500000000000000e+00 5.6953458787218523e+00 1.6208054291476355e+01 5.9392705796502394e+00 0 0 0 -869 1 3 -1.0500000000000000e+00 6.8396767542621717e+00 1.3871492097319550e+01 
6.1449412179435132e+00 0 0 0 -870 1 5 4.2499999999999999e-01 -1.2058848584875113e+01 1.6876779076042485e+01 7.9560212428570303e+00 1 0 0 -871 1 1 1.5750000000000000e+00 3.7840177232977616e+00 1.1611079484861524e+01 9.1831010929109560e+00 0 0 0 -872 1 2 2.1000000000000001e+00 -1.2427215612579261e+01 1.3202143622769825e+01 6.4273466314087671e+00 1 0 0 -873 1 2 2.1000000000000001e+00 -1.2512679322828710e+01 1.0245189139867012e+01 6.4204793854865159e+00 1 0 0 -874 1 3 -1.0500000000000000e+00 -1.2278455708027948e+01 1.3127090128534629e+01 8.0423600282608092e+00 1 0 0 -875 1 3 -1.0500000000000000e+00 -1.2654022792530149e+01 1.0342451017430680e+01 8.0433873354140708e+00 1 0 0 -876 1 4 -9.4999999999999996e-01 5.4606516129555338e+00 1.1508831418138509e+01 8.1505740089422005e+00 0 0 0 -877 1 3 -1.0500000000000000e+00 6.8214021263856957e+00 9.5439370900882814e+00 5.9459924821809302e+00 0 0 0 -878 1 3 -1.0500000000000000e+00 -1.2396934274295841e+01 1.1720321812894195e+01 5.9430529111724120e+00 1 0 0 -879 1 3 -1.0500000000000000e+00 -1.1248661882853874e+01 9.3875244774101176e+00 6.1457454060416055e+00 1 0 0 -880 1 5 4.2499999999999999e-01 5.9520270019183457e+00 1.2351632966716323e+01 7.8458925722181974e+00 0 0 0 -881 1 1 1.5750000000000000e+00 -9.1022522694050068e+00 1.0105671937327052e+01 9.1911717971133697e+00 1 0 0 -882 1 2 2.1000000000000001e+00 -4.9171268547512970e+00 1.7841047470065750e+01 -6.4510392901480689e+00 1 0 1 -883 1 2 2.1000000000000001e+00 -4.8959923701017676e+00 1.1832888566849096e+01 -6.4443768802257528e+00 1 0 1 -884 1 3 -1.0500000000000000e+00 -5.1140256929443391e+00 8.9516976983461944e+00 -8.0703328971661552e+00 1 0 1 -885 1 3 -1.0500000000000000e+00 -4.7611158935790332e+00 1.1741352748789584e+01 -8.0690858572863000e+00 1 0 1 -886 1 4 -9.4999999999999996e-01 -7.4031014695429942e+00 1.0536201141233821e+01 -8.2052670157866192e+00 1 0 1 -887 1 3 -1.0500000000000000e+00 -8.7563700953138284e+00 1.2542080122456209e+01 -5.9685733144865303e+00 1 0 1 -888 1 3 
-1.0500000000000000e+00 -4.9989086718483486e+00 1.0357894164171565e+01 -5.9658434396227058e+00 1 0 1 -889 1 3 -1.0500000000000000e+00 -6.1685664173196209e+00 1.2671741253418649e+01 -6.1589982801092855e+00 1 0 1 -890 1 5 4.2499999999999999e-01 -7.9105023111370452e+00 9.6488375017018981e+00 -8.1258036226931782e+00 1 0 1 -891 1 1 1.5750000000000000e+00 -6.4909187507470039e+00 1.4588094770216596e+01 9.1904821639654593e+00 1 0 0 -892 1 2 2.1000000000000001e+00 -7.5274133357120867e+00 1.3359115093276227e+01 -6.4525623984732556e+00 1 0 1 -893 1 2 2.1000000000000001e+00 -7.4449118405276966e+00 1.6313213297232334e+01 -6.4426204675256908e+00 1 0 1 -894 1 3 -1.0500000000000000e+00 -7.6696949923829481e+00 1.3431602517557881e+01 -8.0707858532040149e+00 1 0 1 -895 1 3 -1.0500000000000000e+00 -7.3118524352561227e+00 1.6222378966293132e+01 -8.0677887694319370e+00 1 0 1 -896 1 4 -9.4999999999999996e-01 -4.7956857566450255e+00 1.5016770784126745e+01 -8.2123886789056861e+00 1 0 1 -897 1 3 -1.0500000000000000e+00 -6.1446818768254374e+00 1.7022074306365464e+01 -5.9672614593540150e+00 1 0 1 -898 1 3 -1.0500000000000000e+00 -7.5508166104294663e+00 1.4838613411260024e+01 -5.9658055442050921e+00 1 0 1 -899 1 3 -1.0500000000000000e+00 -8.7163205154127610e+00 1.7155802914658064e+01 -6.1591252058023631e+00 1 0 1 -900 1 5 4.2499999999999999e-01 -5.2978316462079613e+00 1.4123863683288381e+01 -8.1629756450535460e+00 1 0 1 -901 1 1 1.5750000000000000e+00 -9.0868230340938076e+00 1.6095014253551401e+01 9.1822429849013574e+00 1 0 0 -902 1 2 2.1000000000000001e+00 -9.8787027837723826e+00 8.7138958525350709e+00 6.4301475131805184e+00 1 0 0 -903 1 2 2.1000000000000001e+00 -9.9024768736753419e+00 1.4734899247776699e+01 6.4178631872384884e+00 1 0 0 -904 1 3 -1.0500000000000000e+00 -9.9011652233354219e+00 -1.8247870435209173e+01 8.0424108766988311e+00 1 1 0 -905 1 3 -1.0500000000000000e+00 -1.0043776915761457e+01 1.4831722487884203e+01 8.0398140567405569e+00 1 0 0 -906 1 4 -9.4999999999999996e-01 
-7.4043056347881517e+00 1.6010679322302277e+01 8.1611745788545207e+00 1 0 0 -907 1 3 -1.0500000000000000e+00 -6.0471458374084346e+00 1.4029989212057501e+01 5.9413170765642604e+00 1 0 0 -908 1 3 -1.0500000000000000e+00 -9.7846498498984200e+00 1.6208053963201007e+01 5.9392694119191489e+00 1 0 0 -909 1 3 -1.0500000000000000e+00 -8.6403249745380908e+00 1.3871487788356372e+01 6.1449487540540950e+00 1 0 0 -910 1 5 4.2499999999999999e-01 -6.8988376249556822e+00 1.6876786096086494e+01 7.9560563009973535e+00 1 0 0 -911 1 1 1.5750000000000000e+00 -1.1695984401158782e+01 1.1611080093771111e+01 9.1830980229591539e+00 1 0 0 -912 1 2 2.1000000000000001e+00 -7.2672120871228163e+00 1.3202144841245250e+01 6.4273499963702072e+00 1 0 0 -913 1 2 2.1000000000000001e+00 -7.3526784284462927e+00 1.0245189969725434e+01 6.4204801269076981e+00 1 0 0 -914 1 3 -1.0500000000000000e+00 -7.1184499659398561e+00 1.3127091531922968e+01 8.0423604517473954e+00 1 0 0 -915 1 3 -1.0500000000000000e+00 -7.4940209309196444e+00 1.0342453998833971e+01 8.0433864964999486e+00 1 0 0 -916 1 4 -9.4999999999999996e-01 -1.0019356367224280e+01 1.1508815488031427e+01 8.1505562658575705e+00 1 0 0 -917 1 3 -1.0500000000000000e+00 -8.6585972999644625e+00 9.5439375717253760e+00 5.9459938430820074e+00 1 0 0 -918 1 3 -1.0500000000000000e+00 -7.2369355259295824e+00 1.1720322752988189e+01 5.9430532929288908e+00 1 0 0 -919 1 3 -1.0500000000000000e+00 -6.0886621514313672e+00 9.3875250239704080e+00 6.1457452592640376e+00 1 0 0 -920 1 5 4.2499999999999999e-01 -9.5280164182865796e+00 1.2351589220797447e+01 7.8457453969920330e+00 1 0 0 -921 1 1 1.5750000000000000e+00 -3.9422453188159814e+00 1.0105667828300355e+01 9.1911737165519902e+00 1 0 0 -922 1 2 2.1000000000000001e+00 2.4287304870091653e-01 1.7841047967974742e+01 -6.4510384071405671e+00 1 0 1 -923 1 2 2.1000000000000001e+00 2.6400852999029567e-01 1.1832887919475240e+01 -6.4443752797344844e+00 1 0 1 -924 1 3 -1.0500000000000000e+00 4.5972814395810246e-02 8.9516992498077919e+00 
-8.0703328323157866e+00 1 0 1 -925 1 3 -1.0500000000000000e+00 3.9888578426161736e-01 1.1741352265513999e+01 -8.0690851294904178e+00 1 0 1 -926 1 4 -9.4999999999999996e-01 -2.2431065510168366e+00 1.0536198385527367e+01 -8.2052670529389200e+00 1 0 1 -927 1 3 -1.0500000000000000e+00 -3.5963712218379982e+00 1.2542079764502770e+01 -5.9685733883639500e+00 1 0 1 -928 1 3 -1.0500000000000000e+00 1.6109252441288646e-01 1.0357893705855663e+01 -5.9658422206591473e+00 1 0 1 -929 1 3 -1.0500000000000000e+00 -1.0085658483645350e+00 1.2671741202691223e+01 -6.1589984775597637e+00 1 0 1 -930 1 5 4.2499999999999999e-01 -2.7504927541562099e+00 9.6488258793424890e+00 -8.1258123691293633e+00 1 0 1 -931 1 1 1.5750000000000000e+00 -1.3309232188979525e+00 1.4588095893891175e+01 9.1904797506843536e+00 1 0 0 -932 1 2 2.1000000000000001e+00 -2.3674124936914245e+00 1.3359114678315727e+01 -6.4525586625397615e+00 1 0 1 -933 1 2 2.1000000000000001e+00 -2.2849102597287398e+00 1.6313212618002911e+01 -6.4426182312055786e+00 1 0 1 -934 1 3 -1.0500000000000000e+00 -2.5096957885477966e+00 1.3431603349797285e+01 -8.0707836295998145e+00 1 0 1 -935 1 3 -1.0500000000000000e+00 -2.1518542688849074e+00 1.6222378039370074e+01 -8.0677883268859674e+00 1 0 1 -936 1 4 -9.4999999999999996e-01 3.6431665809300107e-01 1.5016774630679432e+01 -8.2123851096188254e+00 1 0 1 -937 1 3 -1.0500000000000000e+00 -9.8468184339734677e-01 1.7022074639986631e+01 -5.9672613095463261e+00 1 0 1 -938 1 3 -1.0500000000000000e+00 -2.3908178224096579e+00 1.4838613289135022e+01 -5.9658039222983188e+00 1 0 1 -939 1 3 -1.0500000000000000e+00 -3.5563208741252010e+00 1.7155801854759414e+01 -6.1591246855761259e+00 1 0 1 -940 1 5 4.2499999999999999e-01 -1.3782250612428726e-01 1.4123865917479133e+01 -8.1629431369693215e+00 1 0 1 -941 1 1 1.5750000000000000e+00 -3.9268157759266771e+00 1.6095012724916028e+01 9.1822436797194200e+00 1 0 0 -942 1 2 2.1000000000000001e+00 -4.7187010649966918e+00 8.7138966303192227e+00 6.4301486173009756e+00 1 0 0 
-943 1 2 2.1000000000000001e+00 -4.7424751888790908e+00 1.4734897540435224e+01 6.4178617064972627e+00 1 0 0 -944 1 3 -1.0500000000000000e+00 -4.7411704061541240e+00 -1.8247870107983218e+01 8.0424119387507034e+00 1 1 0 -945 1 3 -1.0500000000000000e+00 -4.8837751081817249e+00 1.4831717198664347e+01 8.0398140198898709e+00 1 0 0 -946 1 4 -9.4999999999999996e-01 -2.2443106077551036e+00 1.6010676599563677e+01 8.1611712410744204e+00 1 0 0 -947 1 3 -1.0500000000000000e+00 -8.8714392269505638e-01 1.4029986117459970e+01 5.9413195108517503e+00 1 0 0 -948 1 3 -1.0500000000000000e+00 -4.6246542078131423e+00 1.6208054413426698e+01 5.9392706023964301e+00 1 0 0 -949 1 3 -1.0500000000000000e+00 -3.4803231677288631e+00 1.3871492139999791e+01 6.1449411491242145e+00 1 0 0 -950 1 5 4.2499999999999999e-01 -1.7388485703866507e+00 1.6876778888613327e+01 7.9560207849164257e+00 1 0 0 -951 1 1 1.5750000000000000e+00 -6.5359822470323747e+00 1.1611079240539500e+01 9.1831013402490420e+00 1 0 0 -952 1 2 2.1000000000000001e+00 -2.1072156908568047e+00 1.3202143668003163e+01 6.4273465538995538e+00 1 0 0 -953 1 2 2.1000000000000001e+00 -2.1926794612378622e+00 1.0245189206217407e+01 6.4204792555846968e+00 1 0 0 -954 1 3 -1.0500000000000000e+00 -1.9584557511094491e+00 1.3127090075106995e+01 8.0423599514750457e+00 1 0 0 -955 1 3 -1.0500000000000000e+00 -2.3340228200333950e+00 1.0342451049403081e+01 8.0433872256329835e+00 1 0 0 -956 1 4 -9.4999999999999996e-01 -4.8593482515710296e+00 1.1508831566290684e+01 8.1505740745330648e+00 1 0 0 -957 1 3 -1.0500000000000000e+00 -3.4985978518424012e+00 9.5439371347438389e+00 5.9459924245392628e+00 1 0 0 -958 1 3 -1.0500000000000000e+00 -2.0769343959931366e+00 1.1720321778685392e+01 5.9430528213871074e+00 1 0 0 -959 1 3 -1.0500000000000000e+00 -9.2866190708267027e-01 9.3875244986496469e+00 6.1457453653597227e+00 1 0 0 -960 1 5 4.2499999999999999e-01 -4.3679729783994290e+00 1.2351633194426945e+01 7.8458931360688702e+00 1 0 0 -961 1 1 1.5750000000000000e+00 
1.0442632114566610e+00 -1.6791831350068186e+01 9.1898085308544850e+00 0 1 0 -962 1 2 2.1000000000000001e+00 5.2332756246874581e+00 -9.0514739316669424e+00 -6.4535078618394257e+00 0 1 1 -963 1 2 2.1000000000000001e+00 5.2539784539797907e+00 -1.5065512241273632e+01 -6.4432670504981342e+00 0 1 1 -964 1 3 -1.0500000000000000e+00 5.0336324263728454e+00 -1.7950429729470983e+01 -8.0701256239230403e+00 0 1 1 -965 1 3 -1.0500000000000000e+00 5.3891510369380899e+00 -1.5156434664324332e+01 -8.0701490368479547e+00 0 1 1 -966 1 4 -9.4999999999999996e-01 2.7463323553474019e+00 -1.6360952087129224e+01 -8.2013191299922568e+00 0 1 1 -967 1 3 -1.0500000000000000e+00 1.3930900751266524e+00 -1.4353420741260187e+01 -5.9691417472706023e+00 0 1 1 -968 1 3 -1.0500000000000000e+00 5.1535114536855922e+00 -1.6541895070175961e+01 -5.9679883174098816e+00 0 1 1 -969 1 3 -1.0500000000000000e+00 3.9803309181651265e+00 -1.4226791848196795e+01 -6.1613348201984870e+00 0 1 1 -970 1 5 4.2499999999999999e-01 2.2398605848769844e+00 -1.7247692988104017e+01 -8.1060357556420737e+00 0 1 1 -971 1 1 1.5750000000000000e+00 3.6542460063852165e+00 -1.2305852659793413e+01 9.1862238342266416e+00 0 1 0 -972 1 2 2.1000000000000001e+00 2.6218625651553094e+00 -1.3536712352721191e+01 -6.4515008211632647e+00 0 1 1 -973 1 2 2.1000000000000001e+00 2.7050075157658249e+00 -1.0581602843354904e+01 -6.4453762501346734e+00 0 1 1 -974 1 3 -1.0500000000000000e+00 2.4778384172838965e+00 -1.3463848700002110e+01 -8.0713144043449354e+00 0 1 1 -975 1 3 -1.0500000000000000e+00 2.8361124137645710e+00 -1.0673192059270319e+01 -8.0730494255231697e+00 0 1 1 -976 1 4 -9.4999999999999996e-01 5.3488014838601234e+00 -1.1879680284120909e+01 -8.2120386944811870e+00 0 1 1 -977 1 3 -1.0500000000000000e+00 4.0052305085030575e+00 -9.8705999028378688e+00 -5.9742882996306417e+00 0 1 1 -978 1 3 -1.0500000000000000e+00 2.6013546709879698e+00 -1.2055682201549553e+01 -5.9686014846771283e+00 0 1 1 -979 1 3 -1.0500000000000000e+00 1.4333416810233857e+00 
-9.7384021753590471e+00 -6.1636619641496271e+00 0 1 1 -980 1 5 4.2499999999999999e-01 4.8470559660982069e+00 -1.2770804911311922e+01 -8.1379363666510720e+00 0 1 1 -981 1 1 1.5750000000000000e+00 1.0597359736602527e+00 -1.0799517092550143e+01 9.1798571706247145e+00 0 1 0 -982 1 2 2.1000000000000001e+00 2.7547521540928877e-01 -1.8174880417575832e+01 6.4260694941734045e+00 0 1 0 -983 1 2 2.1000000000000001e+00 2.5313879030717779e-01 -1.2164481905755601e+01 6.4146168865788979e+00 0 1 0 -984 1 3 -1.0500000000000000e+00 4.7042668219783046e-01 -9.2817688918991870e+00 8.0435398353235712e+00 0 1 0 -985 1 3 -1.0500000000000000e+00 1.1588390488919487e-01 -1.2073117454665041e+01 8.0391531574634278e+00 0 1 0 -986 1 4 -9.4999999999999996e-01 2.7576312038366293e+00 -1.0869431055520916e+01 8.1782761503763552e+00 0 1 0 -987 1 3 -1.0500000000000000e+00 4.1081156967106267e+00 -1.2866277587844241e+01 5.9401723775815451e+00 0 1 0 -988 1 3 -1.0500000000000000e+00 3.6912101592342594e-01 -1.0690470423871826e+01 5.9399251871982823e+00 0 1 0 -989 1 3 -1.0500000000000000e+00 1.5178703168576391e+00 -1.3018430892195653e+01 6.1318438050016901e+00 0 1 0 -990 1 5 4.2499999999999999e-01 3.2639937638696352e+00 -9.9843279881447238e+00 8.0788664678259536e+00 0 1 0 -991 1 1 1.5750000000000000e+00 -1.5505507592950600e+00 -1.5284917822509893e+01 9.1845893612909535e+00 0 1 0 -992 1 2 2.1000000000000001e+00 2.8856017226586292e+00 -1.3692091836571246e+01 6.4240481700734300e+00 0 1 0 -993 1 2 2.1000000000000001e+00 2.8014391752319749e+00 -1.6645598227279866e+01 6.4177925550469812e+00 0 1 0 -994 1 3 -1.0500000000000000e+00 3.0242770280258355e+00 -1.3762431328686965e+01 8.0418182262626985e+00 0 1 0 -995 1 3 -1.0500000000000000e+00 2.6616965304781921e+00 -1.6551127271723502e+01 8.0407825558578239e+00 0 1 0 -996 1 4 -9.4999999999999996e-01 1.3938945042955808e-01 -1.5364417341877932e+01 8.1746490800763780e+00 0 1 0 -997 1 3 -1.0500000000000000e+00 1.4943750794945103e+00 -1.7346654576495691e+01 
5.9412658090625072e+00 0 1 0 -998 1 3 -1.0500000000000000e+00 2.9200312523947822e+00 -1.5172041101286172e+01 5.9399348122915292e+00 0 1 0 -999 1 3 -1.0500000000000000e+00 4.0632899091081534e+00 -1.7504603998332225e+01 6.1389449897812334e+00 0 1 0 -1000 1 5 4.2499999999999999e-01 6.4542418632720633e-01 -1.4487315856426310e+01 8.0241338462314644e+00 0 1 0 -1001 1 1 1.5750000000000000e+00 6.2042705304930799e+00 -1.6791833665711170e+01 9.1898107769316084e+00 0 1 0 -1002 1 2 2.1000000000000001e+00 -1.0246723153348846e+01 -9.0514740460000311e+00 -6.4535070277553324e+00 1 1 1 -1003 1 2 2.1000000000000001e+00 -1.0226021451768954e+01 -1.5065512319265142e+01 -6.4432655393105795e+00 1 1 1 -1004 1 3 -1.0500000000000000e+00 -1.0446369316180249e+01 -1.7950427634301246e+01 -8.0701255802333218e+00 1 1 1 -1005 1 3 -1.0500000000000000e+00 -1.0090848593079510e+01 -1.5156434727239374e+01 -8.0701482898842940e+00 1 1 1 -1006 1 4 -9.4999999999999996e-01 7.9063276726938554e+00 -1.6360953310361893e+01 -8.2013177428168405e+00 0 1 1 -1007 1 3 -1.0500000000000000e+00 6.5530892959751270e+00 -1.4353421384899608e+01 -5.9691407103781886e+00 0 1 1 -1008 1 3 -1.0500000000000000e+00 -1.0326488832838789e+01 -1.6541894355493234e+01 -5.9679869266337633e+00 1 1 1 -1009 1 3 -1.0500000000000000e+00 9.1403313167124836e+00 -1.4226789995257827e+01 -6.1613357642666884e+00 0 1 1 -1010 1 5 4.2499999999999999e-01 7.3998732609111038e+00 -1.7247703280860765e+01 -8.1060327666259262e+00 0 1 1 -1011 1 1 1.5750000000000000e+00 -1.1825758173391559e+01 -1.2305851013344945e+01 9.1862210610239927e+00 1 1 0 -1012 1 2 2.1000000000000001e+00 7.7818622374632440e+00 -1.3536712036378034e+01 -6.4514974565090304e+00 0 1 1 -1013 1 2 2.1000000000000001e+00 7.8650085976251844e+00 -1.0581602546041829e+01 -6.4453726535568485e+00 0 1 1 -1014 1 3 -1.0500000000000000e+00 7.6378350005584572e+00 -1.3463847684310938e+01 -8.0713124910916587e+00 0 1 1 -1015 1 3 -1.0500000000000000e+00 7.9961102824808421e+00 -1.0673192048149209e+01 
-8.0730469750415832e+00 0 1 1 -1016 1 4 -9.4999999999999996e-01 -1.0131194360579821e+01 -1.1879674989582783e+01 -8.2120342872500451e+00 1 1 1 -1017 1 3 -1.0500000000000000e+00 9.1652303349074202e+00 -9.8705987111370366e+00 -5.9742870591774047e+00 0 1 1 -1018 1 3 -1.0500000000000000e+00 7.7613520405943426e+00 -1.2055681364068912e+01 -5.9685995411583477e+00 0 1 1 -1019 1 3 -1.0500000000000000e+00 6.5933414576668916e+00 -9.7384004619369851e+00 -6.1636616292495159e+00 0 1 1 -1020 1 5 4.2499999999999999e-01 1.0007068462980300e+01 -1.2770800591903512e+01 -8.1378946938960510e+00 0 1 1 -1021 1 1 1.5750000000000000e+00 6.2197443454886638e+00 -1.0799519393370394e+01 9.1798595477187632e+00 0 1 0 -1022 1 2 2.1000000000000001e+00 5.4354752041455434e+00 -1.8174880408240593e+01 6.4260715102151700e+00 0 1 0 -1023 1 2 2.1000000000000001e+00 5.4131406715074082e+00 -1.2164482842689020e+01 6.4146205492679158e+00 0 1 0 -1024 1 3 -1.0500000000000000e+00 5.6304239880444911e+00 -9.2817681435941299e+00 8.0435410816721387e+00 0 1 0 -1025 1 3 -1.0500000000000000e+00 5.2758822532869889e+00 -1.2073118962568111e+01 8.0391557893257684e+00 0 1 0 -1026 1 4 -9.4999999999999996e-01 -1.2722372341245990e+01 -1.0869436002872895e+01 8.1782703654586868e+00 1 1 0 -1027 1 3 -1.0500000000000000e+00 -1.1371883754989966e+01 -1.2866279020237982e+01 5.9401718706365205e+00 1 1 0 -1028 1 3 -1.0500000000000000e+00 5.5291186732541746e+00 -1.0690471025227311e+01 5.9399276524057605e+00 0 1 0 -1029 1 3 -1.0500000000000000e+00 6.6778703001882675e+00 -1.3018430364131007e+01 6.1318438746138142e+00 0 1 0 -1030 1 5 4.2499999999999999e-01 -1.2216015173079230e+01 -9.9843354407856104e+00 8.0788155606797929e+00 1 1 0 -1031 1 1 1.5750000000000000e+00 3.6094518590851443e+00 -1.5284920811314326e+01 9.1845931712309081e+00 0 1 0 -1032 1 2 2.1000000000000001e+00 -1.2594399274735942e+01 -1.3692092821360468e+01 6.4240466677020915e+00 1 1 0 -1033 1 2 2.1000000000000001e+00 -1.2678561260299871e+01 -1.6645598706406350e+01 
6.4177932919645606e+00 1 1 0 -1034 1 3 -1.0500000000000000e+00 -1.2455721801072539e+01 -1.3762432627662784e+01 8.0418184506577752e+00 1 1 0 -1035 1 3 -1.0500000000000000e+00 -1.2818305792613456e+01 -1.6551127151241108e+01 8.0407841274981919e+00 1 1 0 -1036 1 4 -9.4999999999999996e-01 5.2993903528383832e+00 -1.5364409706092212e+01 8.1746586041380667e+00 0 1 0 -1037 1 3 -1.0500000000000000e+00 6.6543746308053890e+00 -1.7346655824744431e+01 5.9412673672388898e+00 0 1 0 -1038 1 3 -1.0500000000000000e+00 -1.2559968533076253e+01 -1.5172042049948024e+01 5.9399350226054093e+00 1 1 0 -1039 1 3 -1.0500000000000000e+00 -1.1416710941546603e+01 -1.7504606408698937e+01 6.1389481182924239e+00 1 1 0 -1040 1 5 4.2499999999999999e-01 5.8054463727137708e+00 -1.4487310950608180e+01 8.0241970113694734e+00 0 1 0 -1041 1 1 1.5750000000000000e+00 -9.2757368802711397e+00 -1.6791831312125804e+01 9.1898085214124201e+00 1 1 0 -1042 1 2 2.1000000000000001e+00 -5.0867242918634785e+00 -9.0514738933585956e+00 -6.4535079288448767e+00 1 1 1 -1043 1 2 2.1000000000000001e+00 -5.0660217385828563e+00 -1.5065512208376937e+01 -6.4432670957317386e+00 1 1 1 -1044 1 3 -1.0500000000000000e+00 -5.2863676674946634e+00 -1.7950429637468549e+01 -8.0701255205587117e+00 1 1 1 -1045 1 3 -1.0500000000000000e+00 -4.9308491266008145e+00 -1.5156434664324657e+01 -8.0701490519116614e+00 1 1 1 -1046 1 4 -9.4999999999999996e-01 -7.5736677633861591e+00 -1.6360952066572519e+01 -8.2013191384030613e+00 1 1 1 -1047 1 3 -1.0500000000000000e+00 -8.9269100561009846e+00 -1.4353420656445872e+01 -5.9691416253352259e+00 1 1 1 -1048 1 3 -1.0500000000000000e+00 -5.1664885049194771e+00 -1.6541894932419183e+01 -5.9679882494046881e+00 1 1 1 -1049 1 3 -1.0500000000000000e+00 -6.3396692049871675e+00 -1.4226791874467263e+01 -6.1613347348097225e+00 1 1 1 -1050 1 5 4.2499999999999999e-01 -8.0801393836947337e+00 -1.7247693032893874e+01 -8.1060356073472999e+00 1 1 1 -1051 1 1 1.5750000000000000e+00 -6.6657540143420526e+00 -1.2305852628791156e+01 
9.1862238917922170e+00 1 1 0 -1052 1 2 2.1000000000000001e+00 -7.6981374420465833e+00 -1.3536712270809378e+01 -6.4515007035737275e+00 1 1 1 -1053 1 2 2.1000000000000001e+00 -7.6149924347722786e+00 -1.0581602797121846e+01 -6.4453761675984040e+00 1 1 1 -1054 1 3 -1.0500000000000000e+00 -7.8421616332788391e+00 -1.3463848622759457e+01 -8.0713143081199625e+00 1 1 1 -1055 1 3 -1.0500000000000000e+00 -7.4838876367404339e+00 -1.0673192057197465e+01 -8.0730493138908717e+00 1 1 1 -1056 1 4 -9.4999999999999996e-01 -4.9711985836524004e+00 -1.1879680202337514e+01 -8.2120386908272796e+00 1 1 1 -1057 1 3 -1.0500000000000000e+00 -6.3147695496300109e+00 -9.8705998744487662e+00 -5.9742882766572789e+00 1 1 1 -1058 1 3 -1.0500000000000000e+00 -7.7186452703950819e+00 -1.2055682094751855e+01 -5.9686013346182278e+00 1 1 1 -1059 1 3 -1.0500000000000000e+00 -8.8866584515424485e+00 -9.7384020743383637e+00 -6.1636619082963078e+00 1 1 1 -1060 1 5 4.2499999999999999e-01 -5.4729440647275451e+00 -1.2770804915611262e+01 -8.1379363568309451e+00 1 1 1 -1061 1 1 1.5750000000000000e+00 -9.2602640877795199e+00 -1.0799517176694982e+01 9.1798571208052735e+00 1 1 0 -1062 1 2 2.1000000000000001e+00 -1.0044524713078076e+01 -1.8174880193011337e+01 6.4260695419215921e+00 1 1 0 -1063 1 2 2.1000000000000001e+00 -1.0066861221058145e+01 -1.2164481724822735e+01 6.4146168549170373e+00 1 1 0 -1064 1 3 -1.0500000000000000e+00 -9.8495732828997546e+00 -9.2817689220646518e+00 8.0435397632564438e+00 1 1 0 -1065 1 3 -1.0500000000000000e+00 -1.0204116103164814e+01 -1.2073117464907558e+01 8.0391531408971488e+00 1 1 0 -1066 1 4 -9.4999999999999996e-01 -7.5623686418675682e+00 -1.0869431094368181e+01 8.1782761860883042e+00 1 1 0 -1067 1 3 -1.0500000000000000e+00 -6.2118841789880861e+00 -1.2866277652133158e+01 5.9401725369470881e+00 1 1 0 -1068 1 3 -1.0500000000000000e+00 -9.9508791048297542e+00 -1.0690470583179597e+01 5.9399251778380329e+00 1 1 0 -1069 1 3 -1.0500000000000000e+00 -8.8021295766293708e+00 
-1.3018430787892997e+01 6.1318438476452890e+00 1 1 0 -1070 1 5 4.2499999999999999e-01 -7.0560061563137122e+00 -9.9843279559767648e+00 8.0788664705222608e+00 1 1 0 -1071 1 1 1.5750000000000000e+00 -1.1870550761218473e+01 -1.5284917974575785e+01 9.1845895039346317e+00 1 1 0 -1072 1 2 2.1000000000000001e+00 -7.4343981755962751e+00 -1.3692091735606553e+01 6.4240482773020471e+00 1 1 0 -1073 1 2 2.1000000000000001e+00 -7.5185607242181973e+00 -1.6645598273036587e+01 6.4177925530853166e+00 1 1 0 -1074 1 3 -1.0500000000000000e+00 -7.2957230388288208e+00 -1.3762431340306287e+01 8.0418183320068835e+00 1 1 0 -1075 1 3 -1.0500000000000000e+00 -7.6583035003210718e+00 -1.6551127323541010e+01 8.0407826229132695e+00 1 1 0 -1076 1 4 -9.4999999999999996e-01 -1.0180610586208212e+01 -1.5364417384321900e+01 8.1746490481746115e+00 1 1 0 -1077 1 3 -1.0500000000000000e+00 -8.8256252195917320e+00 -1.7346654583777614e+01 5.9412658704009651e+00 1 1 0 -1078 1 3 -1.0500000000000000e+00 -7.3999688228516058e+00 -1.5172041164133208e+01 5.9399349343806769e+00 1 1 0 -1079 1 3 -1.0500000000000000e+00 -6.2567102668092964e+00 -1.7504603915417274e+01 6.1389450484259154e+00 1 1 0 -1080 1 5 4.2499999999999999e-01 -9.6745758957464112e+00 -1.4487315838688451e+01 8.0241337136918531e+00 1 1 0 -1081 1 1 1.5750000000000000e+00 -4.1157294011092533e+00 -1.6791833677670979e+01 9.1898107714971999e+00 1 1 0 -1082 1 2 2.1000000000000001e+00 7.3276801386883861e-02 -9.0514739966231161e+00 -6.4535070951812132e+00 1 1 1 -1083 1 2 2.1000000000000001e+00 9.3978518195260818e-02 -1.5065512301167958e+01 -6.4432655624078921e+00 1 1 1 -1084 1 3 -1.0500000000000000e+00 -1.2636923879036033e-01 -1.7950427672442249e+01 -8.0701255272716086e+00 1 1 1 -1085 1 3 -1.0500000000000000e+00 2.2915139223978720e-01 -1.5156434779707133e+01 -8.0701483056375984e+00 1 1 1 -1086 1 4 -9.4999999999999996e-01 -2.4136723134015430e+00 -1.6360953293769722e+01 -8.2013177162726301e+00 1 1 1 -1087 1 3 -1.0500000000000000e+00 -3.7669107617814612e+00 
-1.4353421331342393e+01 -5.9691408337740679e+00 1 1 1 -1088 1 3 -1.0500000000000000e+00 -6.4888679164187835e-03 -1.6541894280359298e+01 -5.9679869165567920e+00 1 1 1 -1089 1 3 -1.0500000000000000e+00 -1.1796686825562102e+00 -1.4226789962402155e+01 -6.1613358338228270e+00 1 1 1 -1090 1 5 4.2499999999999999e-01 -2.9201267468233087e+00 -1.7247703267880009e+01 -8.1060328316318913e+00 1 1 1 -1091 1 1 1.5750000000000000e+00 -1.5057582182878946e+00 -1.2305850947566583e+01 9.1862208461329544e+00 1 1 0 -1092 1 2 2.1000000000000001e+00 -2.5381376770568629e+00 -1.3536711970044786e+01 -6.4514974983938229e+00 1 1 1 -1093 1 2 2.1000000000000001e+00 -2.4549913740186495e+00 -1.0581602623258977e+01 -6.4453726697391627e+00 1 1 1 -1094 1 3 -1.0500000000000000e+00 -2.6821650015510015e+00 -1.3463847532171535e+01 -8.0713125459029342e+00 1 1 1 -1095 1 3 -1.0500000000000000e+00 -2.3238896220346552e+00 -1.0673192119220911e+01 -8.0730469743672906e+00 1 1 1 -1096 1 4 -9.4999999999999996e-01 1.8880566139984900e-01 -1.1879675059795796e+01 -8.2120343889703236e+00 1 1 1 -1097 1 3 -1.0500000000000000e+00 -1.1547696288716924e+00 -9.8705987203270222e+00 -5.9742870456740711e+00 1 1 1 -1098 1 3 -1.0500000000000000e+00 -2.5586479342643020e+00 -1.2055681307294101e+01 -5.9685994952610493e+00 1 1 1 -1099 1 3 -1.0500000000000000e+00 -3.7266585488420558e+00 -9.7384005388603700e+00 -6.1636617121264745e+00 1 1 1 -1100 1 5 4.2499999999999999e-01 -3.1293160687794419e-01 -1.2770800602590427e+01 -8.1378946790470899e+00 1 1 1 -1101 1 1 1.5750000000000000e+00 -4.1002557112339479e+00 -1.0799519459514627e+01 9.1798595531918181e+00 1 1 0 -1102 1 2 2.1000000000000001e+00 -4.8845249346372963e+00 -1.8174880429742991e+01 6.4260714660668050e+00 1 1 0 -1103 1 2 2.1000000000000001e+00 -4.9068593917684646e+00 -1.2164482818261964e+01 6.4146205866117363e+00 1 1 0 -1104 1 3 -1.0500000000000000e+00 -4.6895759605987308e+00 -9.2817681161453276e+00 8.0435410712662190e+00 1 1 0 -1105 1 3 -1.0500000000000000e+00 
-5.0441175576729034e+00 -1.2073118952790347e+01 8.0391558191261261e+00 1 1 0 -1106 1 4 -9.4999999999999996e-01 -2.4023722239386895e+00 -1.0869436022362132e+01 8.1782703142905220e+00 1 1 0 -1107 1 3 -1.0500000000000000e+00 -1.0518838150844267e+00 -1.2866278884232802e+01 5.9401718338514122e+00 1 1 0 -1108 1 3 -1.0500000000000000e+00 -4.7908813223887012e+00 -1.0690471007228307e+01 5.9399276546946282e+00 1 1 0 -1109 1 3 -1.0500000000000000e+00 -3.6421296011832798e+00 -1.3018430483447281e+01 6.1318438086061278e+00 1 1 0 -1110 1 5 4.2499999999999999e-01 -1.8960150166850642e+00 -9.9843355368021030e+00 8.0788157245323404e+00 1 1 0 -1111 1 1 1.5750000000000000e+00 -6.7105480464735088e+00 -1.5284920780995433e+01 9.1845931022869713e+00 1 1 0 -1112 1 2 2.1000000000000001e+00 -2.2743992387799423e+00 -1.3692092993191645e+01 6.4240465507043041e+00 1 1 0 -1113 1 2 2.1000000000000001e+00 -2.3585611253751022e+00 -1.6645598739575476e+01 6.4177932067447436e+00 1 1 0 -1114 1 3 -1.0500000000000000e+00 -2.1357218194123284e+00 -1.3762432706432797e+01 8.0418184034939770e+00 1 1 0 -1115 1 3 -1.0500000000000000e+00 -2.4983058565104477e+00 -1.6551127107648668e+01 8.0407840268320570e+00 1 1 0 -1116 1 4 -9.4999999999999996e-01 -5.0206097730473633e+00 -1.5364409587470032e+01 8.1746586778883383e+00 1 1 0 -1117 1 3 -1.0500000000000000e+00 -3.6656254169705633e+00 -1.7346655873455909e+01 5.9412672633171724e+00 1 1 0 -1118 1 3 -1.0500000000000000e+00 -2.2399684459328135e+00 -1.5172042049284187e+01 5.9399349301513809e+00 1 1 0 -1119 1 3 -1.0500000000000000e+00 -1.0967109954839920e+00 -1.7504606501402971e+01 6.1389481422112020e+00 1 1 0 -1120 1 5 4.2499999999999999e-01 -4.5145536722770618e+00 -1.4487310886139630e+01 8.0241971313508245e+00 1 1 0 -1121 1 1 1.5750000000000000e+00 1.1015824608214189e+00 -7.8211831095359372e+00 9.1875061022830238e+00 0 1 0 -1122 1 2 2.1000000000000001e+00 5.2936746325908643e+00 -8.4799847590794286e-02 -6.4509125189695187e+00 0 1 1 -1123 1 2 2.1000000000000001e+00 
5.3177620951242091e+00 -6.0968673274809824e+00 -6.4456404427441409e+00 0 1 1 -1124 1 3 -1.0500000000000000e+00 5.0910935469030285e+00 -8.9785797619656442e+00 -8.0737693665945400e+00 0 1 1 -1125 1 3 -1.0500000000000000e+00 5.4502045730762418e+00 -6.1889139035298175e+00 -8.0729056340417351e+00 0 1 1 -1126 1 4 -9.4999999999999996e-01 2.8035007832183751e+00 -7.3943789391372388e+00 -8.2073818644748346e+00 0 1 1 -1127 1 3 -1.0500000000000000e+00 1.4598425104834334e+00 -5.3906202854789136e+00 -5.9727377368561303e+00 0 1 1 -1128 1 3 -1.0500000000000000e+00 5.2095062812533364e+00 -7.5710401150415656e+00 -5.9704882197875353e+00 0 1 1 -1129 1 3 -1.0500000000000000e+00 4.0484397396398908e+00 -5.2497084787801995e+00 -6.1650068702634240e+00 0 1 1 -1130 1 5 4.2499999999999999e-01 2.2978477829132213e+00 -8.2801256423894838e+00 -8.1041913709864826e+00 0 1 1 -1131 1 1 1.5750000000000000e+00 3.7131729244048106e+00 -3.3419269987796998e+00 9.1902779962922487e+00 0 1 0 -1132 1 2 2.1000000000000001e+00 2.6848345532874589e+00 -4.5678847951330201e+00 -6.4527351236606005e+00 0 1 1 -1133 1 2 2.1000000000000001e+00 2.7673070495306860e+00 -1.6134471661957654e+00 -6.4436730408769485e+00 0 1 1 -1134 1 3 -1.0500000000000000e+00 2.5421770632400946e+00 -4.4948732524746937e+00 -8.0726878964976354e+00 0 1 1 -1135 1 3 -1.0500000000000000e+00 2.9030028184381589e+00 -1.7063183689168575e+00 -8.0697019505379295e+00 0 1 1 -1136 1 4 -9.4999999999999996e-01 5.4187399781495422e+00 -2.9112197838295941e+00 -8.2027865190465175e+00 0 1 1 -1137 1 3 -1.0500000000000000e+00 4.0711115185491842e+00 -9.0922469784403503e-01 -5.9691936793185469e+00 0 1 1 -1138 1 3 -1.0500000000000000e+00 2.6564910640164747e+00 -3.0879908743096358e+00 -5.9687540484704833e+00 0 1 1 -1139 1 3 -1.0500000000000000e+00 1.5007277439114972e+00 -7.6391128716378631e-01 -6.1636696730411940e+00 0 1 1 -1140 1 5 4.2499999999999999e-01 4.9049810502204245e+00 -3.7896017133240676e+00 -8.0801840895422021e+00 0 1 1 -1141 1 1 1.5750000000000000e+00 
1.1195461568556127e+00 -1.8349730989896607e+00 9.1778447648518622e+00 0 1 0 -1142 1 2 2.1000000000000001e+00 3.3418772623221749e-01 -9.2107192613442752e+00 6.4250032488409836e+00 0 1 0 -1143 1 2 2.1000000000000001e+00 3.1087376727622207e-01 -3.2043623623744502e+00 6.4184917551064515e+00 0 1 0 -1144 1 3 -1.0500000000000000e+00 5.2790340322101059e-01 -3.2222179611793322e-01 8.0497159299825469e+00 0 1 0 -1145 1 3 -1.0500000000000000e+00 1.7797734599654014e-01 -3.1146849720886713e+00 8.0447808218113011e+00 0 1 0 -1146 1 4 -9.4999999999999996e-01 2.8206832520694576e+00 -1.9016858190157464e+00 8.1842289973638032e+00 0 1 0 -1147 1 3 -1.0500000000000000e+00 4.1659228951121268e+00 -3.9052783763117720e+00 5.9457143316954149e+00 0 1 0 -1148 1 3 -1.0500000000000000e+00 4.2461854973715063e-01 -1.7291946873897466e+00 5.9465319770203600e+00 0 1 0 -1149 1 3 -1.0500000000000000e+00 1.5769204510664299e+00 -4.0542195914253902e+00 6.1311301545285115e+00 0 1 0 -1150 1 5 4.2499999999999999e-01 3.3283926713460197e+00 -1.0149824837214574e+00 8.0966810370345641e+00 0 1 0 -1151 1 1 1.5750000000000000e+00 -1.4933374110761299e+00 -6.3169695364932856e+00 9.1776214530387961e+00 0 1 0 -1152 1 2 2.1000000000000001e+00 2.9415030045831241e+00 -4.7304496461315484e+00 6.4255708412573131e+00 0 1 0 -1153 1 2 2.1000000000000001e+00 2.8629792161431595e+00 -7.6841536215653417e+00 6.4153415021124189e+00 0 1 0 -1154 1 3 -1.0500000000000000e+00 3.0784038276487777e+00 -4.8030780368147798e+00 8.0448751011983077e+00 0 1 0 -1155 1 3 -1.0500000000000000e+00 2.7276184084435329e+00 -7.5927277967488962e+00 8.0405678249882158e+00 0 1 0 -1156 1 4 -9.4999999999999996e-01 2.0955934955568978e-01 -6.3839685025088997e+00 8.1820716936116824e+00 0 1 0 -1157 1 3 -1.0500000000000000e+00 1.5580659460531994e+00 -8.3863297106319088e+00 5.9427029281397807e+00 0 1 0 -1158 1 3 -1.0500000000000000e+00 2.9770887460376301e+00 -6.2098967842807333e+00 5.9406501514999093e+00 0 1 0 -1159 1 3 -1.0500000000000000e+00 4.1289265492085221e+00 
-8.5345614681068032e+00 6.1300855760690283e+00 0 1 0 -1160 1 5 4.2499999999999999e-01 7.1555263464406238e-01 -5.4964688808013538e+00 8.0942624119313820e+00 0 1 0 -1161 1 1 1.5750000000000000e+00 6.2615891444933247e+00 -7.8211859845168679e+00 9.1875073211238139e+00 0 1 0 -1162 1 2 2.1000000000000001e+00 -1.0186325127172026e+01 -8.4800075241211914e-02 -6.4509113418482755e+00 1 1 1 -1163 1 2 2.1000000000000001e+00 -1.0162237068800831e+01 -6.0968677335338661e+00 -6.4456400242172647e+00 1 1 1 -1164 1 3 -1.0500000000000000e+00 -1.0388907573503941e+01 -8.9785792442938828e+00 -8.0737691389315334e+00 1 1 1 -1165 1 3 -1.0500000000000000e+00 -1.0029794597688529e+01 -6.1889140820372184e+00 -8.0729060411461457e+00 1 1 1 -1166 1 4 -9.4999999999999996e-01 7.9634991109465503e+00 -7.3943767784022150e+00 -8.2073775831115654e+00 0 1 1 -1167 1 3 -1.0500000000000000e+00 6.6198418888646451e+00 -5.3906211267839588e+00 -5.9727366448158667e+00 0 1 1 -1168 1 3 -1.0500000000000000e+00 -1.0270492275886063e+01 -7.5710401650606478e+00 -5.9704876683605912e+00 1 1 1 -1169 1 3 -1.0500000000000000e+00 9.2084397872017583e+00 -5.2497087340039066e+00 -6.1650078397054191e+00 0 1 1 -1170 1 5 4.2499999999999999e-01 7.4578649361207852e+00 -8.2801310121568470e+00 -8.1041641354956884e+00 0 1 1 -1171 1 1 1.5750000000000000e+00 -1.1766831568063315e+01 -3.3419273446205384e+00 9.1902759023325977e+00 1 1 0 -1172 1 2 2.1000000000000001e+00 7.8448352935974413e+00 -4.5678852641334586e+00 -6.4527329721478406e+00 0 1 1 -1173 1 2 2.1000000000000001e+00 7.9273082512353810e+00 -1.6134477902736428e+00 -6.4436705389829019e+00 0 1 1 -1174 1 3 -1.0500000000000000e+00 7.7021737486516884e+00 -4.4948734020144130e+00 -8.0726866210873371e+00 0 1 1 -1175 1 3 -1.0500000000000000e+00 8.0630012098010724e+00 -1.7063190095604703e+00 -8.0697003070482474e+00 0 1 1 -1176 1 4 -9.4999999999999996e-01 -1.0061256666888218e+01 -2.9112186162970488e+00 -8.2027862521661596e+00 1 1 1 -1177 1 3 -1.0500000000000000e+00 9.2311114159812142e+00 
-9.0922442160727712e-01 -5.9691926853462611e+00 0 1 1 -1178 1 3 -1.0500000000000000e+00 7.8164888320353079e+00 -3.0879911253937475e+00 -5.9687527762404899e+00 0 1 1 -1179 1 3 -1.0500000000000000e+00 6.6607275822481995e+00 -7.6391118544651349e-01 -6.1636692603452437e+00 0 1 1 -1180 1 5 4.2499999999999999e-01 1.0064991039374874e+01 -3.7896022090646344e+00 -8.0801688083303578e+00 0 1 1 -1181 1 1 1.5750000000000000e+00 6.2795549257811416e+00 -1.8349745187085702e+00 9.1778458301603543e+00 0 1 0 -1182 1 2 2.1000000000000001e+00 5.4941882031935538e+00 -9.2107196566882710e+00 6.4250058022598466e+00 0 1 0 -1183 1 2 2.1000000000000001e+00 5.4708752374262524e+00 -3.2043623791879874e+00 6.4184941869678873e+00 0 1 0 -1184 1 3 -1.0500000000000000e+00 5.6879002394524996e+00 -3.2222017398745351e-01 8.0497168606832901e+00 0 1 0 -1185 1 3 -1.0500000000000000e+00 5.3379757488899280e+00 -3.1146843774704323e+00 8.0447821497209162e+00 0 1 0 -1186 1 4 -9.4999999999999996e-01 -1.2659319544680235e+01 -1.9016868708743111e+00 8.1842259782179809e+00 1 1 0 -1187 1 3 -1.0500000000000000e+00 -1.1314076944108971e+01 -3.9052787781475580e+00 5.9457133911120792e+00 1 1 0 -1188 1 3 -1.0500000000000000e+00 5.5846160222444858e+00 -1.7291943338414377e+00 5.9465332871130734e+00 0 1 0 -1189 1 3 -1.0500000000000000e+00 6.7369197220127148e+00 -4.0542200495218736e+00 6.1311308739310242e+00 0 1 0 -1190 1 5 4.2499999999999999e-01 -1.2151614396159513e+01 -1.0149831975055328e+00 8.0966579883684311e+00 1 1 0 -1191 1 1 1.5750000000000000e+00 3.6666682047781745e+00 -6.3169724146187232e+00 9.1776246950578582e+00 0 1 0 -1192 1 2 2.1000000000000001e+00 -1.2538497140327129e+01 -4.7304498104514927e+00 6.4255688762431085e+00 1 1 0 -1193 1 2 2.1000000000000001e+00 -1.2617021251765532e+01 -7.6841529019756756e+00 6.4153401169994932e+00 1 1 0 -1194 1 3 -1.0500000000000000e+00 -1.2401595570125235e+01 -4.8030791608605909e+00 8.0448746992934019e+00 1 1 0 -1195 1 3 -1.0500000000000000e+00 -1.2752383225248513e+01 
-7.5927267115433068e+00 8.0405676137803326e+00 1 1 0 -1196 1 4 -9.4999999999999996e-01 5.3695568125539612e+00 -6.3839691583703839e+00 8.1820733757662722e+00 0 1 0 -1197 1 3 -1.0500000000000000e+00 6.7180651265665112e+00 -8.3863303864656320e+00 5.9427036791502150e+00 0 1 0 -1198 1 3 -1.0500000000000000e+00 -1.2502911834435501e+01 -6.2098968206781429e+00 5.9406487849178440e+00 1 1 0 -1199 1 3 -1.0500000000000000e+00 -1.1351074041553073e+01 -8.5345624235738011e+00 6.1300873293966873e+00 1 1 0 -1200 1 5 4.2499999999999999e-01 5.8755672283030478e+00 -5.4964787867330820e+00 8.0942678645552562e+00 0 1 0 -1201 1 1 1.5750000000000000e+00 -9.2184175502962376e+00 -7.8211832013798652e+00 9.1875062239121803e+00 1 1 0 -1202 1 2 2.1000000000000001e+00 -5.0263253488287951e+00 -8.4799975758681256e-02 -6.4509125447753801e+00 1 1 1 -1203 1 2 2.1000000000000001e+00 -5.0022378764174196e+00 -6.0968674020146665e+00 -6.4456404211126230e+00 1 1 1 -1204 1 3 -1.0500000000000000e+00 -5.2289064605794637e+00 -8.9785798174304059e+00 -8.0737693503184058e+00 1 1 1 -1205 1 3 -1.0500000000000000e+00 -4.8697955340917378e+00 -6.1889139502773070e+00 -8.0729056296279964e+00 1 1 1 -1206 1 4 -9.4999999999999996e-01 -7.5164991096987013e+00 -7.3943790393624838e+00 -8.2073819312780323e+00 1 1 1 -1207 1 3 -1.0500000000000000e+00 -8.8601574516481101e+00 -5.3906203482381265e+00 -5.9727377227746965e+00 1 1 1 -1208 1 3 -1.0500000000000000e+00 -5.1104935819644801e+00 -7.5710402873222886e+00 -5.9704881958143021e+00 1 1 1 -1209 1 3 -1.0500000000000000e+00 -6.2715602191730557e+00 -5.2497085656421589e+00 -6.1650067341374539e+00 1 1 1 -1210 1 5 4.2499999999999999e-01 -8.0221522535984366e+00 -8.2801257743642385e+00 -8.1041914692449204e+00 1 1 1 -1211 1 1 1.5750000000000000e+00 -6.6068271117224775e+00 -3.3419270464568456e+00 9.1902779041867895e+00 1 1 0 -1212 1 2 2.1000000000000001e+00 -7.6351654991307161e+00 -4.5678848617243997e+00 -6.4527350467564748e+00 1 1 1 -1213 1 2 2.1000000000000001e+00 -7.5526929367444247e+00 
-1.6134471973766296e+00 -6.4436730785571168e+00 1 1 1 -1214 1 3 -1.0500000000000000e+00 -7.7778228473854565e+00 -4.4948733204094999e+00 -8.0726878308790155e+00 1 1 1 -1215 1 3 -1.0500000000000000e+00 -7.4169971796892629e+00 -1.7063184273715208e+00 -8.0697019812018560e+00 1 1 1 -1216 1 4 -9.4999999999999996e-01 -4.9012600593803448e+00 -2.9112197693073885e+00 -8.2027864975951594e+00 1 1 1 -1217 1 3 -1.0500000000000000e+00 -6.2488884102840938e+00 -9.0922475770645406e-01 -5.9691937409926910e+00 1 1 1 -1218 1 3 -1.0500000000000000e+00 -7.6635089388703292e+00 -3.0879909484731130e+00 -5.9687540272826229e+00 1 1 1 -1219 1 3 -1.0500000000000000e+00 -8.8192723111622708e+00 -7.6391126173227875e-01 -6.1636696564026154e+00 1 1 1 -1220 1 5 4.2499999999999999e-01 -5.4150190028995864e+00 -3.7896017243070617e+00 -8.0801841224122501e+00 1 1 1 -1221 1 1 1.5750000000000000e+00 -9.2004537570599396e+00 -1.8349730836410920e+00 9.1778449601373069e+00 1 1 0 -1222 1 2 2.1000000000000001e+00 -9.9858122193077019e+00 -9.2107192692563444e+00 6.4250032315589554e+00 1 1 0 -1223 1 2 2.1000000000000001e+00 -1.0009126210633999e+01 -3.2043623702919888e+00 6.4184917563012824e+00 1 1 0 -1224 1 3 -1.0500000000000000e+00 -9.7920966083249219e+00 -3.2222173760889916e-01 8.0497159397066937e+00 1 1 0 -1225 1 3 -1.0500000000000000e+00 -1.0142022668601596e+01 -3.1146850529437824e+00 8.0447808560630136e+00 1 1 0 -1226 1 4 -9.4999999999999996e-01 -7.4993166678029128e+00 -1.9016857440853911e+00 8.1842290888720441e+00 1 1 0 -1227 1 3 -1.0500000000000000e+00 -6.1540772384318139e+00 -3.9052784739223902e+00 5.9457142289558593e+00 1 1 0 -1228 1 3 -1.0500000000000000e+00 -9.8953813895697103e+00 -1.7291945564636642e+00 5.9465320319017838e+00 1 1 0 -1229 1 3 -1.0500000000000000e+00 -8.7430795496225642e+00 -4.0542195926089022e+00 6.1311300767671426e+00 1 1 0 -1230 1 5 4.2499999999999999e-01 -6.9916072878775903e+00 -1.0149824078685867e+00 8.0966810453654716e+00 1 1 0 -1231 1 1 1.5750000000000000e+00 -1.1813337265636369e+01 
-6.3169694772795992e+00 9.1776213411815064e+00 1 1 0 -1232 1 2 2.1000000000000001e+00 -7.3784969466388866e+00 -4.7304495127622381e+00 6.4255707150312453e+00 1 1 0 -1233 1 2 2.1000000000000001e+00 -7.4570207131413415e+00 -7.6841535786469404e+00 6.4153415116734021e+00 1 1 0 -1234 1 3 -1.0500000000000000e+00 -7.2415961736847372e+00 -4.8030779992497870e+00 8.0448750053751823e+00 1 1 0 -1235 1 3 -1.0500000000000000e+00 -7.5923816334646475e+00 -7.5927276522151796e+00 8.0405677942276057e+00 1 1 0 -1236 1 4 -9.4999999999999996e-01 -1.0110440670769378e+01 -6.3839684120568307e+00 8.1820717309442195e+00 1 1 0 -1237 1 3 -1.0500000000000000e+00 -8.7619342190961333e+00 -8.3863298189298341e+00 5.9427028904677925e+00 1 1 0 -1238 1 3 -1.0500000000000000e+00 -7.3429113056989959e+00 -6.2098968409085060e+00 5.9406500596919436e+00 1 1 0 -1239 1 3 -1.0500000000000000e+00 -6.1910734520151678e+00 -8.5345613511329628e+00 6.1300855720601781e+00 1 1 0 -1240 1 5 4.2499999999999999e-01 -9.6044471581020350e+00 -5.4964688110858688e+00 8.0942623502612108e+00 1 1 0 -1241 1 1 1.5750000000000000e+00 -4.0584107519436232e+00 -7.8211860380049369e+00 9.1875072237364463e+00 1 1 0 -1242 1 2 2.1000000000000001e+00 1.3367485440650029e-01 -8.4799965580153014e-02 -6.4509113495749801e+00 1 1 1 -1243 1 2 2.1000000000000001e+00 1.5776274491955711e-01 -6.0968676439569425e+00 -6.4456400560741933e+00 1 1 1 -1244 1 3 -1.0500000000000000e+00 -6.8907540324431693e-02 -8.9785792836431977e+00 -8.0737693082767965e+00 1 1 1 -1245 1 3 -1.0500000000000000e+00 2.9020545393568575e-01 -6.1889141008000550e+00 -8.0729061180129769e+00 1 1 1 -1246 1 4 -9.4999999999999996e-01 -2.3565008977990276e+00 -7.3943767834038372e+00 -8.2073776588246954e+00 1 1 1 -1247 1 3 -1.0500000000000000e+00 -3.7001581590911137e+00 -5.3906211235323056e+00 -5.9727367128414635e+00 1 1 1 -1248 1 3 -1.0500000000000000e+00 4.9507736411996461e-02 -7.5710401964130263e+00 -5.9704876680203274e+00 1 1 1 -1249 1 3 -1.0500000000000000e+00 -1.1115601275858431e+00 
-5.2497087955008670e+00 -6.1650079818572099e+00 1 1 1 -1250 1 5 4.2499999999999999e-01 -2.8621348916727039e+00 -8.2801312147431325e+00 -8.1041641535991769e+00 1 1 1 -1251 1 1 1.5750000000000000e+00 -1.4468315900733817e+00 -3.3419273050042992e+00 9.1902759757014039e+00 1 1 0 -1252 1 2 2.1000000000000001e+00 -2.4751647567833022e+00 -4.5678853071586438e+00 -6.4527331061087434e+00 1 1 1 -1253 1 2 2.1000000000000001e+00 -2.3926916777692053e+00 -1.6134477378778271e+00 -6.4436705358094475e+00 1 1 1 -1254 1 3 -1.0500000000000000e+00 -2.6178263390636518e+00 -4.4948733860373640e+00 -8.0726867631303918e+00 1 1 1 -1255 1 3 -1.0500000000000000e+00 -2.2569987515521053e+00 -1.7063190458171640e+00 -8.0697002816566314e+00 1 1 1 -1256 1 4 -9.4999999999999996e-01 2.5874333263023530e-01 -2.9112185409980427e+00 -8.2027862880335025e+00 1 1 1 -1257 1 3 -1.0500000000000000e+00 -1.0888886073415289e+00 -9.0922453078972154e-01 -5.9691926900327612e+00 1 1 1 -1258 1 3 -1.0500000000000000e+00 -2.5035112488146156e+00 -3.0879911370282862e+00 -5.9687528345113279e+00 1 1 1 -1259 1 3 -1.0500000000000000e+00 -3.6592723823244739e+00 -7.6391115856610625e-01 -6.1636692160584907e+00 1 1 1 -1260 1 5 4.2499999999999999e-01 -2.5500907325471367e-01 -3.7896021876333510e+00 -8.0801688898624118e+00 1 1 1 -1261 1 1 1.5750000000000000e+00 -4.0404451360221882e+00 -1.8349745312350443e+00 9.1778458883844216e+00 1 1 0 -1262 1 2 2.1000000000000001e+00 -4.8258118665782437e+00 -9.2107196053125140e+00 6.4250057386066040e+00 1 1 0 -1263 1 2 2.1000000000000001e+00 -4.8491247818409375e+00 -3.2043623465875708e+00 6.4184941125229429e+00 1 1 0 -1264 1 3 -1.0500000000000000e+00 -4.6320997791364613e+00 -3.2222019520617451e-01 8.0497168475235767e+00 1 1 0 -1265 1 3 -1.0500000000000000e+00 -4.9820242758466300e+00 -3.1146844524036688e+00 8.0447821221677955e+00 1 1 0 -1266 1 4 -9.4999999999999996e-01 -2.3393194643499040e+00 -1.9016868474303124e+00 8.1842259919272209e+00 1 1 0 -1267 1 3 -1.0500000000000000e+00 -9.9407705821971248e-01 
-3.9052788255152002e+00 5.9457135420655742e+00 1 1 0 -1268 1 3 -1.0500000000000000e+00 -4.7353840007993124e+00 -1.7291944085022344e+00 5.9465332553825938e+00 1 1 0 -1269 1 3 -1.0500000000000000e+00 -3.5830802190934108e+00 -4.0542201386276755e+00 6.1311308645892630e+00 1 1 0 -1270 1 5 4.2499999999999999e-01 -1.8316144355749984e+00 -1.0149831217187000e+00 8.0966580459795239e+00 1 1 0 -1271 1 1 1.5750000000000000e+00 -6.6533317491128035e+00 -6.3169725114546189e+00 9.1776246533874613e+00 1 1 0 -1272 1 2 2.1000000000000001e+00 -2.2184971428248037e+00 -4.7304497857744110e+00 6.4255689051555027e+00 1 1 0 -1273 1 2 2.1000000000000001e+00 -2.2970211722251843e+00 -7.6841529453748727e+00 6.4153400537345462e+00 1 1 0 -1274 1 3 -1.0500000000000000e+00 -2.0815956558263284e+00 -4.8030792466520467e+00 8.0448747063271782e+00 1 1 0 -1275 1 3 -1.0500000000000000e+00 -2.4323832845308839e+00 -7.5927267789324340e+00 8.0405676521363887e+00 1 1 0 -1276 1 4 -9.4999999999999996e-01 -4.9504431259642567e+00 -6.3839692185778247e+00 8.1820733592564707e+00 1 1 0 -1277 1 3 -1.0500000000000000e+00 -3.6019349440261488e+00 -8.3863303521760315e+00 5.9427036169602658e+00 1 1 0 -1278 1 3 -1.0500000000000000e+00 -2.1829118060411634e+00 -6.2098967649521573e+00 5.9406488050957158e+00 1 1 0 -1279 1 3 -1.0500000000000000e+00 -1.0310740882006293e+00 -8.5345624612586146e+00 6.1300873115327192e+00 1 1 0 -1280 1 5 4.2499999999999999e-01 -4.4444326684473836e+00 -5.4964788380050251e+00 8.0942678176661076e+00 1 1 0 +1 1 1 1.5750000000000000e+00 2.5993596520046260e+00 1.4882900600258182e+00 -1.0812491332750085e-03 0 0 0 +2 1 2 2.1000000000000001e+00 3.3881083617607270e+00 8.8638081118868861e+00 2.7520703123178425e+00 0 0 0 +3 1 2 2.1000000000000001e+00 3.4139343398345119e+00 2.8526270594973830e+00 2.7596168355590276e+00 0 0 0 +4 1 3 -1.0500000000000000e+00 3.1875086232004897e+00 -3.1304936054915800e-02 1.1367387214371441e+00 0 0 0 +5 1 3 -1.0500000000000000e+00 3.5527594315534881e+00 2.7568701063549739e+00 
1.1381402995392769e+00 0 0 0 +6 1 4 -9.4999999999999996e-01 9.1766045315059230e-01 1.5740631280969382e+00 1.0189580159233298e+00 0 0 0 +7 1 3 -1.0500000000000000e+00 -4.3978237985006885e-01 3.5528966155392290e+00 3.2377228498225232e+00 0 0 0 +8 1 3 -1.0500000000000000e+00 3.2966896305453979e+00 1.3784025867574137e+00 3.2369230385132948e+00 0 0 0 +9 1 3 -1.0500000000000000e+00 2.1511854698456094e+00 3.7101624965654629e+00 3.0393623494866819e+00 0 0 0 +10 1 5 4.2499999999999999e-01 4.1304115882830850e-01 7.0858429800630063e-01 1.2341413567628692e+00 0 0 0 +11 1 1 1.5750000000000000e+00 5.2075289141421628e+00 5.9712273126645208e+00 -1.1003217385940900e-03 0 0 0 +12 1 2 2.1000000000000001e+00 7.7992413524051329e-01 4.3808730648448524e+00 2.7520886637176147e+00 0 0 0 +13 1 2 2.1000000000000001e+00 8.6212166757941588e-01 7.3355201716095273e+00 2.7596040381917586e+00 0 0 0 +14 1 3 -1.0500000000000000e+00 6.3565084014680373e-01 4.4515684498491375e+00 1.1367428409373659e+00 0 0 0 +15 1 3 -1.0500000000000000e+00 1.0009642240685892e+00 7.2397556658052480e+00 1.1381282197844147e+00 0 0 0 +16 1 4 -9.4999999999999996e-01 3.5258684152099597e+00 6.0570110595805637e+00 1.0189984106767973e+00 0 0 0 +17 1 3 -1.0500000000000000e+00 2.1684022966187442e+00 8.0357879734705300e+00 3.2377030337549169e+00 0 0 0 +18 1 3 -1.0500000000000000e+00 7.4486198526787106e-01 5.8613648961516880e+00 3.2369050077262393e+00 0 0 0 +19 1 3 -1.0500000000000000e+00 -4.0060852713138040e-01 8.1930902338805822e+00 3.0393178026634580e+00 0 0 0 +20 1 5 4.2499999999999999e-01 3.0212600858515035e+00 5.1916295129444698e+00 1.2344575067874164e+00 0 0 0 +21 1 1 1.5750000000000000e+00 2.6165689354331576e+00 7.4772141627982442e+00 -5.6194863630345537e-03 0 0 0 +22 1 2 2.1000000000000001e+00 1.8278876550536935e+00 1.0182971026238974e-01 -2.7586970692677335e+00 0 0 0 +23 1 2 2.1000000000000001e+00 1.8021215275304208e+00 6.1129688954300327e+00 -2.7662422357374892e+00 0 0 0 +24 1 3 -1.0500000000000000e+00 
2.0283728515776911e+00 8.9970577309046504e+00 -1.1432473706049304e+00 0 0 0 +25 1 3 -1.0500000000000000e+00 1.6634566967028626e+00 6.2086697842131962e+00 -1.1446560607305951e+00 0 0 0 +26 1 4 -9.4999999999999996e-01 4.2986260781470733e+00 7.3920166940321401e+00 -1.0249968856997604e+00 0 0 0 +27 1 3 -1.0500000000000000e+00 5.6559330286221403e+00 5.4126307782462391e+00 -3.2442608651498190e+00 0 0 0 +28 1 3 -1.0500000000000000e+00 1.9192504670401291e+00 7.5872042145847054e+00 -3.2434320963187044e+00 0 0 0 +29 1 3 -1.0500000000000000e+00 3.0650235195062034e+00 5.2556901644357339e+00 -3.0461115502159730e+00 0 0 0 +30 1 5 4.2499999999999999e-01 4.8035751889592486e+00 8.2580856040690769e+00 -1.2372390756746245e+00 0 0 0 +31 1 1 1.5750000000000000e+00 8.4044409014900623e-03 2.9942788765782851e+00 -5.6005955861042622e-03 0 0 0 +32 1 2 2.1000000000000001e+00 4.4360730600385114e+00 4.5847605410689702e+00 -2.7587150701119336e+00 0 0 0 +33 1 2 2.1000000000000001e+00 4.3539364181102407e+00 1.6300766548957064e+00 -2.7662298465172128e+00 0 0 0 +34 1 3 -1.0500000000000000e+00 4.5802276362029435e+00 4.5141855930315025e+00 -1.1432542885526686e+00 0 0 0 +35 1 3 -1.0500000000000000e+00 4.2152561507898412e+00 1.7257911288873942e+00 -1.1446452820482715e+00 0 0 0 +36 1 4 -9.4999999999999996e-01 1.6904176280658092e+00 2.9090779966367997e+00 -1.0250300704067090e+00 0 0 0 +37 1 3 -1.0500000000000000e+00 3.0477514338401175e+00 9.2973642699415393e-01 -3.2442410144828848e+00 0 0 0 +38 1 3 -1.0500000000000000e+00 4.4710788948266256e+00 3.1042503377148130e+00 -3.2434184044029397e+00 0 0 0 +39 1 3 -1.0500000000000000e+00 5.6168180411235280e+00 7.7276465358232826e-01 -3.0460654755419396e+00 0 0 0 +40 1 5 4.2499999999999999e-01 2.1953543805053659e+00 3.7750625122921733e+00 -1.2375137658443451e+00 0 0 0 +41 1 1 1.5750000000000000e+00 7.7593667974341116e+00 1.4882875844411778e+00 -1.0800878431691530e-03 0 0 0 +42 1 2 2.1000000000000001e+00 -1.2091892554232100e+01 8.8638068273280304e+00 
2.7520729565452449e+00 1 0 0 +43 1 2 2.1000000000000001e+00 -1.2066062031288764e+01 2.8526258794028436e+00 2.7596183764589810e+00 1 0 0 +44 1 3 -1.0500000000000000e+00 8.3475033984264826e+00 -3.1302525644800738e-02 1.1367403599149970e+00 0 0 0 +45 1 3 -1.0500000000000000e+00 -1.1927240183972263e+01 2.7568656415244952e+00 1.1381418614692507e+00 1 0 0 +46 1 4 -9.4999999999999996e-01 6.0776532475762153e+00 1.5740572667257275e+00 1.0189511331187227e+00 0 0 0 +47 1 3 -1.0500000000000000e+00 4.7202183448957626e+00 3.5528937230222120e+00 3.2377240632977262e+00 0 0 0 +48 1 3 -1.0500000000000000e+00 -1.2183313639709638e+01 1.3784030003855925e+00 3.2369255861931432e+00 1 0 0 +49 1 3 -1.0500000000000000e+00 7.3111869688358517e+00 3.7101650079422015e+00 3.0393575352778353e+00 0 0 0 +50 1 5 4.2499999999999999e-01 5.5730238334148332e+00 7.0856858877032991e-01 1.2340767994475854e+00 0 0 0 +51 1 1 1.5750000000000000e+00 -1.0272466189276649e+01 5.9712246082248015e+00 -1.0975943967341806e-03 1 0 0 +52 1 2 2.1000000000000001e+00 5.9399221186099425e+00 4.3808736732785256e+00 2.7520868822860844e+00 0 0 0 +53 1 2 2.1000000000000001e+00 6.0221179373263780e+00 7.3355193588128209e+00 2.7596040009105103e+00 0 0 0 +54 1 3 -1.0500000000000000e+00 5.7956494009087649e+00 4.4515669264515516e+00 1.1367431058474011e+00 0 0 0 +55 1 3 -1.0500000000000000e+00 6.1609603541139819e+00 7.2397536142585857e+00 1.1381283858110915e+00 0 0 0 +56 1 4 -9.4999999999999996e-01 -1.1954127386017726e+01 6.0570238530538951e+00 1.0190134395868142e+00 1 0 0 +57 1 3 -1.0500000000000000e+00 7.3284057147376060e+00 8.0357882119204724e+00 3.2377031674562478e+00 0 0 0 +58 1 3 -1.0500000000000000e+00 5.9048624834697314e+00 5.8613620762115737e+00 3.2369046557501786e+00 0 0 0 +59 1 3 -1.0500000000000000e+00 4.7593918660988059e+00 8.1930867571954415e+00 3.0393208108983529e+00 0 0 0 +60 1 5 4.2499999999999999e-01 8.1812915063843654e+00 5.1916541819268076e+00 1.2345712712632348e+00 0 0 0 +61 1 1 1.5750000000000000e+00 
7.7765744150069196e+00 7.4772110898687600e+00 -5.6168077529363813e-03 0 0 0 +62 1 2 2.1000000000000001e+00 6.9878919349463970e+00 1.0183096031471806e-01 -2.7586957459689803e+00 0 0 0 +63 1 2 2.1000000000000001e+00 6.9621241616101770e+00 6.1129671620970711e+00 -2.7662418189596103e+00 0 0 0 +64 1 3 -1.0500000000000000e+00 7.1883739812393408e+00 8.9970595571723742e+00 -1.1432480919496335e+00 0 0 0 +65 1 3 -1.0500000000000000e+00 6.8234605148207379e+00 6.2086712878975341e+00 -1.1446560033087021e+00 0 0 0 +66 1 4 -9.4999999999999996e-01 -1.1181369923537819e+01 7.3920293161420574e+00 -1.0249827015771427e+00 1 0 0 +67 1 3 -1.0500000000000000e+00 -9.8240645767162160e+00 5.4126303979453212e+00 -3.2442605127814081e+00 1 0 0 +68 1 3 -1.0500000000000000e+00 7.0792502199296017e+00 7.5872092188871072e+00 -3.2434313874298280e+00 0 0 0 +69 1 3 -1.0500000000000000e+00 8.2250240290825261e+00 5.2556934165631084e+00 -3.0461146419104255e+00 0 0 0 +70 1 5 4.2499999999999999e-01 -1.0676395334126314e+01 8.2581079751635009e+00 -1.2371320778147110e+00 1 0 0 +71 1 1 1.5750000000000000e+00 5.1683976201164832e+00 2.9942809980328420e+00 -5.6020985389082512e-03 0 0 0 +72 1 2 2.1000000000000001e+00 -1.1043928010003881e+01 4.5847591583652694e+00 -2.7587123842620365e+00 1 0 0 +73 1 2 2.1000000000000001e+00 -1.1126060457304307e+01 1.6300761266194748e+00 -2.7662283658262670e+00 1 0 0 +74 1 3 -1.0500000000000000e+00 -1.0899777242788575e+01 4.5141879531325131e+00 -1.1432521082081628e+00 1 0 0 +75 1 3 -1.0500000000000000e+00 -1.1264743494035447e+01 1.7257868171081157e+00 -1.1446439904893246e+00 1 0 0 +76 1 4 -9.4999999999999996e-01 6.8504252550065843e+00 2.9090852617536171e+00 -1.0250226167091654e+00 0 0 0 +77 1 3 -1.0500000000000000e+00 8.2077473575870670e+00 9.2973693217553688e-01 -3.2442419395420981e+00 0 0 0 +78 1 3 -1.0500000000000000e+00 -1.1008923595240562e+01 3.1042496762326763e+00 -3.2434162193484148e+00 1 0 0 +79 1 3 -1.0500000000000000e+00 -9.8631808888372188e+00 7.7276710294123774e-01 
-3.0460701921807960e+00 1 0 0 +80 1 5 4.2499999999999999e-01 7.3553731471165094e+00 3.7750787575772335e+00 -1.2374424942294322e+00 0 0 0 +81 1 1 1.5750000000000000e+00 -7.7206401882061746e+00 1.4882898218945044e+00 -1.0812584217809729e-03 1 0 0 +82 1 2 2.1000000000000001e+00 -6.9318913071396580e+00 8.8638081162334181e+00 2.7520702511657547e+00 1 0 0 +83 1 2 2.1000000000000001e+00 -6.9060648624012906e+00 2.8526262402975249e+00 2.7596169460676006e+00 1 0 0 +84 1 3 -1.0500000000000000e+00 -7.1324913614342149e+00 -3.1305213230350404e-02 1.1367384001582508e+00 1 0 0 +85 1 3 -1.0500000000000000e+00 -6.7672404919867226e+00 2.7568702222101642e+00 1.1381405338448083e+00 1 0 0 +86 1 4 -9.4999999999999996e-01 -9.4023397012950252e+00 1.5740630961057498e+00 1.0189579178024069e+00 1 0 0 +87 1 3 -1.0500000000000000e+00 -1.0759783557906536e+01 3.5528960691954445e+00 3.2377228214345362e+00 1 0 0 +88 1 3 -1.0500000000000000e+00 -7.0233104229350349e+00 1.3784037207955535e+00 3.2369231215909693e+00 1 0 0 +89 1 3 -1.0500000000000000e+00 -8.1688155767960176e+00 3.7101632112761109e+00 3.0393623580722871e+00 1 0 0 +90 1 5 4.2499999999999999e-01 -9.9069587594590995e+00 7.0858432122345505e-01 1.2341412755829353e+00 1 0 0 +91 1 1 1.5750000000000000e+00 -5.1124710337727040e+00 5.9712269673627354e+00 -1.1005750706711837e-03 1 0 0 +92 1 2 2.1000000000000001e+00 -9.5400750826900236e+00 4.3808726655238566e+00 2.7520886312657993e+00 1 0 0 +93 1 2 2.1000000000000001e+00 -9.4578779728056119e+00 7.3355202382622302e+00 2.7596041587940974e+00 1 0 0 +94 1 3 -1.0500000000000000e+00 -9.6843493213260725e+00 4.4515687309881606e+00 1.1367428848243328e+00 1 0 0 +95 1 3 -1.0500000000000000e+00 -9.3190360046761604e+00 7.2397555503813784e+00 1.1381283743765493e+00 1 0 0 +96 1 4 -9.4999999999999996e-01 -6.7941318287943044e+00 6.0570110449549759e+00 1.0189986124375956e+00 1 0 0 +97 1 3 -1.0500000000000000e+00 -8.1515984802680244e+00 8.0357874501742721e+00 3.2377029421208778e+00 1 0 0 +98 1 3 
-1.0500000000000000e+00 -9.5751378881570002e+00 5.8613657458809421e+00 3.2369051279403411e+00 1 0 0 +99 1 3 -1.0500000000000000e+00 -1.0720608192402096e+01 8.1930901284224831e+00 3.0393178234034188e+00 1 0 0 +100 1 5 4.2499999999999999e-01 -7.2987396846922490e+00 5.1916297578326613e+00 1.2344582686008589e+00 1 0 0 +101 1 1 1.5750000000000000e+00 -7.7034308104540345e+00 7.4772140769486413e+00 -5.6197731413192287e-03 1 0 0 +102 1 2 2.1000000000000001e+00 -8.4921119129382436e+00 1.0182970310578554e-01 -2.7586972291666285e+00 1 0 0 +103 1 2 2.1000000000000001e+00 -8.5178789427259556e+00 6.1129672105684811e+00 -2.7662420541728974e+00 1 0 0 +104 1 3 -1.0500000000000000e+00 -8.2916269406042815e+00 8.9970574537439134e+00 -1.1432477637342799e+00 1 0 0 +105 1 3 -1.0500000000000000e+00 -8.6565431334381540e+00 6.2086700919000997e+00 -1.1446559772339153e+00 1 0 0 +106 1 4 -9.4999999999999996e-01 -6.0213736470209289e+00 7.3920169164383012e+00 -1.0249968704970076e+00 1 0 0 +107 1 3 -1.0500000000000000e+00 -4.6640665339764151e+00 5.4126315535304492e+00 -3.2442609407384317e+00 1 0 0 +108 1 3 -1.0500000000000000e+00 -8.4007493355621392e+00 7.5872053220556950e+00 -3.2434325373540069e+00 1 0 0 +109 1 3 -1.0500000000000000e+00 -7.2549763508011527e+00 5.2556901927825770e+00 -3.0461113024904209e+00 1 0 0 +110 1 5 4.2499999999999999e-01 -5.5164249475720561e+00 8.2580855166510574e+00 -1.2372386431255986e+00 1 0 0 +111 1 1 1.5750000000000000e+00 -1.0311595596909763e+01 2.9942785375934520e+00 -5.6010879733605634e-03 1 0 0 +112 1 2 2.1000000000000001e+00 -5.8839277840350794e+00 4.5847597474795627e+00 -2.7587147329177935e+00 1 0 0 +113 1 2 2.1000000000000001e+00 -5.9660640139885333e+00 1.6300766033495755e+00 -2.7662298309066697e+00 1 0 0 +114 1 3 -1.0500000000000000e+00 -5.7397721868746547e+00 4.5141858483596486e+00 -1.1432542008572852e+00 1 0 0 +115 1 3 -1.0500000000000000e+00 -6.1047441414010697e+00 1.7257910143984674e+00 -1.1446453815549233e+00 1 0 0 +116 1 4 -9.4999999999999996e-01 
-8.6295822895689920e+00 2.9090779511777676e+00 -1.0250302130375939e+00 1 0 0 +117 1 3 -1.0500000000000000e+00 -7.2722489263206986e+00 9.2973628122143026e-01 -3.2442411958593489e+00 1 0 0 +118 1 3 -1.0500000000000000e+00 -5.8489208202980265e+00 3.1042509548662487e+00 -3.2434183592489791e+00 1 0 0 +119 1 3 -1.0500000000000000e+00 -4.7031825360221502e+00 7.7276491102555767e-01 -3.0460656077210357e+00 1 0 0 +120 1 5 4.2499999999999999e-01 -8.1246457266301864e+00 3.7750624151061807e+00 -1.2375138030883610e+00 1 0 0 +121 1 1 1.5750000000000000e+00 -2.5606329385739004e+00 1.4882876387506627e+00 -1.0802971591790111e-03 1 0 0 +122 1 2 2.1000000000000001e+00 -1.7718921832935095e+00 8.8638064760312680e+00 2.7520727645518317e+00 1 0 0 +123 1 2 2.1000000000000001e+00 -1.7460630957075320e+00 2.8526261301782654e+00 2.7596182888033560e+00 1 0 0 +124 1 3 -1.0500000000000000e+00 -1.9724962744006582e+00 -3.1302525320935359e-02 1.1367406594669021e+00 1 0 0 +125 1 3 -1.0500000000000000e+00 -1.6072398016960410e+00 2.7568655221939551e+00 1.1381416172788992e+00 1 0 0 +126 1 4 -9.4999999999999996e-01 -4.2423468629791490e+00 1.5740570781575833e+00 1.0189509285491933e+00 1 0 0 +127 1 3 -1.0500000000000000e+00 -5.5997821570648973e+00 3.5528937411991670e+00 3.2377242628374425e+00 1 0 0 +128 1 3 -1.0500000000000000e+00 -1.8633136078718611e+00 1.3784018887914193e+00 3.2369255823977880e+00 1 0 0 +129 1 3 -1.0500000000000000e+00 -3.0088127134797826e+00 3.7101647071782331e+00 3.0393572972698379e+00 1 0 0 +130 1 5 4.2499999999999999e-01 -4.7469759999778125e+00 7.0856900200933737e-01 1.2340771581365360e+00 1 0 0 +131 1 1 1.5750000000000000e+00 4.7533764258004041e-02 5.9712249082954791e+00 -1.0980032702558873e-03 1 0 0 +132 1 2 2.1000000000000001e+00 -4.3800778559486355e+00 4.3808730955712392e+00 2.7520867646571521e+00 1 0 0 +133 1 2 2.1000000000000001e+00 -4.2978813666723550e+00 7.3355189936402283e+00 2.7596038854553466e+00 1 0 0 +134 1 3 -1.0500000000000000e+00 -4.5243507576301081e+00 
4.4515670379929304e+00 1.1367428603445937e+00 1 0 0 +135 1 3 -1.0500000000000000e+00 -4.1590395183545183e+00 7.2397536991140861e+00 1.1381282657041645e+00 1 0 0 +136 1 4 -9.4999999999999996e-01 -1.6341272396600957e+00 6.0570237762126418e+00 1.0190134201552699e+00 1 0 0 +137 1 3 -1.0500000000000000e+00 -2.9915950681060766e+00 8.0357879652451025e+00 3.2377031654102684e+00 1 0 0 +138 1 3 -1.0500000000000000e+00 -4.4151372958651747e+00 5.8613630169435886e+00 3.2369046247973667e+00 1 0 0 +139 1 3 -1.0500000000000000e+00 -5.5606088011257127e+00 8.1930870861008209e+00 3.0393206313569685e+00 1 0 0 +140 1 5 4.2499999999999999e-01 -2.1387086319686546e+00 5.1916539522099150e+00 1.2345711189225028e+00 1 0 0 +141 1 1 1.5750000000000000e+00 -2.5434257506591189e+00 7.4772111640629149e+00 -5.6166073635797176e-03 1 0 0 +142 1 2 2.1000000000000001e+00 -3.3321080973339834e+00 1.0183053426608524e-01 -2.7586955337533938e+00 1 0 0 +143 1 2 2.1000000000000001e+00 -3.3578756775438610e+00 6.1129677881450775e+00 -2.7662419312279969e+00 1 0 0 +144 1 3 -1.0500000000000000e+00 -3.1316261087441042e+00 8.9970596129863871e+00 -1.1432480013029860e+00 1 0 0 +145 1 3 -1.0500000000000000e+00 -3.4965397924140653e+00 6.2086715233472134e+00 -1.1446562154372266e+00 1 0 0 +146 1 4 -9.4999999999999996e-01 -8.6136969835578370e-01 7.3920295019150544e+00 -1.0249827983121680e+00 1 0 0 +147 1 3 -1.0500000000000000e+00 4.9593486485604110e-01 5.4126301727891821e+00 -3.2442607643276054e+00 1 0 0 +148 1 3 -1.0500000000000000e+00 -3.2407499951325498e+00 7.5872078587173846e+00 -3.2434313976945433e+00 1 0 0 +149 1 3 -1.0500000000000000e+00 -2.0949760055733631e+00 5.2556933239404167e+00 -3.0461145312557329e+00 1 0 0 +150 1 5 4.2499999999999999e-01 -3.5639556828485119e-01 8.2581075202862202e+00 -1.2371324129475632e+00 1 0 0 +151 1 1 1.5750000000000000e+00 -5.1516025778253747e+00 2.9942806292952824e+00 -5.6020314364904067e-03 1 0 0 +152 1 2 2.1000000000000001e+00 -7.2392846989897031e-01 4.5847591850911407e+00 
-2.7587123572021817e+00 1 0 0 +153 1 2 2.1000000000000001e+00 -8.0606124093459997e-01 1.6300756900441478e+00 -2.7662282754379124e+00 1 0 0 +154 1 3 -1.0500000000000000e+00 -5.7977714984581930e-01 4.5141882260288142e+00 -1.1432523173441691e+00 1 0 0 +155 1 3 -1.0500000000000000e+00 -9.4474362669159540e-01 1.7257867957754662e+00 -1.1446438349685657e+00 1 0 0 +156 1 4 -9.4999999999999996e-01 -3.4695751862629614e+00 2.9090849623510024e+00 -1.0250227718480005e+00 1 0 0 +157 1 3 -1.0500000000000000e+00 -2.1122520231077235e+00 9.2973734613946490e-01 -3.2442422530911523e+00 1 0 0 +158 1 3 -1.0500000000000000e+00 -6.8892351775213534e-01 3.1042507000580102e+00 -3.2434160931881308e+00 1 0 0 +159 1 3 -1.0500000000000000e+00 4.5682015302910983e-01 7.7276675239134107e-01 -3.0460702154371067e+00 1 0 0 +160 1 5 4.2499999999999999e-01 -2.9646266683981821e+00 3.7750790184820460e+00 -1.2374424236664838e+00 1 0 0 +161 1 1 1.5750000000000000e+00 2.6556818821090964e+00 1.0454139317685271e+01 -1.0967487592186131e-03 0 0 0 +162 1 2 2.1000000000000001e+00 3.2190883901663732e+00 -1.8033682997326732e+01 2.7520792634058324e+00 0 1 0 +163 1 2 2.1000000000000001e+00 3.4702724007933625e+00 1.1818456050027734e+01 2.7596095643846965e+00 0 0 0 +164 1 3 -1.0500000000000000e+00 3.2438190247714367e+00 8.9345087495768887e+00 1.1367286810172814e+00 0 0 0 +165 1 3 -1.0500000000000000e+00 3.6091019184592668e+00 1.1722713133488707e+01 1.1381300087342048e+00 0 0 0 +166 1 4 -9.4999999999999996e-01 9.7403525154598114e-01 1.0539969137000679e+01 1.0190361573885678e+00 0 0 0 +167 1 3 -1.0500000000000000e+00 -3.8344098619301015e-01 1.2518709381128804e+01 3.2377129561903750e+00 0 0 0 +168 1 3 -1.0500000000000000e+00 3.3530346120893668e+00 1.0344256721199777e+01 3.2368964278836216e+00 0 0 0 +169 1 3 -1.0500000000000000e+00 2.2075261249614506e+00 1.2675984414385344e+01 3.0393755293097939e+00 0 0 0 +170 1 5 4.2499999999999999e-01 4.6946947232569691e-01 9.6746169226990908e+00 1.2347503550646231e+00 0 0 0 +171 1 1 
1.5750000000000000e+00 5.2638682461479931e+00 1.4937024367958717e+01 -1.0830929787566390e-03 0 0 0 +172 1 2 2.1000000000000001e+00 8.3627348727016582e-01 1.3346719747969363e+01 2.7520900367341774e+00 0 0 0 +173 1 2 2.1000000000000001e+00 9.1844390475228010e-01 1.6301351155347096e+01 2.7596146755120223e+00 0 0 0 +174 1 3 -1.0500000000000000e+00 6.9200904179799849e-01 1.3417418895837162e+01 1.1367448072616178e+00 0 0 0 +175 1 3 -1.0500000000000000e+00 1.0572750533869613e+00 1.6205584802874231e+01 1.1381396583081482e+00 0 0 0 +176 1 4 -9.4999999999999996e-01 3.5821591649189308e+00 1.5022772754878350e+01 1.0189362598570391e+00 0 0 0 +177 1 3 -1.0500000000000000e+00 2.2247238837666679e+00 1.7001626385194026e+01 3.2377190326894834e+00 0 0 0 +178 1 3 -1.0500000000000000e+00 8.0118846368444352e-01 1.4827145539656680e+01 3.2369273314691132e+00 0 0 0 +179 1 3 -1.0500000000000000e+00 -3.4429783416864623e-01 1.7158905791057652e+01 3.0393359488930063e+00 0 0 0 +180 1 5 4.2499999999999999e-01 3.0775120285271527e+00 1.4157276685022456e+01 1.2339704188105145e+00 0 0 0 +181 1 1 1.5750000000000000e+00 2.6728962897491595e+00 1.6443030737972894e+01 -5.6130056081560298e-03 0 0 0 +182 1 2 2.1000000000000001e+00 1.8842162783347014e+00 9.0676400472734358e+00 -2.7587067354302466e+00 0 0 0 +183 1 2 2.1000000000000001e+00 1.8584430112652637e+00 1.5078808460300461e+01 -2.7662346501713060e+00 0 0 0 +184 1 3 -1.0500000000000000e+00 1.8593747354599621e+00 -1.7900383824518766e+01 -1.1432557087206643e+00 0 1 0 +185 1 3 -1.0500000000000000e+00 1.7197671926689502e+00 1.5174523882890096e+01 -1.1446509888927761e+00 0 0 0 +186 1 4 -9.4999999999999996e-01 4.3549452942679565e+00 1.6357848451105280e+01 -1.0249948286125878e+00 0 0 0 +187 1 3 -1.0500000000000000e+00 5.7122579118579146e+00 1.4378464668268730e+01 -3.2442496238843814e+00 0 0 0 +188 1 3 -1.0500000000000000e+00 1.9755855592791320e+00 1.6553006355938908e+01 -3.2434313676473057e+00 0 0 0 +189 1 3 -1.0500000000000000e+00 3.1213317655781960e+00 
1.4221501186880683e+01 -3.0460712151507545e+00 0 0 0 +190 1 5 4.2499999999999999e-01 4.8599000881537826e+00 1.7223900425388404e+01 -1.2372316785225088e+00 0 0 0 +191 1 1 1.5750000000000000e+00 6.4751958332497139e-02 1.1960086179452826e+01 -5.5940253527388961e-03 0 0 0 +192 1 2 2.1000000000000001e+00 4.4924009282296247e+00 1.3550578140910872e+01 -2.7587007070121921e+00 0 0 0 +193 1 2 2.1000000000000001e+00 4.4102752196003863e+00 1.0595884839261831e+01 -2.7662291626403626e+00 0 0 0 +194 1 3 -1.0500000000000000e+00 4.6365759504820634e+00 1.3479998807602311e+01 -1.1432425507490294e+00 0 0 0 +195 1 3 -1.0500000000000000e+00 4.2715994293334454e+00 1.0691581675818046e+01 -1.1446410827572890e+00 0 0 0 +196 1 4 -9.4999999999999996e-01 1.7467323627051137e+00 1.1874836178893101e+01 -1.0250877304956010e+00 0 0 0 +197 1 3 -1.0500000000000000e+00 3.1040856453427352e+00 9.8955602020005813e+00 -3.2442403996714972e+00 0 0 0 +198 1 3 -1.0500000000000000e+00 4.5274060012281065e+00 1.2070063409836187e+01 -3.2434000150922735e+00 0 0 0 +199 1 3 -1.0500000000000000e+00 5.6731636207942486e+00 9.7385956733565244e+00 -3.0460988419800330e+00 0 0 0 +200 1 5 4.2499999999999999e-01 2.2516283969072273e+00 1.2740742133603828e+01 -1.2379506508609781e+00 0 0 0 +201 1 1 1.5750000000000000e+00 7.8156893616331828e+00 1.0454137088473082e+01 -1.0955111275805507e-03 0 0 0 +202 1 2 2.1000000000000001e+00 -1.2260912502451882e+01 -1.8033684121902734e+01 2.7520815974654695e+00 1 1 0 +203 1 2 2.1000000000000001e+00 -1.2009724236432007e+01 1.1818456368836152e+01 2.7596111897949385e+00 1 0 0 +204 1 3 -1.0500000000000000e+00 8.4038140943966368e+00 8.9345113147968043e+00 1.1367312931978493e+00 0 0 0 +205 1 3 -1.0500000000000000e+00 -1.1870897365188386e+01 1.1722708787330966e+01 1.1381317133583817e+00 1 0 0 +206 1 4 -9.4999999999999996e-01 6.1340280078189942e+00 1.0539962575114235e+01 1.0190296172793811e+00 0 0 0 +207 1 3 -1.0500000000000000e+00 4.7765633472017175e+00 1.2518709397228395e+01 3.2377142920171860e+00 
0 0 0 +208 1 3 -1.0500000000000000e+00 -1.2126968373801200e+01 1.0344255623715938e+01 3.2368988134331111e+00 1 0 0 +209 1 3 -1.0500000000000000e+00 7.3675282518155782e+00 1.2675986281628141e+01 3.0393708032009918e+00 0 0 0 +210 1 5 4.2499999999999999e-01 5.6294525251353491e+00 9.6746021216973723e+00 1.2346860554186367e+00 0 0 0 +211 1 1 1.5750000000000000e+00 -1.0216126765500446e+01 1.4937021698072794e+01 -1.0804571915681294e-03 1 0 0 +212 1 2 2.1000000000000001e+00 5.9962693813004755e+00 1.3346718357487401e+01 2.7520884302740498e+00 0 0 0 +213 1 2 2.1000000000000001e+00 6.0784425686717896e+00 1.6301352145254210e+01 2.7596145213284817e+00 0 0 0 +214 1 3 -1.0500000000000000e+00 5.8520075816368156e+00 1.3417417209742045e+01 1.1367453979659494e+00 0 0 0 +215 1 3 -1.0500000000000000e+00 6.2172711652894392e+00 1.6205582997226156e+01 1.1381400867322924e+00 0 0 0 +216 1 4 -9.4999999999999996e-01 -1.1897836054868892e+01 1.5022786314249299e+01 1.0189507923373213e+00 1 0 0 +217 1 3 -1.0500000000000000e+00 7.3847241452337329e+00 1.7001624783744415e+01 3.2377191729581032e+00 0 0 0 +218 1 3 -1.0500000000000000e+00 5.9611885897234238e+00 1.4827142204243398e+01 3.2369270002236199e+00 0 0 0 +219 1 3 -1.0500000000000000e+00 4.8157014817288921e+00 1.7158902704342633e+01 3.0393389989495638e+00 0 0 0 +220 1 5 4.2499999999999999e-01 8.2375429461726419e+00 1.4157300311477027e+01 1.2340837858505793e+00 0 0 0 +221 1 1 1.5750000000000000e+00 7.8329018338411949e+00 1.6443028121723327e+01 -5.6100773936336168e-03 0 0 0 +222 1 2 2.1000000000000001e+00 7.0442203430868133e+00 9.0676404834672937e+00 -2.7587051720268496e+00 0 0 0 +223 1 2 2.1000000000000001e+00 7.0184458690821359e+00 1.5078808448004967e+01 -2.7662344750219390e+00 0 0 0 +224 1 3 -1.0500000000000000e+00 7.0193755008091827e+00 -1.7900382144787855e+01 -1.1432558848603609e+00 0 1 0 +225 1 3 -1.0500000000000000e+00 6.8797708305099192e+00 1.5174525646707149e+01 -1.1446511126747918e+00 0 0 0 +226 1 4 -9.4999999999999996e-01 
-1.1125050731138488e+01 1.6357860900902505e+01 -1.0249808380157237e+00 1 0 0 +227 1 3 -1.0500000000000000e+00 -9.7677403920054449e+00 1.4378463591820999e+01 -3.2442496211494918e+00 1 0 0 +228 1 3 -1.0500000000000000e+00 7.1355847058613584e+00 1.6553008836399872e+01 -3.2434306502393415e+00 0 0 0 +229 1 3 -1.0500000000000000e+00 8.2813321720628821e+00 1.4221504404451952e+01 -3.0460741300753185e+00 0 0 0 +230 1 5 4.2499999999999999e-01 -1.0620070568908549e+01 1.7223922518014870e+01 -1.2371248265838757e+00 1 0 0 +231 1 1 1.5750000000000000e+00 5.2247450830562165e+00 1.1960087963897653e+01 -5.5952218029862166e-03 0 0 0 +232 1 2 2.1000000000000001e+00 -1.0987599973565146e+01 1.3550576905434923e+01 -2.7586985908013846e+00 1 0 0 +233 1 2 2.1000000000000001e+00 -1.1069721131969022e+01 1.0595884194424993e+01 -2.7662274903218655e+00 1 0 0 +234 1 3 -1.0500000000000000e+00 -1.0843428909969546e+01 1.3480001106285169e+01 -1.1432405923923277e+00 1 0 0 +235 1 3 -1.0500000000000000e+00 -1.1208400116673181e+01 1.0691577426474574e+01 -1.1446395001093119e+00 1 0 0 +236 1 4 -9.4999999999999996e-01 6.9067401423823966e+00 1.1874843567670599e+01 -1.0250803468076732e+00 0 0 0 +237 1 3 -1.0500000000000000e+00 8.2640815008668724e+00 9.8955607832155152e+00 -3.2442416068615270e+00 0 0 0 +238 1 3 -1.0500000000000000e+00 -1.0952596684000325e+01 1.2070063438815009e+01 -3.2433976923455790e+00 1 0 0 +239 1 3 -1.0500000000000000e+00 -9.8068349057340996e+00 9.7385976839622721e+00 -3.0461032972181128e+00 1 0 0 +240 1 5 4.2499999999999999e-01 7.4116471684275176e+00 1.2740758474319527e+01 -1.2378788260679467e+00 0 0 0 +241 1 1 1.5750000000000000e+00 -7.6643179426497525e+00 1.0454139419937150e+01 -1.0962229039090943e-03 1 0 0 +242 1 2 2.1000000000000001e+00 -7.1009118316832343e+00 -1.8033684438409352e+01 2.7520791275920509e+00 1 1 0 +243 1 2 2.1000000000000001e+00 -6.8497278061126945e+00 1.1818456215583829e+01 2.7596097726734108e+00 1 0 0 +244 1 3 -1.0500000000000000e+00 -7.0761809550557970e+00 
8.9345089461886964e+00 1.1367286776553360e+00 1 0 0 +245 1 3 -1.0500000000000000e+00 -6.7108981539992012e+00 1.1722712925609560e+01 1.1381300943841577e+00 1 0 0 +246 1 4 -9.4999999999999996e-01 -9.3459642299118482e+00 1.0539969495434978e+01 1.0190365296880355e+00 1 0 0 +247 1 3 -1.0500000000000000e+00 -1.0703441171752413e+01 1.2518709623233921e+01 3.2377133170104671e+00 1 0 0 +248 1 3 -1.0500000000000000e+00 -6.9669653989505278e+00 1.0344256930017146e+01 3.2368965623591759e+00 1 0 0 +249 1 3 -1.0500000000000000e+00 -8.1124740218176914e+00 1.2675984357684172e+01 3.0393755219123175e+00 1 0 0 +250 1 5 4.2499999999999999e-01 -9.8505304823245687e+00 9.6746170371754623e+00 1.2347510489464995e+00 1 0 0 +251 1 1 1.5750000000000000e+00 -5.0561316803539889e+00 1.4937024268043505e+01 -1.0829065734565546e-03 1 0 0 +252 1 2 2.1000000000000001e+00 -9.4837259053398899e+00 1.3346719962723810e+01 2.7520900108513082e+00 1 0 0 +253 1 2 2.1000000000000001e+00 -9.4015551966123336e+00 1.6301350642789163e+01 2.7596147867214107e+00 1 0 0 +254 1 3 -1.0500000000000000e+00 -9.6279910381059679e+00 1.3417418875038809e+01 1.1367446598910824e+00 1 0 0 +255 1 3 -1.0500000000000000e+00 -9.2627250986713108e+00 1.6205584991793625e+01 1.1381398148417947e+00 1 0 0 +256 1 4 -9.4999999999999996e-01 -6.7378409974283313e+00 1.5022773040573416e+01 1.0189360574966457e+00 1 0 0 +257 1 3 -1.0500000000000000e+00 -8.0952765293247868e+00 1.7001626509746554e+01 3.2377188951214659e+00 1 0 0 +258 1 3 -1.0500000000000000e+00 -9.5188119224387648e+00 1.4827145992564279e+01 3.2369272786018328e+00 1 0 0 +259 1 3 -1.0500000000000000e+00 -1.0664297889974348e+01 1.7158906077131672e+01 3.0393361196823427e+00 1 0 0 +260 1 5 4.2499999999999999e-01 -7.2424879190792639e+00 1.4157276762729641e+01 1.2339705980408517e+00 1 0 0 +261 1 1 1.5750000000000000e+00 -7.6471035513231538e+00 1.6443030607589659e+01 -5.6125884183728658e-03 1 0 0 +262 1 2 2.1000000000000001e+00 -8.4357826338081772e+00 9.0676404123257832e+00 
-2.7587070775740408e+00 1 0 0 +263 1 2 2.1000000000000001e+00 -8.4615566925045371e+00 1.5078808796634203e+01 -2.7662348683445597e+00 1 0 0 +264 1 3 -1.0500000000000000e+00 -8.4606253957796245e+00 -1.7900383839740815e+01 -1.1432555267487441e+00 1 1 0 +265 1 3 -1.0500000000000000e+00 -8.6002330838050476e+00 1.5174523645600292e+01 -1.1446511932328676e+00 1 0 0 +266 1 4 -9.4999999999999996e-01 -5.9650547142172607e+00 1.6357848553350390e+01 -1.0249949313794904e+00 1 0 0 +267 1 3 -1.0500000000000000e+00 -4.6077428484131735e+00 1.4378463759301493e+01 -3.2442497119880169e+00 1 0 0 +268 1 3 -1.0500000000000000e+00 -8.3444145492062560e+00 1.6553005185437367e+01 -3.2434310483664923e+00 1 0 0 +269 1 3 -1.0500000000000000e+00 -7.1986685645277566e+00 1.4221501454485075e+01 -3.0460709514638733e+00 1 0 0 +270 1 5 4.2499999999999999e-01 -5.4600998588816294e+00 1.7223900322661347e+01 -1.2372318792667087e+00 1 0 0 +271 1 1 1.5750000000000000e+00 -1.0255247970690887e+01 1.1960086333452317e+01 -5.5943668067435226e-03 1 0 0 +272 1 2 2.1000000000000001e+00 -5.8275988002349521e+00 1.3550577809482188e+01 -2.7587009813069550e+00 1 0 0 +273 1 2 2.1000000000000001e+00 -5.9097244493462071e+00 1.0595884450080696e+01 -2.7662291068655538e+00 1 0 0 +274 1 3 -1.0500000000000000e+00 -5.6834239852945680e+00 1.3479998862366219e+01 -1.1432428233272507e+00 1 0 0 +275 1 3 -1.0500000000000000e+00 -6.0484005454403418e+00 1.0691581478572949e+01 -1.1446409670840598e+00 1 0 0 +276 1 4 -9.4999999999999996e-01 -8.5732677527921446e+00 1.1874836600405910e+01 -1.0250875495358311e+00 1 0 0 +277 1 3 -1.0500000000000000e+00 -7.2159157266611800e+00 9.8955589169132132e+00 -3.2442406060653104e+00 1 0 0 +278 1 3 -1.0500000000000000e+00 -5.7925936828689233e+00 1.2070064831351676e+01 -3.2433999039629473e+00 1 0 0 +279 1 3 -1.0500000000000000e+00 -4.6468366745099621e+00 9.7385958523553278e+00 -3.0460988386163157e+00 1 0 0 +280 1 5 4.2499999999999999e-01 -8.0683717020120653e+00 1.2740741975259727e+01 -1.2379504409220923e+00 
1 0 0 +281 1 1 1.5750000000000000e+00 -2.5043103063231698e+00 1.0454137041769233e+01 -1.0955776034045073e-03 1 0 0 +282 1 2 2.1000000000000001e+00 -1.9409113681576287e+00 -1.8033684630667693e+01 2.7520816482150821e+00 1 1 0 +283 1 2 2.1000000000000001e+00 -1.6897237585569442e+00 1.1818456159006658e+01 2.7596113602032819e+00 1 0 0 +284 1 3 -1.0500000000000000e+00 -1.9161858687627795e+00 8.9345113277413226e+00 1.1367311570806979e+00 1 0 0 +285 1 3 -1.0500000000000000e+00 -1.5508974960657724e+00 1.1722708736204648e+01 1.1381317626904028e+00 1 0 0 +286 1 4 -9.4999999999999996e-01 -4.1859719657524312e+00 1.0539962987801832e+01 1.0190299948366395e+00 1 0 0 +287 1 3 -1.0500000000000000e+00 -5.5434364533052758e+00 1.2518708822565813e+01 3.2377141022240199e+00 1 0 0 +288 1 3 -1.0500000000000000e+00 -1.8069684705864013e+00 1.0344256386383396e+01 3.2368987567931882e+00 1 0 0 +289 1 3 -1.0500000000000000e+00 -2.9524716360243488e+00 1.2675986602553834e+01 3.0393706229990389e+00 1 0 0 +290 1 5 4.2499999999999999e-01 -4.6905473984212733e+00 9.6746022181425850e+00 1.2346865349224334e+00 1 0 0 +291 1 1 1.5750000000000000e+00 1.0387263453565154e-01 1.4937021679653970e+01 -1.0803087316890725e-03 1 0 0 +292 1 2 2.1000000000000001e+00 -4.3237306195777112e+00 1.3346718865490875e+01 2.7520886736955354e+00 1 0 0 +293 1 2 2.1000000000000001e+00 -4.2415576036549307e+00 1.6301351409312520e+01 2.7596145214836199e+00 1 0 0 +294 1 3 -1.0500000000000000e+00 -4.4679924179422645e+00 1.3417417242248238e+01 1.1367455899036667e+00 1 0 0 +295 1 3 -1.0500000000000000e+00 -4.1027286922604711e+00 1.6205583108737766e+01 1.1381399975737896e+00 1 0 0 +296 1 4 -9.4999999999999996e-01 -1.5778360664000299e+00 1.5022786282606827e+01 1.0189506681801621e+00 1 0 0 +297 1 3 -1.0500000000000000e+00 -2.9352760886329765e+00 1.7001624816618172e+01 3.2377191689188400e+00 1 0 0 +298 1 3 -1.0500000000000000e+00 -4.3588113039383209e+00 1.4827142112865648e+01 3.2369268844328793e+00 1 0 0 +299 1 3 -1.0500000000000000e+00 
-5.5042987520995075e+00 1.7158903098032237e+01 3.0393390022499229e+00 1 0 0 +300 1 5 4.2499999999999999e-01 -2.0824569877477490e+00 1.4157300358450829e+01 1.2340840580285750e+00 1 0 0 +301 1 1 1.5750000000000000e+00 -2.4870981803080809e+00 1.6443028209964741e+01 -5.6102401653319589e-03 1 0 0 +302 1 2 2.1000000000000001e+00 -3.2757783750802085e+00 9.0676411986375776e+00 -2.7587052630914108e+00 1 0 0 +303 1 2 2.1000000000000001e+00 -3.3015530905199171e+00 1.5078808433950211e+01 -2.7662344480276566e+00 1 0 0 +304 1 3 -1.0500000000000000e+00 -3.3006243233085435e+00 -1.7900381957020564e+01 -1.1432559054884770e+00 1 1 0 +305 1 3 -1.0500000000000000e+00 -3.4402291504896514e+00 1.5174525203564766e+01 -1.1446511726399660e+00 1 0 0 +306 1 4 -9.4999999999999996e-01 -8.0505097226361499e-01 1.6357860783458040e+01 -1.0249807422121293e+00 1 0 0 +307 1 3 -1.0500000000000000e+00 5.5225961157958281e-01 1.4378463293307757e+01 -3.2442495562097395e+00 1 0 0 +308 1 3 -1.0500000000000000e+00 -3.1844152097644951e+00 1.6553009129396468e+01 -3.2434302097628276e+00 1 0 0 +309 1 3 -1.0500000000000000e+00 -2.0386691285593450e+00 1.4221505202153235e+01 -3.0460742898602859e+00 1 0 0 +310 1 5 4.2499999999999999e-01 -3.0007039553907155e-01 1.7223922716132360e+01 -1.2371243431452115e+00 1 0 0 +311 1 1 1.5750000000000000e+00 -5.0952547864220676e+00 1.1960088100769052e+01 -5.5953583749186464e-03 1 0 0 +312 1 2 2.1000000000000001e+00 -6.6759902672779958e-01 1.3550576575741974e+01 -2.7586981099376437e+00 1 0 0 +313 1 2 2.1000000000000001e+00 -7.4972139961055007e-01 1.0595883968448494e+01 -2.7662273023982697e+00 1 0 0 +314 1 3 -1.0500000000000000e+00 -5.2342899419099176e-01 1.3480001038530336e+01 -1.1432402527662973e+00 1 0 0 +315 1 3 -1.0500000000000000e+00 -8.8840016936638122e-01 1.0691577182826830e+01 -1.1446391895017562e+00 1 0 0 +316 1 4 -9.4999999999999996e-01 -3.4132600876614694e+00 1.1874843552506302e+01 -1.0250800347447182e+00 1 0 0 +317 1 3 -1.0500000000000000e+00 -2.0559190848667779e+00 
9.8955600940128399e+00 -3.2442414555236709e+00 1 0 0 +318 1 3 -1.0500000000000000e+00 -6.3259661236352471e-01 1.2070063816206304e+01 -3.2433976335968744e+00 1 0 0 +319 1 3 -1.0500000000000000e+00 5.1316632183767119e-01 9.7385974180405270e+00 -3.0461032618933910e+00 1 0 0 +320 1 5 4.2499999999999999e-01 -2.9083528061343831e+00 1.2740758511764572e+01 -1.2378790020058172e+00 1 0 0 +321 1 1 1.5750000000000000e+00 2.4866838321129432e+00 -1.6443319488109282e+01 -1.1052122708079537e-03 0 1 0 +322 1 2 2.1000000000000001e+00 3.2754265411166124e+00 -9.0678517679308079e+00 2.7520900797894097e+00 0 1 0 +323 1 2 2.1000000000000001e+00 3.3012812929774746e+00 -1.5079030526064177e+01 2.7596013825947594e+00 0 1 0 +324 1 3 -1.0500000000000000e+00 3.0748056585424361e+00 -1.7962989307471080e+01 1.1367359259236025e+00 0 1 0 +325 1 3 -1.0500000000000000e+00 3.4401235277647455e+00 -1.5174784672674543e+01 1.1381238857923606e+00 0 1 0 +326 1 4 -9.4999999999999996e-01 8.0505286931961884e-01 -1.6357496400929239e+01 1.0190465911180997e+00 0 1 0 +327 1 3 -1.0500000000000000e+00 -5.5243247748446578e-01 -1.4378771011552535e+01 3.2377005530010123e+00 0 1 0 +328 1 3 -1.0500000000000000e+00 3.1840321940634020e+00 -1.6553184601282616e+01 3.2368916486281609e+00 0 1 0 +329 1 3 -1.0500000000000000e+00 2.0385510103668523e+00 -1.4221473310941782e+01 3.0393368510881782e+00 0 1 0 +330 1 5 4.2499999999999999e-01 3.0048969742444598e-01 -1.7222811424191573e+01 1.2348343591199580e+00 0 1 0 +331 1 1 1.5750000000000000e+00 5.0948515088140134e+00 -1.1960433270121339e+01 -1.0876949941369674e-03 0 1 0 +332 1 2 2.1000000000000001e+00 6.6727808983553771e-01 -1.3550740597944426e+01 2.7520760228958601e+00 0 1 0 +333 1 2 2.1000000000000001e+00 7.4943462146550033e-01 -1.0596101661410035e+01 2.7596155693777611e+00 0 1 0 +334 1 3 -1.0500000000000000e+00 5.2299455188436106e-01 -1.3480035921031281e+01 1.1367332195284785e+00 0 1 0 +335 1 3 -1.0500000000000000e+00 8.8825807180432292e-01 -1.0691849953949507e+01 
1.1381370197848408e+00 0 1 0 +336 1 4 -9.4999999999999996e-01 3.4131680476977380e+00 -1.1874640592503349e+01 1.0189839478032976e+00 0 1 0 +337 1 3 -1.0500000000000000e+00 2.0557170504923619e+00 -9.8958405340072435e+00 3.2377210676171551e+00 0 1 0 +338 1 3 -1.0500000000000000e+00 6.3219072957596723e-01 -1.2070320832852712e+01 3.2369125697498298e+00 0 1 0 +339 1 3 -1.0500000000000000e+00 -5.1331886839296281e-01 -9.7385719542020084e+00 3.0393730353943766e+00 0 1 0 +340 1 5 4.2499999999999999e-01 2.9085567042349272e+00 -1.2740073690134071e+01 1.2343383715043785e+00 0 1 0 +341 1 1 1.5750000000000000e+00 2.5039032466604549e+00 -1.0454466538594451e+01 -5.5960730650141954e-03 0 1 0 +342 1 2 2.1000000000000001e+00 1.7152275001596990e+00 -1.7829808688654179e+01 -2.7587166647436021e+00 0 1 0 +343 1 2 2.1000000000000001e+00 1.6894335215566603e+00 -1.1818663688876498e+01 -2.7662265220436604e+00 0 1 0 +344 1 3 -1.0500000000000000e+00 1.9157340427059211e+00 -8.9345473608383177e+00 -1.1432471206377617e+00 0 1 0 +345 1 3 -1.0500000000000000e+00 1.5507516674216397e+00 -1.1722960250115612e+01 -1.1446401076687174e+00 0 1 0 +346 1 4 -9.4999999999999996e-01 4.1858986207638882e+00 -1.0539702481685042e+01 -1.0250735492307399e+00 0 1 0 +347 1 3 -1.0500000000000000e+00 5.5432427049211590e+00 -1.2518994315249024e+01 -3.2442381942600074e+00 0 1 0 +348 1 3 -1.0500000000000000e+00 1.8065709108473413e+00 -1.0344498902003835e+01 -3.2434043420500638e+00 0 1 0 +349 1 3 -1.0500000000000000e+00 2.9523165443770907e+00 -1.2675968128901552e+01 -3.0460780182920182e+00 0 1 0 +350 1 5 4.2499999999999999e-01 4.6908016291113697e+00 -9.6737781755527656e+00 -1.2378367743540553e+00 0 1 0 +351 1 1 1.5750000000000000e+00 -1.0425447203585136e-01 -1.4937358684679706e+01 -5.6106194774994123e-03 0 1 0 +352 1 2 2.1000000000000001e+00 4.3233824712378919e+00 -1.3346914448028802e+01 -2.7587002669798242e+00 0 1 0 +353 1 2 2.1000000000000001e+00 4.2412839821809278e+00 -1.6301594659581504e+01 -2.7662396364223305e+00 0 1 0 
+354 1 3 -1.0500000000000000e+00 4.4675504961105545e+00 -1.3417498905849381e+01 -1.1432426558764934e+00 0 1 0 +355 1 3 -1.0500000000000000e+00 4.1026208373942730e+00 -1.6205898491474812e+01 -1.1446521303438502e+00 0 1 0 +356 1 4 -9.4999999999999996e-01 1.5777692153908944e+00 -1.5022580505130305e+01 -1.0250330934557859e+00 0 1 0 +357 1 3 -1.0500000000000000e+00 2.9350960382309523e+00 -1.7001924243682240e+01 -3.2442567458411018e+00 0 1 0 +358 1 3 -1.0500000000000000e+00 4.3584096627367686e+00 -1.4827364022948601e+01 -3.2434195961678007e+00 0 1 0 +359 1 3 -1.0500000000000000e+00 5.5041860736461103e+00 -1.7158864694292664e+01 -3.0461222072625471e+00 0 1 0 +360 1 5 4.2499999999999999e-01 2.0826986387347990e+00 -1.4156571987326556e+01 -1.2375254033422998e+00 0 1 0 +361 1 1 1.5750000000000000e+00 7.6466908849126014e+00 -1.6443321754016392e+01 -1.1041224510428549e-03 0 1 0 +362 1 2 2.1000000000000001e+00 -1.2204574846737183e+01 -9.0678518278606912e+00 2.7520925524717459e+00 1 1 0 +363 1 2 2.1000000000000001e+00 -1.2178717466768493e+01 -1.5079031499609750e+01 2.7596029924438756e+00 1 1 0 +364 1 3 -1.0500000000000000e+00 8.2348008054954960e+00 -1.7962986869903343e+01 1.1367379330046692e+00 0 1 0 +365 1 3 -1.0500000000000000e+00 -1.2039875609808947e+01 -1.5174789007925407e+01 1.1381252015789638e+00 1 1 0 +366 1 4 -9.4999999999999996e-01 5.9650455103894018e+00 -1.6357502539919235e+01 1.0190398694940352e+00 0 1 0 +367 1 3 -1.0500000000000000e+00 4.6075703186532646e+00 -1.4378771968687596e+01 3.2377016053391294e+00 0 1 0 +368 1 3 -1.0500000000000000e+00 -1.2295970618632911e+01 -1.6553184449361289e+01 3.2368938019479572e+00 1 1 0 +369 1 3 -1.0500000000000000e+00 7.1985537885044550e+00 -1.4221471579874732e+01 3.0393317480666369e+00 0 1 0 +370 1 5 4.2499999999999999e-01 5.4604725076268998e+00 -1.7222826822790651e+01 1.2347698071974254e+00 0 1 0 +371 1 1 1.5750000000000000e+00 -1.0385143665178852e+01 -1.1960435684801363e+01 -1.0853516532023377e-03 1 1 0 +372 1 2 
2.1000000000000001e+00 5.8272745149297016e+00 -1.3550741738215720e+01 2.7520743740516878e+00 0 1 0 +373 1 2 2.1000000000000001e+00 5.9094310788632569e+00 -1.0596100694323482e+01 2.7596156110055325e+00 0 1 0 +374 1 3 -1.0500000000000000e+00 5.6829931158397002e+00 -1.3480037625699254e+01 1.1367334438217025e+00 0 1 0 +375 1 3 -1.0500000000000000e+00 6.0482541515217108e+00 -1.0691851773456836e+01 1.1381373581641387e+00 0 1 0 +376 1 4 -9.4999999999999996e-01 -1.2066827362398454e+01 -1.1874627117564305e+01 1.0189990183751902e+00 1 1 0 +377 1 3 -1.0500000000000000e+00 7.2157190991150912e+00 -9.8958414032052460e+00 3.2377212458579532e+00 0 1 0 +378 1 3 -1.0500000000000000e+00 5.7921912072101094e+00 -1.2070323645164482e+01 3.2369120775048064e+00 0 1 0 +379 1 3 -1.0500000000000000e+00 4.6466814159274534e+00 -9.7385758622745708e+00 3.0393762038813108e+00 0 1 0 +380 1 5 4.2499999999999999e-01 8.0685878358716643e+00 -1.2740049388957750e+01 1.2344525188369744e+00 0 1 0 +381 1 1 1.5750000000000000e+00 7.6639086018705171e+00 -1.0454469248156386e+01 -5.5934189721433825e-03 0 1 0 +382 1 2 2.1000000000000001e+00 6.8752316288344204e+00 -1.7829807823584318e+01 -2.7587150751428755e+00 0 1 0 +383 1 2 2.1000000000000001e+00 6.8494373041901540e+00 -1.1818664293717356e+01 -2.7662262669767514e+00 0 1 0 +384 1 3 -1.0500000000000000e+00 7.0757348813520622e+00 -8.9345457448486751e+00 -1.1432476584343423e+00 0 1 0 +385 1 3 -1.0500000000000000e+00 6.7107552205638186e+00 -1.1722958833010946e+01 -1.1446400190532522e+00 0 1 0 +386 1 4 -9.4999999999999996e-01 -1.1294097090317557e+01 -1.0539689696343999e+01 -1.0250592110944439e+00 1 1 0 +387 1 3 -1.0500000000000000e+00 -9.9367559589065557e+00 -1.2518995914491093e+01 -3.2442378566980130e+00 1 1 0 +388 1 3 -1.0500000000000000e+00 6.9665699549338136e+00 -1.0344494946573882e+01 -3.2434037157304481e+00 0 1 0 +389 1 3 -1.0500000000000000e+00 8.1123166091576344e+00 -1.2675964698511677e+01 -3.0460814138880501e+00 0 1 0 +390 1 5 4.2499999999999999e-01 
-1.0789169005232592e+01 -9.6737561255408266e+00 -1.2377290267876520e+00 1 1 0 +391 1 1 1.5750000000000000e+00 5.0557387988852387e+00 -1.4937356582793674e+01 -5.6114776737725691e-03 0 1 0 +392 1 2 2.1000000000000001e+00 -1.1156617711542536e+01 -1.3346914828324955e+01 -2.7586979036406909e+00 1 1 0 +393 1 2 2.1000000000000001e+00 -1.1238713238960703e+01 -1.6301595679810056e+01 -2.7662378363591245e+00 1 1 0 +394 1 3 -1.0500000000000000e+00 -1.1012454472696536e+01 -1.3417496740820958e+01 -1.1432406294394699e+00 1 1 0 +395 1 3 -1.0500000000000000e+00 -1.1377378816020544e+01 -1.6205903178582190e+01 -1.1446504795207080e+00 1 1 0 +396 1 4 -9.4999999999999996e-01 6.7377771730674745e+00 -1.5022573383214533e+01 -1.0250255533972528e+00 0 1 0 +397 1 3 -1.0500000000000000e+00 8.0950927444749112e+00 -1.7001923167239575e+01 -3.2442576349044270e+00 0 1 0 +398 1 3 -1.0500000000000000e+00 -1.1121593286947569e+01 -1.4827364808022567e+01 -3.2434174922978336e+00 1 1 0 +399 1 3 -1.0500000000000000e+00 -9.9758113911678059e+00 -1.7158863033895148e+01 -3.0461268411966742e+00 1 1 0 +400 1 5 4.2499999999999999e-01 7.2427173351147829e+00 -1.4156555733330411e+01 -1.2374540986225098e+00 0 1 0 +401 1 1 1.5750000000000000e+00 -7.8333163184983183e+00 -1.6443319646672723e+01 -1.1057153806355302e-03 1 1 0 +402 1 2 2.1000000000000001e+00 -7.0445730369177966e+00 -9.0678517366922602e+00 2.7520900766225651e+00 1 1 0 +403 1 2 2.1000000000000001e+00 -7.0187186179690695e+00 -1.5079030072636286e+01 2.7596016789728921e+00 1 1 0 +404 1 3 -1.0500000000000000e+00 -7.2451942661305386e+00 -1.7962989206877381e+01 1.1367357593895537e+00 1 1 0 +405 1 3 -1.0500000000000000e+00 -6.8798763616070744e+00 -1.5174784587161362e+01 1.1381240881921144e+00 1 1 0 +406 1 4 -9.4999999999999996e-01 -9.5149472126778853e+00 -1.6357496630069594e+01 1.0190464689126912e+00 1 1 0 +407 1 3 -1.0500000000000000e+00 -1.0872430842075026e+01 -1.4378769631356926e+01 3.2377002480674850e+00 1 1 0 +408 1 3 -1.0500000000000000e+00 
-7.1359677267783841e+00 -1.6553183385507452e+01 3.2368915915291723e+00 1 1 0 +409 1 3 -1.0500000000000000e+00 -8.2814491205833054e+00 -1.4221473550557636e+01 3.0393366905845873e+00 1 1 0 +410 1 5 4.2499999999999999e-01 -1.0019510323783916e+01 -1.7222811464853066e+01 1.2348342312486977e+00 1 1 0 +411 1 1 1.5750000000000000e+00 -5.2251483644632390e+00 -1.1960433393503029e+01 -1.0877685836163664e-03 1 1 0 +412 1 2 2.1000000000000001e+00 -9.6527221300927266e+00 -1.3550740547461348e+01 2.7520764559672823e+00 1 1 0 +413 1 2 2.1000000000000001e+00 -9.5705654796424948e+00 -1.0596100800569875e+01 2.7596159485162843e+00 1 1 0 +414 1 3 -1.0500000000000000e+00 -9.7970053073902470e+00 -1.3480036138708787e+01 1.1367336512328592e+00 1 1 0 +415 1 3 -1.0500000000000000e+00 -9.4317420563132899e+00 -1.0691849950427759e+01 1.1381373483940251e+00 1 1 0 +416 1 4 -9.4999999999999996e-01 -6.9068318895950576e+00 -1.1874640129321861e+01 1.0189838001766294e+00 1 1 0 +417 1 3 -1.0500000000000000e+00 -8.2642833674986882e+00 -9.8958409782214822e+00 3.2377211142407596e+00 1 1 0 +418 1 3 -1.0500000000000000e+00 -9.6878095287914121e+00 -1.2070321728603117e+01 3.2369122834396791e+00 1 1 0 +419 1 3 -1.0500000000000000e+00 -1.0833317681214478e+01 -9.7385725563457299e+00 3.0393731445060546e+00 1 1 0 +420 1 5 4.2499999999999999e-01 -7.4114434796240518e+00 -1.2740074167369675e+01 1.2343379816684390e+00 1 1 0 +421 1 1 1.5750000000000000e+00 -7.8160968038525320e+00 -1.0454466560247031e+01 -5.5963004576096154e-03 1 1 0 +422 1 2 2.1000000000000001e+00 -8.6047723847717084e+00 -1.7829808050252502e+01 -2.7587167334373737e+00 1 1 0 +423 1 2 2.1000000000000001e+00 -8.6305657139420457e+00 -1.1818663432318676e+01 -2.7662265229212739e+00 1 1 0 +424 1 3 -1.0500000000000000e+00 -8.4042660945301382e+00 -8.9345473279146148e+00 -1.1432468616872651e+00 1 1 0 +425 1 3 -1.0500000000000000e+00 -8.7692484349457747e+00 -1.1722960343102701e+01 -1.1446399478981295e+00 1 1 0 +426 1 4 -9.4999999999999996e-01 
-6.1341012930302261e+00 -1.0539702462863049e+01 -1.0250734857145236e+00 1 1 0 +427 1 3 -1.0500000000000000e+00 -4.7767555379713080e+00 -1.2518993635097033e+01 -3.2442382059340336e+00 1 1 0 +428 1 3 -1.0500000000000000e+00 -8.5134290079527979e+00 -1.0344498789131556e+01 -3.2434042796301501e+00 1 1 0 +429 1 3 -1.0500000000000000e+00 -7.3676834268456703e+00 -1.2675968077726315e+01 -3.0460781068979870e+00 1 1 0 +430 1 5 4.2499999999999999e-01 -5.6291984212331361e+00 -9.6737782830762864e+00 -1.2378366376607151e+00 1 1 0 +431 1 1 1.5750000000000000e+00 -1.0424254239279566e+01 -1.4937358679422617e+01 -5.6105746367052234e-03 1 1 0 +432 1 2 2.1000000000000001e+00 -5.9966183092261662e+00 -1.3346914037356733e+01 -2.7586999750312273e+00 1 1 0 +433 1 2 2.1000000000000001e+00 -6.0787162912899904e+00 -1.6301594022666766e+01 -2.7662396645053331e+00 1 1 0 +434 1 3 -1.0500000000000000e+00 -5.8524493252948986e+00 -1.3417499106111881e+01 -1.1432423320806038e+00 1 1 0 +435 1 3 -1.0500000000000000e+00 -6.2173790769823585e+00 -1.6205898375930055e+01 -1.1446520550999360e+00 1 1 0 +436 1 4 -9.4999999999999996e-01 -8.7422302133565921e+00 -1.5022580589573913e+01 -1.0250333025345224e+00 1 1 0 +437 1 3 -1.0500000000000000e+00 -7.3849045569555534e+00 -1.7001924100971085e+01 -3.2442565670719610e+00 1 1 0 +438 1 3 -1.0500000000000000e+00 -5.9615899112839399e+00 -1.4827365294985665e+01 -3.2434200227578884e+00 1 1 0 +439 1 3 -1.0500000000000000e+00 -4.8158131421453243e+00 -1.7158865162435937e+01 -3.0461221024416814e+00 1 1 0 +440 1 5 4.2499999999999999e-01 -8.2373015285480466e+00 -1.4156572134845717e+01 -1.2375258291495790e+00 1 1 0 +441 1 1 1.5750000000000000e+00 -2.6733090183237005e+00 -1.6443321645086428e+01 -1.1041107971578867e-03 1 1 0 +442 1 2 2.1000000000000001e+00 -1.8845736595278169e+00 -9.0678522098859222e+00 2.7520924901341566e+00 1 1 0 +443 1 2 2.1000000000000001e+00 -1.8587152203392563e+00 -1.5079031480191311e+01 2.7596031698860788e+00 1 1 0 +444 1 3 -1.0500000000000000e+00 
-2.0851990926202717e+00 -1.7962986676530154e+01 1.1367377012975073e+00 1 1 0 +445 1 3 -1.0500000000000000e+00 -1.7198757039202697e+00 -1.5174789123816941e+01 1.1381253814035865e+00 1 1 0 +446 1 4 -9.4999999999999996e-01 -4.3549545647528749e+00 -1.6357502684339160e+01 1.0190397652485448e+00 1 1 0 +447 1 3 -1.0500000000000000e+00 -5.7124304040372786e+00 -1.4378772843054135e+01 3.2377015177917254e+00 1 1 0 +448 1 3 -1.0500000000000000e+00 -1.9759706551892506e+00 -1.6553183921602319e+01 3.2368942832448511e+00 1 1 0 +449 1 3 -1.0500000000000000e+00 -3.1214477226154163e+00 -1.4221470595941236e+01 3.0393318110976573e+00 1 1 0 +450 1 5 4.2499999999999999e-01 -4.8595274968140370e+00 -1.7222826883900630e+01 1.2347694197663284e+00 1 1 0 +451 1 1 1.5750000000000000e+00 -6.5143696356967595e-02 -1.1960436076296416e+01 -1.0852778034564636e-03 1 1 0 +452 1 2 2.1000000000000001e+00 -4.4927245848217705e+00 -1.3550741087639986e+01 2.7520746193540901e+00 1 1 0 +453 1 2 2.1000000000000001e+00 -4.4105679322702214e+00 -1.0596101122108912e+01 2.7596158243426192e+00 1 1 0 +454 1 3 -1.0500000000000000e+00 -4.6370068345753221e+00 -1.3480037496371166e+01 1.1367334894210899e+00 1 1 0 +455 1 3 -1.0500000000000000e+00 -4.2717455869130863e+00 -1.0691851675483679e+01 1.1381374399953774e+00 1 1 0 +456 1 4 -9.4999999999999996e-01 -1.7468275727255218e+00 -1.1874627336250532e+01 1.0189987024995677e+00 1 1 0 +457 1 3 -1.0500000000000000e+00 -3.1042823044114733e+00 -9.8958425495280267e+00 3.2377213874791302e+00 1 1 0 +458 1 3 -1.0500000000000000e+00 -4.5278086511820756e+00 -1.2070323354559752e+01 3.2369119261583279e+00 1 1 0 +459 1 3 -1.0500000000000000e+00 -5.6733188895891216e+00 -9.7385752106690813e+00 3.0393759430533862e+00 1 1 0 +460 1 5 4.2499999999999999e-01 -2.2514123253142220e+00 -1.2740049724133108e+01 1.2344514431371234e+00 1 1 0 +461 1 1 1.5750000000000000e+00 -2.6560915525392463e+00 -1.0454469045872084e+01 -5.5934796436769574e-03 1 1 0 +462 1 2 2.1000000000000001e+00 -3.4447691096570772e+00 
-1.7829808352622109e+01 -2.7587149934582342e+00 1 1 0 +463 1 2 2.1000000000000001e+00 -3.4705638102594323e+00 -1.1818663454813812e+01 -2.7662260942983252e+00 1 1 0 +464 1 3 -1.0500000000000000e+00 -3.2442649332123601e+00 -8.9345457398481685e+00 -1.1432475377456388e+00 1 1 0 +465 1 3 -1.0500000000000000e+00 -3.6092446002445957e+00 -1.1722958903027791e+01 -1.1446400407837753e+00 1 1 0 +466 1 4 -9.4999999999999996e-01 -9.7409713851972057e-01 -1.0539690012298589e+01 -1.0250592930335500e+00 1 1 0 +467 1 3 -1.0500000000000000e+00 3.8324539707928373e-01 -1.2518994839604371e+01 -3.2442378033162171e+00 1 1 0 +468 1 3 -1.0500000000000000e+00 -3.3534300302827278e+00 -1.0344495603443224e+01 -3.2434036074381662e+00 1 1 0 +469 1 3 -1.0500000000000000e+00 -2.2076823216047785e+00 -1.2675965356189565e+01 -3.0460813501677881e+00 1 1 0 +470 1 5 4.2499999999999999e-01 -4.6916881797521626e-01 -9.6737558202661873e+00 -1.2377292312443240e+00 1 1 0 +471 1 1 1.5750000000000000e+00 -5.2642616085225935e+00 -1.4937356526072383e+01 -5.6116587268295604e-03 1 1 0 +472 1 2 2.1000000000000001e+00 -8.3661833171900390e-01 -1.3346915215719637e+01 -2.7586977065852176e+00 1 1 0 +473 1 2 2.1000000000000001e+00 -9.1871278138782664e-01 -1.6301595545983076e+01 -2.7662379934375263e+00 1 1 0 +474 1 3 -1.0500000000000000e+00 -6.9245440347775755e-01 -1.3417496706431736e+01 -1.1432406132617512e+00 1 1 0 +475 1 3 -1.0500000000000000e+00 -1.0573790171958670e+00 -1.6205902997341223e+01 -1.1446506660881521e+00 1 1 0 +476 1 4 -9.4999999999999996e-01 -3.5822229029179731e+00 -1.5022573420463287e+01 -1.0250258180222183e+00 1 1 0 +477 1 3 -1.0500000000000000e+00 -2.2249070437183498e+00 -1.7001922706143915e+01 -3.2442579679185970e+00 1 1 0 +478 1 3 -1.0500000000000000e+00 -8.0159321030372510e-01 -1.4827364979185424e+01 -3.2434174597378771e+00 1 1 0 +479 1 3 -1.0500000000000000e+00 3.4418836997070201e-01 -1.7158862954024297e+01 -3.0461267809100399e+00 1 1 0 +480 1 5 4.2499999999999999e-01 -3.0772826637410828e+00 
-1.4156555961952805e+01 -1.2374543942475222e+00 1 1 0 +481 1 1 1.5750000000000000e+00 2.5430303416955855e+00 -7.4775229275666533e+00 -1.0903927610055320e-03 0 1 0 +482 1 2 2.1000000000000001e+00 3.3317774092475236e+00 -1.0200616183370670e-01 2.7520816901337355e+00 0 1 0 +483 1 2 2.1000000000000001e+00 3.3576130781465192e+00 -6.1132130971357093e+00 2.7596084428234988e+00 0 1 0 +484 1 3 -1.0500000000000000e+00 3.1311642989416661e+00 -8.9971574268844936e+00 1.1367451744438704e+00 0 1 0 +485 1 3 -1.0500000000000000e+00 3.4964506058497058e+00 -6.2089817547529798e+00 1.1381339401218931e+00 0 1 0 +486 1 4 -9.4999999999999996e-01 8.6134755852905975e-01 -7.3917559135405408e+00 1.0189681652581299e+00 0 1 0 +487 1 3 -1.0500000000000000e+00 -4.9610734278260260e-01 -5.4129397248180346e+00 3.2377101449353205e+00 0 1 0 +488 1 3 -1.0500000000000000e+00 3.2403560737478330e+00 -7.5873923340264575e+00 3.2369181500237261e+00 0 1 0 +489 1 3 -1.0500000000000000e+00 2.0948790222468219e+00 -5.2556484602929725e+00 3.0393232957068630e+00 0 1 0 +490 1 5 4.2499999999999999e-01 3.5673111416459058e-01 -8.2571975608171613e+00 1.2342266954650540e+00 0 1 0 +491 1 1 1.5750000000000000e+00 5.1511815603719224e+00 -2.9945846268543264e+00 -1.1047948613196468e-03 0 1 0 +492 1 2 2.1000000000000001e+00 7.2360018542633497e-01 -4.5849392306330117e+00 2.7520743918737764e+00 0 1 0 +493 1 2 2.1000000000000001e+00 8.0578013514410252e-01 -1.6302866446931539e+00 2.7596051499244538e+00 0 1 0 +494 1 3 -1.0500000000000000e+00 5.7930578780146647e-01 -4.5142401074583667e+00 1.1367309793240796e+00 0 1 0 +495 1 3 -1.0500000000000000e+00 9.4461652358187642e-01 -1.7260327072197654e+00 1.1381257728605814e+00 0 1 0 +496 1 4 -9.4999999999999996e-01 3.4695465007879651e+00 -2.9087560275153557e+00 1.0190473592372129e+00 0 1 0 +497 1 3 -1.0500000000000000e+00 2.1120666591399697e+00 -9.3003203981488269e-01 3.2377051847589051e+00 0 1 0 +498 1 3 -1.0500000000000000e+00 6.8853386599591992e-01 -3.1044553574194609e+00 
3.2368898303761000e+00 0 1 0 +499 1 3 -1.0500000000000000e+00 -4.5695846809990215e-01 -7.7274213373466694e-01 3.0393550575236041e+00 0 1 0 +500 1 5 4.2499999999999999e-01 2.9649751295027293e+00 -3.7740729758308227e+00 1.2348302779365401e+00 0 1 0 +501 1 1 1.5750000000000000e+00 2.5602455433478095e+00 -1.4886370470841328e+00 -5.6026869057745898e-03 0 1 0 +502 1 2 2.1000000000000001e+00 1.7715675925683332e+00 -8.8639733645011098e+00 -2.7587077024332771e+00 0 1 0 +503 1 2 2.1000000000000001e+00 1.7457794725236990e+00 -2.8528570719941939e+00 -2.7662336074055576e+00 0 1 0 +504 1 3 -1.0500000000000000e+00 1.9720632323485283e+00 3.1248108495219640e-02 -1.1432389482542256e+00 0 1 0 +505 1 3 -1.0500000000000000e+00 1.6071104318756078e+00 -2.7571679803082816e+00 -1.1446451199327043e+00 0 1 0 +506 1 4 -9.4999999999999996e-01 4.2422488968293255e+00 -1.5738884358966132e+00 -1.0250754166120224e+00 0 1 0 +507 1 3 -1.0500000000000000e+00 5.5995876520675196e+00 -3.5531812583774034e+00 -3.2442489486500676e+00 0 1 0 +508 1 3 -1.0500000000000000e+00 1.8629054353523422e+00 -1.3786551986298932e+00 -3.2434052794354411e+00 0 1 0 +509 1 3 -1.0500000000000000e+00 3.0086789511562593e+00 -3.7101344966819827e+00 -3.0461180106053920e+00 0 1 0 +510 1 5 4.2499999999999999e-01 4.7471454529273167e+00 -7.0794763541514882e-01 -1.2378467359889296e+00 0 1 0 +511 1 1 1.5750000000000000e+00 -4.7932782879019697e-02 -5.9715202067818431e+00 -5.6177997612092412e-03 0 1 0 +512 1 2 2.1000000000000001e+00 4.3797236506319344e+00 -4.3810861297739017e+00 -2.7587141580099024e+00 0 1 0 +513 1 2 2.1000000000000001e+00 4.2976145155261865e+00 -7.3357570341475089e+00 -2.7662411844589094e+00 0 1 0 +514 1 3 -1.0500000000000000e+00 4.5238714023038167e+00 -4.4516662698513905e+00 -1.1432550052329180e+00 0 1 0 +515 1 3 -1.0500000000000000e+00 4.1589466735422995e+00 -7.2400431067335660e+00 -1.1446569042053270e+00 0 1 0 +516 1 4 -9.4999999999999996e-01 1.6341232053025081e+00 -6.0566945530462757e+00 -1.0249767726955099e+00 0 1 0 
+517 1 3 -1.0500000000000000e+00 2.9914313972856412e+00 -8.0361012638344800e+00 -3.2442572919861199e+00 0 1 0 +518 1 3 -1.0500000000000000e+00 4.4147518470193514e+00 -5.8615310726740084e+00 -3.2434381043887726e+00 0 1 0 +519 1 3 -1.0500000000000000e+00 5.5605092517298846e+00 -8.1930491910314345e+00 -3.0460892999096219e+00 0 1 0 +520 1 5 4.2499999999999999e-01 2.1390929147156150e+00 -5.1906070381937255e+00 -1.2370937634001455e+00 0 1 0 +521 1 1 1.5750000000000000e+00 7.7030374451805130e+00 -7.4775251290699156e+00 -1.0888698840503253e-03 0 1 0 +522 1 2 2.1000000000000001e+00 -1.2148222137118129e+01 -1.0200707653891428e-01 2.7520836382546499e+00 1 1 0 +523 1 2 2.1000000000000001e+00 -1.2122384734121020e+01 -6.1132141998771736e+00 2.7596096679521516e+00 1 1 0 +524 1 3 -1.0500000000000000e+00 8.2911596618110579e+00 -8.9971549264740709e+00 1.1367476780142667e+00 0 1 0 +525 1 3 -1.0500000000000000e+00 -1.1983548377753742e+01 -6.2089861862005691e+00 1.1381352840058057e+00 1 1 0 +526 1 4 -9.4999999999999996e-01 6.0213405572092640e+00 -7.3917620717347017e+00 1.0189611302437704e+00 0 1 0 +527 1 3 -1.0500000000000000e+00 4.6638963381499163e+00 -5.4129402679040961e+00 3.2377110120966819e+00 0 1 0 +528 1 3 -1.0500000000000000e+00 -1.2239646742419254e+01 -7.5873934548230721e+00 3.2369204693707800e+00 1 1 0 +529 1 3 -1.0500000000000000e+00 7.2548822209885486e+00 -5.2556459928142818e+00 3.0393180544048288e+00 0 1 0 +530 1 5 4.2499999999999999e-01 5.5167139217558265e+00 -8.2572129315963121e+00 1.2341620107429829e+00 0 1 0 +531 1 1 1.5750000000000000e+00 -1.0328813564994913e+01 -2.9945868545091265e+00 -1.1021907772317263e-03 1 1 0 +532 1 2 2.1000000000000001e+00 5.8835951489452150e+00 -4.5849409042754914e+00 2.7520727650181094e+00 0 1 0 +533 1 2 2.1000000000000001e+00 5.9657784216131660e+00 -1.6302865620333193e+00 2.7596049941930865e+00 0 1 0 +534 1 3 -1.0500000000000000e+00 5.7393042862500394e+00 -4.5142417400968107e+00 1.1367316201327782e+00 0 1 0 +535 1 3 -1.0500000000000000e+00 
6.1046128790094336e+00 -1.7260345843011997e+00 1.1381256767653500e+00 0 1 0 +536 1 4 -9.4999999999999996e-01 -1.2010449072132545e+01 -2.9087428048367947e+00 1.0190619717138087e+00 1 1 0 +537 1 3 -1.0500000000000000e+00 7.2720667438571454e+00 -9.3003375924615028e-01 3.2377054783681878e+00 0 1 0 +538 1 3 -1.0500000000000000e+00 5.8485347152072791e+00 -3.1044579833410477e+00 3.2368893613845415e+00 0 1 0 +539 1 3 -1.0500000000000000e+00 4.7030395038638559e+00 -7.7274444126095432e-01 3.0393579344212860e+00 0 1 0 +540 1 5 4.2499999999999999e-01 8.1250060733523100e+00 -3.7740488527511040e+00 1.2349441294301648e+00 0 1 0 +541 1 1 1.5750000000000000e+00 7.7202504144252089e+00 -1.4886401919691252e+00 -5.5997253889010778e-03 0 1 0 +542 1 2 2.1000000000000001e+00 6.9315728795215357e+00 -8.8639724557003667e+00 -2.7587060787641455e+00 0 1 0 +543 1 2 2.1000000000000001e+00 6.9057838596794561e+00 -2.8528592404895132e+00 -2.7662331608367374e+00 0 1 0 +544 1 3 -1.0500000000000000e+00 7.1320642866870863e+00 3.1249839057021944e-02 -1.1432394397281911e+00 0 1 0 +545 1 3 -1.0500000000000000e+00 6.7671141776717505e+00 -2.7571666591679307e+00 -1.1446449331865125e+00 0 1 0 +546 1 4 -9.4999999999999996e-01 -1.1237747215119535e+01 -1.5738757700472377e+00 -1.0250615920395205e+00 1 1 0 +547 1 3 -1.0500000000000000e+00 -9.8804112801459567e+00 -3.5531825924339877e+00 -3.2442488647618859e+00 1 1 0 +548 1 3 -1.0500000000000000e+00 7.0229047342143325e+00 -1.3786504878971080e+00 -3.2434046761109689e+00 0 1 0 +549 1 3 -1.0500000000000000e+00 8.1686778469357328e+00 -3.7101299461678341e+00 -3.0461212587078812e+00 0 1 0 +550 1 5 4.2499999999999999e-01 -1.0732824995725242e+01 -7.0792555897359932e-01 -1.2377392726116767e+00 1 1 0 +551 1 1 1.5750000000000000e+00 5.1120601166863970e+00 -5.9715183597089112e+00 -5.6187983426916333e-03 0 1 0 +552 1 2 2.1000000000000001e+00 -1.1100276794376551e+01 -4.3810875359534727e+00 -2.7587117227861402e+00 1 1 0 +553 1 2 2.1000000000000001e+00 -1.1182381983071433e+01 
-7.3357579946775822e+00 -2.7662391390428738e+00 1 1 0 +554 1 3 -1.0500000000000000e+00 -1.0956133372652097e+01 -4.4516637738746425e+00 -1.1432525432779990e+00 1 1 0 +555 1 3 -1.0500000000000000e+00 -1.1321053003616177e+01 -7.2400474579999852e+00 -1.1446549890006956e+00 1 1 0 +556 1 4 -9.4999999999999996e-01 6.7941308320976503e+00 -6.0566870115769760e+00 -1.0249692732319655e+00 0 1 0 +557 1 3 -1.0500000000000000e+00 8.1514264585793157e+00 -8.0361013588165378e+00 -3.2442581978572722e+00 0 1 0 +558 1 3 -1.0500000000000000e+00 -1.1065250816254272e+01 -5.8615306608635951e+00 -3.2434358585503720e+00 1 1 0 +559 1 3 -1.0500000000000000e+00 -9.9194890195715804e+00 -8.1930470320482822e+00 -3.0460940625846717e+00 1 1 0 +560 1 5 4.2499999999999999e-01 7.2991117284917664e+00 -5.1905906016715520e+00 -1.2370220105766148e+00 0 1 0 +561 1 1 1.5750000000000000e+00 -7.7769694764459549e+00 -7.4775228929349531e+00 -1.0903321962043577e-03 1 1 0 +562 1 2 2.1000000000000001e+00 -6.9882223227988884e+00 -1.0200599156834400e-01 2.7520813529086645e+00 1 1 0 +563 1 2 2.1000000000000001e+00 -6.9623871029163196e+00 -6.1132135879265324e+00 2.7596084729391421e+00 1 1 0 +564 1 3 -1.0500000000000000e+00 -7.1888358271498909e+00 -8.9971574534300913e+00 1.1367452994897640e+00 1 1 0 +565 1 3 -1.0500000000000000e+00 -6.8235491585764851e+00 -6.2089817636730675e+00 1.1381342081903227e+00 1 1 0 +566 1 4 -9.4999999999999996e-01 -9.4586522450746617e+00 -7.3917559442724308e+00 1.0189677422932828e+00 1 1 0 +567 1 3 -1.0500000000000000e+00 -1.0816105717836082e+01 -5.4129387266467255e+00 3.2377102405102285e+00 1 1 0 +568 1 3 -1.0500000000000000e+00 -7.0796436923285659e+00 -7.5873918804348364e+00 3.2369180686424208e+00 1 1 0 +569 1 3 -1.0500000000000000e+00 -8.2251199853950752e+00 -5.2556486050312170e+00 3.0393233362599990e+00 1 1 0 +570 1 5 4.2499999999999999e-01 -9.9632689764484716e+00 -8.2571977035906201e+00 1.2342271136820635e+00 1 1 0 +571 1 1 1.5750000000000000e+00 -5.1688184090282769e+00 
-2.9945842541622625e+00 -1.1049830698812713e-03 1 1 0 +572 1 2 2.1000000000000001e+00 -9.5964010560755071e+00 -4.5849397755744583e+00 2.7520743640476244e+00 1 1 0 +573 1 2 2.1000000000000001e+00 -9.5142194597070979e+00 -1.6302853443276355e+00 2.7596050766847391e+00 1 1 0 +574 1 3 -1.0500000000000000e+00 -9.7406942201924345e+00 -4.5142399448165857e+00 1.1367314955040211e+00 1 1 0 +575 1 3 -1.0500000000000000e+00 -9.3753833519977476e+00 -1.7260328865687384e+00 1.1381255909198060e+00 1 1 0 +576 1 4 -9.4999999999999996e-01 -6.8504532855874363e+00 -2.9087557444405547e+00 1.0190471692018370e+00 1 1 0 +577 1 3 -1.0500000000000000e+00 -8.2079343311356894e+00 -9.3003298460855177e-01 3.2377053869099868e+00 1 1 0 +578 1 3 -1.0500000000000000e+00 -9.6314660706552573e+00 -3.1044564284563947e+00 3.2368901782957202e+00 1 1 0 +579 1 3 -1.0500000000000000e+00 -1.0776959338377278e+01 -7.7274187234431935e-01 3.0393550095282205e+00 1 1 0 +580 1 5 4.2499999999999999e-01 -7.3550252516863521e+00 -3.7740734258413475e+00 1.2348301022365700e+00 1 1 0 +581 1 1 1.5750000000000000e+00 -7.7597546667391075e+00 -1.4886372190293855e+00 -5.6028063145898699e-03 1 1 0 +582 1 2 2.1000000000000001e+00 -8.5484319115694074e+00 -8.8639732448546162e+00 -2.7587074441979862e+00 1 1 0 +583 1 2 2.1000000000000001e+00 -8.5742190829932454e+00 -2.8528582113279413e+00 -2.7662332727028680e+00 1 1 0 +584 1 3 -1.0500000000000000e+00 -8.3479366822833345e+00 3.1248113559382062e-02 -1.1432390562401515e+00 1 1 0 +585 1 3 -1.0500000000000000e+00 -8.7128896354955199e+00 -2.7571678381699805e+00 -1.1446447922666891e+00 1 1 0 +586 1 4 -9.4999999999999996e-01 -6.0777513782948924e+00 -1.5738883796396408e+00 -1.0250755563280176e+00 1 1 0 +587 1 3 -1.0500000000000000e+00 -4.7204117547131013e+00 -3.5531805865335961e+00 -3.2442493913644750e+00 1 1 0 +588 1 3 -1.0500000000000000e+00 -8.4570947948427566e+00 -1.3786536562287637e+00 -3.2434054472491800e+00 1 1 0 +589 1 3 -1.0500000000000000e+00 -7.3113220657295681e+00 
-3.7101336804442635e+00 -3.0461181837386828e+00 1 1 0 +590 1 5 4.2499999999999999e-01 -5.5728545420378985e+00 -7.0794773175252601e-01 -1.2378459849698666e+00 1 1 0 +591 1 1 1.5750000000000000e+00 -1.0367933033422295e+01 -5.9715207111099300e+00 -5.6180258102536129e-03 1 1 0 +592 1 2 2.1000000000000001e+00 -5.9402773341848842e+00 -4.3810863407387419e+00 -2.7587137659613497e+00 1 1 0 +593 1 2 2.1000000000000001e+00 -6.0223861903383664e+00 -7.3357563341016014e+00 -2.7662408005495491e+00 1 1 0 +594 1 3 -1.0500000000000000e+00 -5.7961285489484782e+00 -4.4516661488714071e+00 -1.1432545371263672e+00 1 1 0 +595 1 3 -1.0500000000000000e+00 -6.1610531877327741e+00 -7.2400431512329035e+00 -1.1446566654329438e+00 1 1 0 +596 1 4 -9.4999999999999996e-01 -8.6858768030040228e+00 -6.0566944409790153e+00 -1.0249771835179526e+00 1 1 0 +597 1 3 -1.0500000000000000e+00 -7.3285688373453928e+00 -8.0361009910012822e+00 -3.2442573173398053e+00 1 1 0 +598 1 3 -1.0500000000000000e+00 -5.9052480946191332e+00 -5.8615317762863910e+00 -3.2434382847147987e+00 1 1 0 +599 1 3 -1.0500000000000000e+00 -4.7594898600782685e+00 -8.1930498820267719e+00 -3.0460893476713888e+00 1 1 0 +600 1 5 4.2499999999999999e-01 -8.1809072617429095e+00 -5.1906073742810666e+00 -1.2370942019865021e+00 1 1 0 +601 1 1 1.5750000000000000e+00 -2.6169622759827735e+00 -7.4775250382217529e+00 -1.0888870917842297e-03 1 1 0 +602 1 2 2.1000000000000001e+00 -1.8282239536923868e+00 -1.0200678261182716e-01 2.7520839285150736e+00 1 1 0 +603 1 2 2.1000000000000001e+00 -1.8023848871371975e+00 -6.1132149333807195e+00 2.7596099334948168e+00 1 1 0 +604 1 3 -1.0500000000000000e+00 -2.0288403830127706e+00 -8.9971548663128456e+00 1.1367473309286318e+00 1 1 0 +605 1 3 -1.0500000000000000e+00 -1.6635485658314071e+00 -6.2089861954563119e+00 1.1381355321400246e+00 1 1 0 +606 1 4 -9.4999999999999996e-01 -4.2986595508918528e+00 -7.3917622156287290e+00 1.0189613383602527e+00 1 1 0 +607 1 3 -1.0500000000000000e+00 -5.6561041982113966e+00 
-5.4129404080508507e+00 3.2377113204733696e+00 1 1 0 +608 1 3 -1.0500000000000000e+00 -1.9196465393900546e+00 -7.5873916738064437e+00 3.2369203740034518e+00 1 1 0 +609 1 3 -1.0500000000000000e+00 -3.0651183760167475e+00 -5.2556458992218147e+00 3.0393180703268552e+00 1 1 0 +610 1 5 4.2499999999999999e-01 -4.8032860741367820e+00 -8.2572129488122474e+00 1.2341622267812014e+00 1 1 0 +611 1 1 1.5750000000000000e+00 -8.8136395279434510e-03 -2.9945867081536335e+00 -1.1020784710353837e-03 1 1 0 +612 1 2 2.1000000000000001e+00 -4.4364036780683005e+00 -4.5849400967132752e+00 2.7520728814233770e+00 1 1 0 +613 1 2 2.1000000000000001e+00 -4.3542219747282349e+00 -1.6302850214721580e+00 2.7596048654719549e+00 1 1 0 +614 1 3 -1.0500000000000000e+00 -4.5806958046153143e+00 -4.5142415882856302e+00 1.1367317320609143e+00 1 1 0 +615 1 3 -1.0500000000000000e+00 -4.2153870702774707e+00 -1.7260346018672514e+00 1.1381255858809833e+00 1 1 0 +616 1 4 -9.4999999999999996e-01 -1.6904484881905741e+00 -2.9087428483903537e+00 1.0190619641731047e+00 1 1 0 +617 1 3 -1.0500000000000000e+00 -3.0479317476150687e+00 -9.3003299864808042e-01 3.2377055514096416e+00 1 1 0 +618 1 3 -1.0500000000000000e+00 -4.4714653858633984e+00 -3.1044601086008594e+00 3.2368896248794208e+00 1 1 0 +619 1 3 -1.0500000000000000e+00 -5.6169595136074468e+00 -7.7274517323492020e-01 3.0393582920246160e+00 1 1 0 +620 1 5 4.2499999999999999e-01 -2.1949939741826050e+00 -3.7740490432004226e+00 1.2349439971972558e+00 1 1 0 +621 1 1 1.5750000000000000e+00 -2.5997492371978028e+00 -1.4886398193905990e+00 -5.6002322660066994e-03 1 1 0 +622 1 2 2.1000000000000001e+00 -3.3884286665344208e+00 -8.8639728194260989e+00 -2.7587059169010058e+00 1 1 0 +623 1 2 2.1000000000000001e+00 -3.4142168562381734e+00 -2.8528585513833544e+00 -2.7662332460077952e+00 1 1 0 +624 1 3 -1.0500000000000000e+00 -3.1879357842532645e+00 3.1249998420598502e-02 -1.1432393944720811e+00 1 1 0 +625 1 3 -1.0500000000000000e+00 -3.5528861209569067e+00 -2.7571666396533310e+00 
-1.1446448702388476e+00 1 1 0 +626 1 4 -9.4999999999999996e-01 -9.1774770858351040e-01 -1.5738756903152868e+00 -1.0250614076089395e+00 1 1 0 +627 1 3 -1.0500000000000000e+00 4.3959096909002859e-01 -3.5531812996144048e+00 -3.2442490495597234e+00 1 1 0 +628 1 3 -1.0500000000000000e+00 -3.2970956810823822e+00 -1.3786508353563782e+00 -3.2434046667663115e+00 1 1 0 +629 1 3 -1.0500000000000000e+00 -2.1513208190170037e+00 -3.7101311003824247e+00 -3.0461214548245756e+00 1 1 0 +630 1 5 4.2499999999999999e-01 -4.1282515710059897e-01 -7.0792553648389855e-01 -1.2377392208466960e+00 1 1 0 +631 1 1 1.5750000000000000e+00 -5.2079395540863889e+00 -5.9715183498939979e+00 -5.6189154650017059e-03 1 1 0 +632 1 2 2.1000000000000001e+00 -7.8027877908760246e-01 -4.3810869458036930e+00 -2.7587117009994255e+00 1 1 0 +633 1 2 2.1000000000000001e+00 -8.6238304934035170e-01 -7.3357574190446098e+00 -2.7662393468626991e+00 1 1 0 +634 1 3 -1.0500000000000000e+00 -6.3613312327963456e-01 -4.4516638025267596e+00 -1.1432525496412325e+00 1 1 0 +635 1 3 -1.0500000000000000e+00 -1.0010528938323446e+00 -7.2400475071979216e+00 -1.1446551557320230e+00 1 1 0 +636 1 4 -9.4999999999999996e-01 -3.5258691127600343e+00 -6.0566870533706787e+00 -1.0249693530848099e+00 1 1 0 +637 1 3 -1.0500000000000000e+00 -2.1685719463830591e+00 -8.0361002179160028e+00 -3.2442582882243087e+00 1 1 0 +638 1 3 -1.0500000000000000e+00 -7.4525076665510959e-01 -5.8615324373278312e+00 -3.2434359625823452e+00 1 1 0 +639 1 3 -1.0500000000000000e+00 4.0051195075634105e-01 -8.1930478397103030e+00 -3.0460938191693359e+00 1 1 0 +640 1 5 4.2499999999999999e-01 -3.0208884823444722e+00 -5.1905909853316992e+00 -1.2370225487419324e+00 1 1 0 +641 1 1 1.5750000000000000e+00 9.0282170121347960e-01 1.3067808346491034e+00 9.1954900210304160e+00 0 0 0 +642 1 2 2.1000000000000001e+00 5.0853835325505230e+00 9.0458192878071024e+00 -6.4446541799393655e+00 0 0 1 +643 1 2 2.1000000000000001e+00 5.1111493365352629e+00 3.0346760560285908e+00 
-6.4371077695079695e+00 0 0 1 +644 1 3 -1.0500000000000000e+00 4.8848963121391513e+00 1.5058832132587696e-01 -8.0601060968085214e+00 0 0 1 +645 1 3 -1.0500000000000000e+00 5.2498169817742131e+00 2.9389707079076253e+00 -8.0586979445716871e+00 0 0 1 +646 1 4 -9.4999999999999996e-01 2.6146516473663581e+00 1.7556358662176805e+00 -8.1783595973058638e+00 0 0 1 +647 1 3 -1.0500000000000000e+00 1.2573386427266922e+00 3.7350082135214784e+00 -5.9590929067377916e+00 0 0 1 +648 1 3 -1.0500000000000000e+00 4.9940149672411014e+00 1.5604416078802856e+00 -5.9599200664829546e+00 0 0 1 +649 1 3 -1.0500000000000000e+00 3.8482541417496154e+00 3.8919666550679288e+00 -6.1572476854511091e+00 0 0 1 +650 1 5 4.2499999999999999e-01 2.1097040697622891e+00 8.8957731104389026e-01 -7.9660756309162224e+00 0 0 1 +651 1 1 1.5750000000000000e+00 3.5109912488121129e+00 5.7897181695951225e+00 9.1954705054955745e+00 0 0 0 +652 1 2 2.1000000000000001e+00 2.4772021487596234e+00 4.5628871120549128e+00 -6.4446362740426100e+00 0 0 1 +653 1 2 2.1000000000000001e+00 2.5593374191476794e+00 7.5175693695357708e+00 -6.4371205439737373e+00 0 0 1 +654 1 3 -1.0500000000000000e+00 2.3330389253492214e+00 4.6334622812685353e+00 -8.0601018794420991e+00 0 0 1 +655 1 3 -1.0500000000000000e+00 2.6980213434351334e+00 7.4218563502603487e+00 -8.0587104796385134e+00 0 0 1 +656 1 4 -9.4999999999999996e-01 5.2228588615568459e+00 6.2385825824180614e+00 -8.1783195626453065e+00 0 0 1 +657 1 3 -1.0500000000000000e+00 3.8655271022521749e+00 8.2179024105543590e+00 -5.9591131457953859e+00 0 0 1 +658 1 3 -1.0500000000000000e+00 2.4421872281182946e+00 6.0434027797923129e+00 -5.9599379213137418e+00 0 0 1 +659 1 3 -1.0500000000000000e+00 1.2964595577439280e+00 8.3748945576099629e+00 -6.1572921167220986e+00 0 0 1 +660 1 5 4.2499999999999999e-01 4.7179221441586208e+00 5.3726204637823400e+00 -7.9657645022302050e+00 0 0 1 +661 1 1 1.5750000000000000e+00 9.2003142836339080e-01 7.2957038015049420e+00 9.1909524249125170e+00 0 0 0 +662 1 2 
2.1000000000000001e+00 1.3128924701203459e-01 -7.9808840783822887e-02 6.4377919902493055e+00 0 0 0 +663 1 2 2.1000000000000001e+00 1.0546023681999728e-01 5.9313687148606853e+00 6.4302462180669693e+00 0 0 0 +664 1 3 -1.0500000000000000e+00 3.3189746819901522e-01 8.8152951448223718e+00 8.0531305310702521e+00 0 0 0 +665 1 3 -1.0500000000000000e+00 -3.3366415369801672e-02 6.0271283114307259e+00 8.0517278286053369e+00 0 0 0 +666 1 4 -9.4999999999999996e-01 2.6017302946151339e+00 7.2099082126139891e+00 8.1708949617667592e+00 0 0 0 +667 1 3 -1.0500000000000000e+00 3.9591748761955579e+00 5.2311084129770329e+00 5.9521442450521000e+00 0 0 0 +668 1 3 -1.0500000000000000e+00 2.2271628762136153e-01 7.4055903557905545e+00 5.9529445306364988e+00 0 0 0 +669 1 3 -1.0500000000000000e+00 1.3682057529686027e+00 5.0738203147372865e+00 6.1505125690719922e+00 0 0 0 +670 1 5 4.2499999999999999e-01 3.1063243132629346e+00 8.0753596790647784e+00 7.9555583042567513e+00 0 0 0 +671 1 1 1.5750000000000000e+00 -1.6881330777744310e+00 2.8127685078479772e+00 9.1909703237928646e+00 0 0 0 +672 1 2 2.1000000000000001e+00 2.7394734624265844e+00 4.4031213577293649e+00 6.4377744023627468e+00 0 0 0 +673 1 2 2.1000000000000001e+00 2.6572745013040375e+00 1.4484777197340897e+00 6.4302586021309676e+00 0 0 0 +674 1 3 -1.0500000000000000e+00 2.8837529128519606e+00 4.3324232851542490e+00 8.0531236793474079e+00 0 0 0 +675 1 3 -1.0500000000000000e+00 2.5184326773127097e+00 1.5442499309140771e+00 8.0517387951511736e+00 0 0 0 +676 1 4 -9.4999999999999996e-01 -6.4787867921509701e-03 2.7269684263190967e+00 8.1708608856277287e+00 0 0 0 +677 1 3 -1.0500000000000000e+00 1.3509919587768682e+00 7.4821392321423730e-01 5.9521645342909348e+00 0 0 0 +678 1 3 -1.0500000000000000e+00 2.7745451384870847e+00 2.9226361141182764e+00 5.9529584547104051e+00 0 0 0 +679 1 3 -1.0500000000000000e+00 3.9200003209778860e+00 5.9089377157124190e-01 6.1505589500203222e+00 0 0 0 +680 1 5 4.2499999999999999e-01 4.9810210703177837e-01 
3.5923337586577269e+00 7.9552779078896307e+00 0 0 0 +681 1 1 1.5750000000000000e+00 6.0628283623338497e+00 1.3067787544128677e+00 9.1954909873757167e+00 0 0 0 +682 1 2 2.1000000000000001e+00 -1.0394616141118407e+01 9.0458193519106409e+00 -6.4446518531202219e+00 1 0 1 +683 1 2 2.1000000000000001e+00 -1.0368847299390932e+01 3.0346758923982300e+00 -6.4371063430411706e+00 1 0 1 +684 1 3 -1.0500000000000000e+00 1.0044891703353517e+01 1.5059081263817120e-01 -8.0601036725870028e+00 0 0 1 +685 1 3 -1.0500000000000000e+00 -1.0230182196436386e+01 2.9389665302324168e+00 -8.0586964701046337e+00 1 0 1 +686 1 4 -9.4999999999999996e-01 7.7746441523543695e+00 1.7556293951885031e+00 -8.1783660429245710e+00 0 0 1 +687 1 3 -1.0500000000000000e+00 6.4173436726475721e+00 3.7350081528213259e+00 -5.9590919416472339e+00 0 0 1 +688 1 3 -1.0500000000000000e+00 -1.0485987846647536e+01 1.5604408004977763e+00 -5.9599178840240823e+00 1 0 1 +689 1 3 -1.0500000000000000e+00 9.0082562045789452e+00 3.8919690750329750e+00 -6.1572526175430760e+00 0 0 1 +690 1 5 4.2499999999999999e-01 7.2696874964052860e+00 8.8956322413740452e-01 -7.9661385199625387e+00 0 0 1 +691 1 1 1.5750000000000000e+00 -1.1969004017494131e+01 5.7897156747092851e+00 9.1954731135386147e+00 1 0 0 +692 1 2 2.1000000000000001e+00 7.6371976678189384e+00 4.5628860598886369e+00 -6.4446375389432280e+00 0 0 1 +693 1 2 2.1000000000000001e+00 7.7193341776776379e+00 7.5175699006706118e+00 -6.4371207394056498e+00 0 0 1 +694 1 3 -1.0500000000000000e+00 7.4930378996419051e+00 4.6334607153110241e+00 -8.0601011377496583e+00 0 0 1 +695 1 3 -1.0500000000000000e+00 7.8580178164782311e+00 7.4218546158866339e+00 -8.0587102079682431e+00 0 0 1 +696 1 4 -9.4999999999999996e-01 -1.0257136297370325e+01 6.2385960309449793e+00 -8.1783048768873243e+00 1 0 1 +697 1 3 -1.0500000000000000e+00 9.0255277326224110e+00 8.2179011721412998e+00 -5.9591128282486947e+00 0 0 1 +698 1 3 -1.0500000000000000e+00 7.6021882835487915e+00 6.0433992298178438e+00 
-5.9599384416368117e+00 0 0 1 +699 1 3 -1.0500000000000000e+00 6.4564599887550536e+00 8.3748908688953065e+00 -6.1572891447458140e+00 0 0 1 +700 1 5 4.2499999999999999e-01 9.8779530507507687e+00 5.3726436083238696e+00 -7.9656529410886030e+00 0 0 1 +701 1 1 1.5750000000000000e+00 6.0800368052241147e+00 7.2957010411414913e+00 9.1909549731442191e+00 0 0 0 +702 1 2 2.1000000000000001e+00 5.2912906726759843e+00 -7.9808353462858861e-02 6.4377941846760578e+00 0 0 0 +703 1 2 2.1000000000000001e+00 5.2654624063345530e+00 5.9313690715250438e+00 6.4302464810153701e+00 0 0 0 +704 1 3 -1.0500000000000000e+00 5.4918987656309710e+00 8.8152970163554336e+00 8.0531301876551780e+00 0 0 0 +705 1 3 -1.0500000000000000e+00 5.1266374369100358e+00 6.0271296260902361e+00 8.0517278840107451e+00 0 0 0 +706 1 4 -9.4999999999999996e-01 -1.2878265807380702e+01 7.2099210406363419e+00 8.1709093688922145e+00 1 0 0 +707 1 3 -1.0500000000000000e+00 -1.1520823829022035e+01 5.2311075597295300e+00 5.9521445177225409e+00 1 0 0 +708 1 3 -1.0500000000000000e+00 5.3827155636603301e+00 7.4055937650983346e+00 5.9529453826342102e+00 0 0 0 +709 1 3 -1.0500000000000000e+00 6.5282069019146896e+00 5.0738230478200776e+00 6.1505092076333874e+00 0 0 0 +710 1 5 4.2499999999999999e-01 -1.2373645815072846e+01 8.0753826805516304e+00 7.9556665099946571e+00 1 0 0 +711 1 1 1.5750000000000000e+00 3.4718594708291537e+00 2.8127702813274880e+00 9.1909694677029918e+00 0 0 0 +712 1 2 2.1000000000000001e+00 -1.2740526750701058e+01 4.4031212786667204e+00 6.4377767362340492e+00 1 0 0 +713 1 2 2.1000000000000001e+00 -1.2822723122075143e+01 1.4484779219502784e+00 6.4302598890813556e+00 1 0 0 +714 1 3 -1.0500000000000000e+00 -1.2596252212651482e+01 4.3324258711439079e+00 8.0531259774885768e+00 1 0 0 +715 1 3 -1.0500000000000000e+00 -1.2961567045757878e+01 1.5442452789517418e+00 8.0517399296724008e+00 1 0 0 +716 1 4 -9.4999999999999996e-01 5.1535286802960059e+00 2.7269756012129740e+00 8.1708683839668197e+00 0 0 0 +717 1 3 
-1.0500000000000000e+00 6.5109910701777771e+00 7.4821635788560670e-01 5.9521631737244522e+00 0 0 0 +718 1 3 -1.0500000000000000e+00 -1.2705458212098772e+01 2.9226334233813347e+00 5.9529611227102706e+00 1 0 0 +719 1 3 -1.0500000000000000e+00 -1.1559997625285227e+01 5.9089561905540933e-01 6.1505543903574669e+00 1 0 0 +720 1 5 4.2499999999999999e-01 5.6581213766816436e+00 3.5923506975091541e+00 7.9553499354981909e+00 0 0 0 +721 1 1 1.5750000000000000e+00 -9.4171785987889631e+00 1.3067808486544372e+00 9.1954897775731723e+00 1 0 0 +722 1 2 2.1000000000000001e+00 -5.2346156198576832e+00 9.0458196754799260e+00 -6.4446543075167195e+00 1 0 1 +723 1 2 2.1000000000000001e+00 -5.2088506338339888e+00 3.0346764869382277e+00 -6.4371077596082475e+00 1 0 1 +724 1 3 -1.0500000000000000e+00 -5.4351035648491859e+00 1.5058837801954184e-01 -8.0601058050873053e+00 1 0 1 +725 1 3 -1.0500000000000000e+00 -5.0701827211781874e+00 2.9389705570882150e+00 -8.0586978679653747e+00 1 0 1 +726 1 4 -9.4999999999999996e-01 -7.7053483398404694e+00 1.7556358142533099e+00 -8.1783597967549291e+00 1 0 1 +727 1 3 -1.0500000000000000e+00 -9.0626602483802241e+00 3.7350088365021605e+00 -5.9590930555921631e+00 1 0 1 +728 1 3 -1.0500000000000000e+00 -5.3259850845429462e+00 1.5604413432350590e+00 -5.9599201167798617e+00 1 0 1 +729 1 3 -1.0500000000000000e+00 -6.4717460701469731e+00 3.8919666136824311e+00 -6.1572477700008852e+00 1 0 1 +730 1 5 4.2499999999999999e-01 -8.2102959727496483e+00 8.8957734266167066e-01 -7.9660752806221620e+00 1 0 1 +731 1 1 1.5750000000000000e+00 -6.8090088180939414e+00 5.7897184585834118e+00 9.1954702982259811e+00 1 0 0 +732 1 2 2.1000000000000001e+00 -7.8427983144216862e+00 4.5628869165101520e+00 -6.4446360763690098e+00 1 0 1 +733 1 2 2.1000000000000001e+00 -7.7606629979491730e+00 7.5175699984949489e+00 -6.4371205648361025e+00 1 0 1 +734 1 3 -1.0500000000000000e+00 -7.9869609377381527e+00 4.6334621113719940e+00 -8.0601015647338841e+00 1 0 1 +735 1 3 -1.0500000000000000e+00 
-7.6219785160957585e+00 7.4218563339332313e+00 -8.0587102722736077e+00 1 0 1 +736 1 4 -9.4999999999999996e-01 -5.0971407346304529e+00 6.2385827897666140e+00 -8.1783194514318325e+00 1 0 1 +737 1 3 -1.0500000000000000e+00 -6.4544738570782689e+00 8.2179019812079908e+00 -5.9591129036180632e+00 1 0 1 +738 1 3 -1.0500000000000000e+00 -7.8778125007394006e+00 6.0434023974722173e+00 -5.9599378900983080e+00 1 0 1 +739 1 3 -1.0500000000000000e+00 -9.0235397385233895e+00 8.3748940286283720e+00 -6.1572920902934296e+00 1 0 1 +740 1 5 4.2499999999999999e-01 -5.6020780318294392e+00 5.3726203020941981e+00 -7.9657640369980607e+00 1 0 1 +741 1 1 1.5750000000000000e+00 -9.3999686103369591e+00 7.2957037537216749e+00 9.1909523483902298e+00 1 0 0 +742 1 2 2.1000000000000001e+00 -1.0188712454891705e+01 -7.9809867036434667e-02 6.4377918471894127e+00 1 0 0 +743 1 2 2.1000000000000001e+00 -1.0214540895341987e+01 5.9313686094339637e+00 6.4302464282464289e+00 1 0 0 +744 1 3 -1.0500000000000000e+00 -9.9881023396587274e+00 8.8152949623305936e+00 8.0531306273493115e+00 1 0 0 +745 1 3 -1.0500000000000000e+00 -1.0353366432123767e+01 6.0271282133342368e+00 8.0517282168411271e+00 1 0 0 +746 1 4 -9.4999999999999996e-01 -7.7182695549998517e+00 7.2099080126934929e+00 8.1708951229879005e+00 1 0 0 +747 1 3 -1.0500000000000000e+00 -6.3608241242834938e+00 5.2311093628097822e+00 5.9521442327632545e+00 1 0 0 +748 1 3 -1.0500000000000000e+00 -1.0097283702242729e+01 7.4055912636174384e+00 5.9529443802500737e+00 1 0 0 +749 1 3 -1.0500000000000000e+00 -8.9517932081997955e+00 5.0738196454293210e+00 6.1505123949521554e+00 1 0 0 +750 1 5 4.2499999999999999e-01 -7.2136757138895025e+00 8.0753596143702886e+00 7.9555577114906342e+00 1 0 0 +751 1 1 1.5750000000000000e+00 -1.2008133444652948e+01 2.8127683008440947e+00 9.1909706701552984e+00 1 0 0 +752 1 2 2.1000000000000001e+00 -7.5805274252007724e+00 4.4031223309112946e+00 6.4377744823201759e+00 1 0 0 +753 1 2 2.1000000000000001e+00 -7.6627254705476791e+00 
1.4484791346359565e+00 6.4302581024479899e+00 1 0 0 +754 1 3 -1.0500000000000000e+00 -7.4362471116748949e+00 4.3324233917456567e+00 8.0531239863719186e+00 1 0 0 +755 1 3 -1.0500000000000000e+00 -7.8015673921534887e+00 1.5442497287246404e+00 8.0517383669550711e+00 1 0 0 +756 1 4 -9.4999999999999996e-01 -1.0326478478204436e+01 2.7269689794268039e+00 8.1708604984881745e+00 1 0 0 +757 1 3 -1.0500000000000000e+00 -8.9690071758193977e+00 7.4821433214897581e-01 5.9521645232376734e+00 1 0 0 +758 1 3 -1.0500000000000000e+00 -7.5454553734540859e+00 2.9226330419737394e+00 5.9529588234715085e+00 1 0 0 +759 1 3 -1.0500000000000000e+00 -6.4000002065018471e+00 5.9089392922225770e-01 6.1505591636157284e+00 1 0 0 +760 1 5 4.2499999999999999e-01 -9.8218980423571463e+00 3.5923334012661101e+00 7.9552781595426296e+00 1 0 0 +761 1 1 1.5750000000000000e+00 -4.2571709946591874e+00 1.3067789983272604e+00 9.1954912457008291e+00 1 0 0 +762 1 2 2.1000000000000001e+00 -7.4615947805828142e-02 9.0458186641757301e+00 -6.4446517393813050e+00 1 0 1 +763 1 2 2.1000000000000001e+00 -4.8847059507879820e-02 3.0346758473621733e+00 -6.4371061453415539e+00 1 0 1 +764 1 3 -1.0500000000000000e+00 -2.7510819789244145e-01 1.5059081182694456e-01 -8.0601038895221766e+00 1 0 1 +765 1 3 -1.0500000000000000e+00 8.9817726439083145e-02 2.9389665023304516e+00 -8.0586963846550130e+00 1 0 1 +766 1 4 -9.4999999999999996e-01 -2.5453551619952046e+00 1.7556292429068776e+00 -8.1783659437282008e+00 1 0 1 +767 1 3 -1.0500000000000000e+00 -3.9026573163633245e+00 3.7350079481623872e+00 -5.9590919324016900e+00 1 0 1 +768 1 3 -1.0500000000000000e+00 -1.6598807534023940e-01 1.5604410924438490e+00 -5.9599178362058840e+00 1 0 1 +769 1 3 -1.0500000000000000e+00 -1.3117441457939520e+00 3.8919688725428685e+00 -6.1572524045698778e+00 1 0 1 +770 1 5 4.2499999999999999e-01 -3.0503128630104008e+00 8.8956260147064015e-01 -7.9661390937649665e+00 1 0 1 +771 1 1 1.5750000000000000e+00 -1.6490039246502679e+00 5.7897160415787177e+00 
9.1954734654979546e+00 1 0 0 +772 1 2 2.1000000000000001e+00 -2.6828020899634888e+00 4.5628863471251293e+00 -6.4446376369361653e+00 1 0 1 +773 1 2 2.1000000000000001e+00 -2.6006660610846364e+00 7.5175704777841723e+00 -6.4371205656554675e+00 1 0 1 +774 1 3 -1.0500000000000000e+00 -2.8269620388405015e+00 4.6334605606551946e+00 -8.0601011908140467e+00 1 0 1 +775 1 3 -1.0500000000000000e+00 -2.4619821914972793e+00 7.4218546711255335e+00 -8.0587102568278617e+00 1 0 1 +776 1 4 -9.4999999999999996e-01 6.2863701187678700e-02 6.2385965848054994e+00 -8.1783047755469465e+00 1 0 1 +777 1 3 -1.0500000000000000e+00 -1.2944719376526681e+00 8.2179006583176921e+00 -5.9591128327106793e+00 1 0 1 +778 1 3 -1.0500000000000000e+00 -2.7178118928939066e+00 6.0433987789451216e+00 -5.9599384800937845e+00 1 0 1 +779 1 3 -1.0500000000000000e+00 -3.8635401045790045e+00 8.3748910583801184e+00 -6.1572890071054553e+00 1 0 1 +780 1 5 4.2499999999999999e-01 -4.4204712300966698e-01 5.3726433482133373e+00 -7.9656531380324660e+00 1 0 1 +781 1 1 1.5750000000000000e+00 -4.2399634654657090e+00 7.2957009416369552e+00 9.1909553289548924e+00 1 0 0 +782 1 2 2.1000000000000001e+00 -5.0287080909651944e+00 -7.9808550998908601e-02 6.4377936146807784e+00 1 0 0 +783 1 2 2.1000000000000001e+00 -5.0545385009127086e+00 5.9313688166380309e+00 6.4302462215147322e+00 1 0 0 +784 1 3 -1.0500000000000000e+00 -4.8281012837694046e+00 8.8152966265658250e+00 8.0531304181641090e+00 1 0 0 +785 1 3 -1.0500000000000000e+00 -5.1933624018226121e+00 6.0271296980450337e+00 8.0517278915789099e+00 1 0 0 +786 1 4 -9.4999999999999996e-01 -2.5582654039278250e+00 7.2099210191643230e+00 8.1709094567216596e+00 1 0 0 +787 1 3 -1.0500000000000000e+00 -1.2008232805355838e+00 5.2311079788265964e+00 5.9521445604051575e+00 1 0 0 +788 1 3 -1.0500000000000000e+00 -4.9372845104833996e+00 7.4055932340930894e+00 5.9529452386312212e+00 1 0 0 +789 1 3 -1.0500000000000000e+00 -3.7917925808657120e+00 5.0738227824576327e+00 6.1505092832850394e+00 1 0 0 +790 
1 5 4.2499999999999999e-01 -2.0536457954054264e+00 8.0753827040336112e+00 7.9556672672870157e+00 1 0 0 +791 1 1 1.5750000000000000e+00 -6.8481402725302178e+00 2.8127705009414115e+00 9.1909694932944426e+00 1 0 0 +792 1 2 2.1000000000000001e+00 -2.4205274648176855e+00 4.4031200138880386e+00 6.4377764011069978e+00 1 0 0 +793 1 2 2.1000000000000001e+00 -2.5027219381836101e+00 1.4484775756189130e+00 6.4302596827224008e+00 1 0 0 +794 1 3 -1.0500000000000000e+00 -2.2762520213516915e+00 4.3324257166757114e+00 8.0531256507795952e+00 1 0 0 +795 1 3 -1.0500000000000000e+00 -2.6415669992893562e+00 1.5442450680497792e+00 8.0517400741648757e+00 1 0 0 +796 1 4 -9.4999999999999996e-01 -5.1664710908493685e+00 2.7269760054801573e+00 8.1708685623218216e+00 1 0 0 +797 1 3 -1.0500000000000000e+00 -3.8090112360918749e+00 7.4821451946543149e-01 5.9521635417610916e+00 1 0 0 +798 1 3 -1.0500000000000000e+00 -2.3854579876216917e+00 2.9226351213691721e+00 5.9529612412827628e+00 1 0 0 +799 1 3 -1.0500000000000000e+00 -1.2399990334443274e+00 5.9089634154326021e-01 6.1505543057211902e+00 1 0 0 +800 1 5 4.2499999999999999e-01 -4.6618788562015183e+00 3.5923503664769996e+00 7.9553499133386154e+00 1 0 0 +801 1 1 1.5750000000000000e+00 9.5914406174922995e-01 1.0272631015358176e+01 9.1954741725279803e+00 0 0 0 +802 1 2 2.1000000000000001e+00 4.9163649402406300e+00 -1.7851670476514286e+01 -6.4446454331564214e+00 0 1 1 +803 1 2 2.1000000000000001e+00 5.1674890920397623e+00 1.2000505607668213e+01 -6.4371152304477413e+00 0 0 1 +804 1 3 -1.0500000000000000e+00 4.9412075316071071e+00 9.1164022763974764e+00 -8.0601155887589986e+00 0 0 1 +805 1 3 -1.0500000000000000e+00 5.3061601603892807e+00 1.1904813057459688e+01 -8.0587083009271474e+00 0 0 1 +806 1 4 -9.4999999999999996e-01 2.6710267982659861e+00 1.0721541092250025e+01 -8.1782818075558055e+00 0 0 1 +807 1 3 -1.0500000000000000e+00 1.3136827409815606e+00 1.2700822491700222e+01 -5.9591030958311784e+00 0 0 1 +808 1 3 -1.0500000000000000e+00 
5.0503604221137230e+00 1.0526296057145295e+01 -5.9599464659705452e+00 0 0 1 +809 1 3 -1.0500000000000000e+00 3.9045943009002055e+00 1.2857789754493236e+01 -6.1572353852276542e+00 0 0 1 +810 1 5 4.2499999999999999e-01 2.1661314467377775e+00 9.8556076146865621e+00 -7.9654730740477273e+00 0 0 1 +811 1 1 1.5750000000000000e+00 3.5673301724587283e+00 1.4755515176235637e+01 9.1954876583807348e+00 0 0 0 +812 1 2 2.1000000000000001e+00 2.5335509271891183e+00 1.3528730973964503e+01 -6.4446348900139068e+00 0 0 1 +813 1 2 2.1000000000000001e+00 2.6156602424171709e+00 1.6483400736101043e+01 -6.4371095743167226e+00 0 0 1 +814 1 3 -1.0500000000000000e+00 2.3893968917343269e+00 1.3599312392901009e+01 -8.0600996882849216e+00 0 0 1 +815 1 3 -1.0500000000000000e+00 2.7543325110934749e+00 1.6387685957052550e+01 -8.0586980737555223e+00 0 0 1 +816 1 4 -9.4999999999999996e-01 5.2791504809107757e+00 1.5204345400600356e+01 -8.1783811175504368e+00 0 0 1 +817 1 3 -1.0500000000000000e+00 3.9218460573530898e+00 1.7183739161164620e+01 -5.9590968118050913e+00 0 0 1 +818 1 3 -1.0500000000000000e+00 2.4985138132724032e+00 1.5009185242071151e+01 -5.9599156639450763e+00 0 0 1 +819 1 3 -1.0500000000000000e+00 1.3527706108090296e+00 1.7340709698012464e+01 -6.1572734604378230e+00 0 0 1 +820 1 5 4.2499999999999999e-01 4.7741756216847424e+00 1.4338270758814485e+01 -7.9662441318143786e+00 0 0 1 +821 1 1 1.5750000000000000e+00 9.7635861426469361e-01 1.6261520290635300e+01 9.1909590425904568e+00 0 0 0 +822 1 2 2.1000000000000001e+00 1.8761728232581376e-01 8.8860015271918087e+00 6.4377823342876148e+00 0 0 0 +823 1 2 2.1000000000000001e+00 1.6178187728752036e-01 1.4897210497758469e+01 6.4302535208403473e+00 0 0 0 +824 1 3 -1.0500000000000000e+00 1.6289933810008073e-01 -1.8082146201530033e+01 8.0531224563498363e+00 0 1 0 +825 1 3 -1.0500000000000000e+00 2.2943722128655253e-02 1.4992982570349231e+01 8.0517328793189620e+00 0 0 0 +826 1 4 -9.4999999999999996e-01 2.6580491635268899e+00 1.6175739290729627e+01 
8.1708963986422134e+00 0 0 0 +827 1 3 -1.0500000000000000e+00 4.0155000758857184e+00 1.4196942418462651e+01 5.9521556174858237e+00 0 0 0 +828 1 3 -1.0500000000000000e+00 2.7905119318886840e-01 1.6371390450192795e+01 5.9529460168667665e+00 0 0 0 +829 1 3 -1.0500000000000000e+00 1.4245133402443670e+00 1.4039630263960067e+01 6.1505535148942521e+00 0 0 0 +830 1 5 4.2499999999999999e-01 3.1626488202285810e+00 1.7041173319396815e+01 7.9555631536157563e+00 0 0 0 +831 1 1 1.5750000000000000e+00 -1.6317857389553545e+00 1.1778575789107471e+01 9.1909774754098343e+00 0 0 0 +832 1 2 2.1000000000000001e+00 2.7958005594194901e+00 1.3368938756081434e+01 6.4377887014402724e+00 0 0 0 +833 1 2 2.1000000000000001e+00 2.7136140216299953e+00 1.0414285896617148e+01 6.4302588638537337e+00 0 0 0 +834 1 3 -1.0500000000000000e+00 2.9401013504113145e+00 1.3298235927640278e+01 8.0531356610080529e+00 0 0 0 +835 1 3 -1.0500000000000000e+00 2.5747765218956289e+00 1.0510039452077731e+01 8.0517427577113736e+00 0 0 0 +836 1 4 -9.4999999999999996e-01 4.9836397683449718e-02 1.1692727037569306e+01 8.1708028966272543e+00 0 0 0 +837 1 3 -1.0500000000000000e+00 1.4073270338750845e+00 9.7140368504023975e+00 5.9521647147306140e+00 0 0 0 +838 1 3 -1.0500000000000000e+00 2.8308718652665448e+00 1.1888448817516608e+01 5.9529770340129922e+00 0 0 0 +839 1 3 -1.0500000000000000e+00 3.9763465629763353e+00 9.5567258410570339e+00 6.1505245832195392e+00 0 0 0 +840 1 5 4.2499999999999999e-01 5.5437574103903842e-01 1.2558011512794511e+01 7.9548369309958389e+00 0 0 0 +841 1 1 1.5750000000000000e+00 6.1191514752329681e+00 1.0272628446089495e+01 9.1954755116342426e+00 0 0 0 +842 1 2 2.1000000000000001e+00 -1.0563635316303126e+01 -1.7851672404772010e+01 -6.4446430636755050e+00 1 1 1 +843 1 2 2.1000000000000001e+00 -1.0312508127788274e+01 1.2000504835300273e+01 -6.4371136189975573e+00 1 0 1 +844 1 3 -1.0500000000000000e+00 1.0101202529117401e+01 9.1164047236489303e+00 -8.0601136752638354e+00 0 0 1 +845 1 3 
-1.0500000000000000e+00 -1.0173839477133416e+01 1.1904808798534869e+01 -8.0587071681585449e+00 1 0 1 +846 1 4 -9.4999999999999996e-01 7.8310195505242639e+00 1.0721534949720333e+01 -8.1782884834257619e+00 0 0 1 +847 1 3 -1.0500000000000000e+00 6.4736859042570032e+00 1.2700820960773893e+01 -5.9591019402721095e+00 0 0 1 +848 1 3 -1.0500000000000000e+00 -1.0429642829932224e+01 1.0526294753619656e+01 -5.9599442021427578e+00 1 0 1 +849 1 3 -1.0500000000000000e+00 9.0645970517075085e+00 1.2857791358856836e+01 -6.1572400533644327e+00 0 0 1 +850 1 5 4.2499999999999999e-01 7.3261143750068030e+00 9.8555923188552050e+00 -7.9655376915875653e+00 0 0 1 +851 1 1 1.5750000000000000e+00 -1.1912664745717505e+01 1.4755512694758050e+01 9.1954902953247242e+00 1 0 0 +852 1 2 2.1000000000000001e+00 7.6935469306619453e+00 1.3528730405288009e+01 -6.4446365983477065e+00 0 0 1 +853 1 2 2.1000000000000001e+00 7.7756573929779265e+00 1.6483399911100069e+01 -6.4371097682563789e+00 0 0 1 +854 1 3 -1.0500000000000000e+00 7.5493958338441338e+00 1.3599310486957631e+01 -8.0600996912441172e+00 0 0 1 +855 1 3 -1.0500000000000000e+00 7.9143287217791674e+00 1.6387684086693778e+01 -8.0586981246027225e+00 0 0 1 +856 1 4 -9.4999999999999996e-01 -1.0200845140703732e+01 1.5204358135858296e+01 -8.1783665874928815e+00 1 0 1 +857 1 3 -1.0500000000000000e+00 9.0818478621054517e+00 1.7183738509482989e+01 -5.9590966964900600e+00 0 0 1 +858 1 3 -1.0500000000000000e+00 7.6585145018918297e+00 1.5009182963545197e+01 -5.9599164030816034e+00 0 0 1 +859 1 3 -1.0500000000000000e+00 6.5127698033881778e+00 1.7340706948873940e+01 -6.1572705127029428e+00 0 0 1 +860 1 5 4.2499999999999999e-01 9.9342059889460685e+00 1.4338293751628743e+01 -7.9661339862898357e+00 0 0 1 +861 1 1 1.5750000000000000e+00 6.1363641232948538e+00 1.6261517685550391e+01 9.1909616347166967e+00 0 0 0 +862 1 2 2.1000000000000001e+00 5.3476210543043585e+00 8.8860009553146710e+00 6.4377841107469234e+00 0 0 0 +863 1 2 2.1000000000000001e+00 
5.3217846225385763e+00 1.4897209955806343e+01 6.4302538644841931e+00 0 0 0 +864 1 3 -1.0500000000000000e+00 5.3229006772736174e+00 -1.8082144401486747e+01 8.0531219084318906e+00 0 1 0 +865 1 3 -1.0500000000000000e+00 5.1829474111249212e+00 1.4992983958301888e+01 8.0517326625554517e+00 0 0 0 +866 1 4 -9.4999999999999996e-01 -1.2821946263469606e+01 1.6175752280071055e+01 8.1709108142761941e+00 1 0 0 +867 1 3 -1.0500000000000000e+00 -1.1464500391818991e+01 1.4196940103749188e+01 5.9521562356745896e+00 1 0 0 +868 1 3 -1.0500000000000000e+00 5.4390504792354797e+00 1.6371393957699727e+01 5.9529465671206658e+00 0 0 0 +869 1 3 -1.0500000000000000e+00 6.5845132545315721e+00 1.4039633778280614e+01 6.1505504432606806e+00 0 0 0 +870 1 5 4.2499999999999999e-01 -1.2317321462039381e+01 1.7041195964547800e+01 7.9556719658547124e+00 1 0 0 +871 1 1 1.5750000000000000e+00 3.5282075217309234e+00 1.1778578148162438e+01 9.1909766534637463e+00 0 0 0 +872 1 2 2.1000000000000001e+00 -1.2684197981403766e+01 1.3368938474166271e+01 6.4377910191215193e+00 1 0 0 +873 1 2 2.1000000000000001e+00 -1.2766382201355855e+01 1.0414283899170361e+01 6.4302606644857043e+00 1 0 0 +874 1 3 -1.0500000000000000e+00 -1.2539903585174706e+01 1.3298238226200727e+01 8.0531375005468320e+00 1 0 0 +875 1 3 -1.0500000000000000e+00 -1.2905223260001820e+01 1.0510035107430650e+01 8.0517442427969428e+00 1 0 0 +876 1 4 -9.4999999999999996e-01 5.2098438522500086e+00 1.1692733978776371e+01 8.1708104702368587e+00 0 0 0 +877 1 3 -1.0500000000000000e+00 6.5673241954230335e+00 9.7140383740497107e+00 5.9521633948163242e+00 0 0 0 +878 1 3 -1.0500000000000000e+00 -1.2649131000462521e+01 1.1888449401145639e+01 5.9529791502617400e+00 1 0 0 +879 1 3 -1.0500000000000000e+00 -1.1503651811789799e+01 9.5567279202891875e+00 6.1505199011592016e+00 1 0 0 +880 1 5 4.2499999999999999e-01 5.7143946870250417e+00 1.2558028213964622e+01 7.9549080992801180e+00 0 0 0 +881 1 1 1.5750000000000000e+00 -9.3608560873875017e+00 1.0272631111638628e+01 
9.1954743046681102e+00 1 0 0 +882 1 2 2.1000000000000001e+00 -5.4036354702322322e+00 -1.7851671672098018e+01 -6.4446456410194326e+00 1 1 1 +883 1 2 2.1000000000000001e+00 -5.1525121726586081e+00 1.2000505871725608e+01 -6.4371154407465685e+00 1 0 1 +884 1 3 -1.0500000000000000e+00 -5.3787925590936512e+00 9.1164020368666421e+00 -8.0601158563416693e+00 1 0 1 +885 1 3 -1.0500000000000000e+00 -5.0138400597627237e+00 1.1904813221841685e+01 -8.0587086689959708e+00 1 0 1 +886 1 4 -9.4999999999999996e-01 -7.6489730268219471e+00 1.0721541393030112e+01 -8.1782816163345107e+00 1 0 1 +887 1 3 -1.0500000000000000e+00 -9.0063161114965986e+00 1.2700823102122630e+01 -5.9591029368687813e+00 1 0 1 +888 1 3 -1.0500000000000000e+00 -5.2696400231689218e+00 1.0526295627999442e+01 -5.9599467060166038e+00 1 0 1 +889 1 3 -1.0500000000000000e+00 -6.4154044952494846e+00 1.2857788573331046e+01 -6.1572353369908708e+00 1 0 1 +890 1 5 4.2499999999999999e-01 -8.1538688418860570e+00 9.8556072002489721e+00 -7.9654733157229831e+00 1 0 1 +891 1 1 1.5750000000000000e+00 -6.7526701707369892e+00 1.4755515263778587e+01 9.1954878881928188e+00 1 0 0 +892 1 2 2.1000000000000001e+00 -7.7864505865215792e+00 1.3528730556929716e+01 -6.4446350299330026e+00 1 0 1 +893 1 2 2.1000000000000001e+00 -7.7043396890495206e+00 1.6483400221802551e+01 -6.4371098065081451e+00 1 0 1 +894 1 3 -1.0500000000000000e+00 -7.9306030418371165e+00 1.3599311965107294e+01 -8.0600999807331739e+00 1 0 1 +895 1 3 -1.0500000000000000e+00 -7.5656672531230766e+00 1.6387686056649184e+01 -8.0586984571757885e+00 1 0 1 +896 1 4 -9.4999999999999996e-01 -5.0408493365276072e+00 1.5204345526595521e+01 -8.1783814388930907e+00 1 0 1 +897 1 3 -1.0500000000000000e+00 -6.3981534810776139e+00 1.7183739336411168e+01 -5.9590967711935878e+00 1 0 1 +898 1 3 -1.0500000000000000e+00 -7.8214861266717790e+00 1.5009185587807504e+01 -5.9599160327840419e+00 1 0 1 +899 1 3 -1.0500000000000000e+00 -8.9672301887897952e+00 1.7340709847341071e+01 -6.1572735870534272e+00 1 
0 1 +900 1 5 4.2499999999999999e-01 -5.5458244519934565e+00 1.4338270710946130e+01 -7.9662442234946882e+00 1 0 1 +901 1 1 1.5750000000000000e+00 -9.3436417625408161e+00 1.6261520341186785e+01 9.1909588118085530e+00 1 0 0 +902 1 2 2.1000000000000001e+00 -1.0132382849065314e+01 8.8860007791418241e+00 6.4377824875809608e+00 1 0 0 +903 1 2 2.1000000000000001e+00 -1.0158217927145110e+01 1.4897210104346438e+01 6.4302538251145513e+00 1 0 0 +904 1 3 -1.0500000000000000e+00 -1.0157100539689431e+01 -1.8082146191863000e+01 8.0531224378788977e+00 1 1 0 +905 1 3 -1.0500000000000000e+00 -1.0297056154948535e+01 1.4992982573007399e+01 8.0517330805570211e+00 1 0 0 +906 1 4 -9.4999999999999996e-01 -7.6619508608712756e+00 1.6175738951123353e+01 8.1708961078531885e+00 1 0 0 +907 1 3 -1.0500000000000000e+00 -6.3045002551674836e+00 1.4196942784200427e+01 5.9521560261705897e+00 1 0 0 +908 1 3 -1.0500000000000000e+00 -1.0040948526384431e+01 1.6371391069099882e+01 5.9529458790138428e+00 1 0 0 +909 1 3 -1.0500000000000000e+00 -8.8954871125765536e+00 1.4039630737184076e+01 6.1505537814232305e+00 1 0 0 +910 1 5 4.2499999999999999e-01 -7.1573512577454785e+00 1.7041173181632427e+01 7.9555627857502351e+00 1 0 0 +911 1 1 1.5750000000000000e+00 -1.1951785705124813e+01 1.1778576365200770e+01 9.1909778876749968e+00 1 0 0 +912 1 2 2.1000000000000001e+00 -7.5241984114621445e+00 1.3368938832318168e+01 6.4377888161749119e+00 1 0 0 +913 1 2 2.1000000000000001e+00 -7.6063850495666916e+00 1.0414286154620815e+01 6.4302588734015060e+00 1 0 0 +914 1 3 -1.0500000000000000e+00 -7.3798985816552030e+00 1.3298236020498834e+01 8.0531357941146524e+00 1 0 0 +915 1 3 -1.0500000000000000e+00 -7.7452236302433537e+00 1.0510039578811938e+01 8.0517427625121130e+00 1 0 0 +916 1 4 -9.4999999999999996e-01 -1.0270163380627732e+01 1.1692727345346942e+01 8.1708031312386424e+00 1 0 0 +917 1 3 -1.0500000000000000e+00 -8.9126734492709296e+00 9.7140366941120710e+00 5.9521645789014563e+00 1 0 0 +918 1 3 -1.0500000000000000e+00 
-7.4891282810178623e+00 1.1888448733932542e+01 5.9529770818747529e+00 1 0 0 +919 1 3 -1.0500000000000000e+00 -6.3436536163207187e+00 9.5567258743233907e+00 6.1505246485982816e+00 1 0 0 +920 1 5 4.2499999999999999e-01 -9.7656244363498228e+00 1.2558011321716375e+01 7.9548367085508005e+00 1 0 0 +921 1 1 1.5750000000000000e+00 -4.2008488566253455e+00 1.0272628241376413e+01 9.1954754754211550e+00 1 0 0 +922 1 2 2.1000000000000001e+00 -2.4363611391576256e-01 -1.7851672227993191e+01 -6.4446427593915843e+00 1 1 1 +923 1 2 2.1000000000000001e+00 7.4920002869873770e-03 1.2000503205404943e+01 -6.4371134900596143e+00 1 0 1 +924 1 3 -1.0500000000000000e+00 -2.1879733799502432e-01 9.1164044355592857e+00 -8.0601137680425783e+00 1 0 1 +925 1 3 -1.0500000000000000e+00 1.4616062656334705e-01 1.1904808638368209e+01 -8.0587065169801857e+00 1 0 1 +926 1 4 -9.4999999999999996e-01 -2.4889803693301005e+00 1.0721534462006659e+01 -8.1782885284693503e+00 1 0 1 +927 1 3 -1.0500000000000000e+00 -3.8463123942428785e+00 1.2700822641851001e+01 -5.9591019920428749e+00 1 0 1 +928 1 3 -1.0500000000000000e+00 -1.0964283522775631e-01 1.0526297071062629e+01 -5.9599446471423798e+00 1 0 1 +929 1 3 -1.0500000000000000e+00 -1.2554025566113030e+00 1.2857790869854561e+01 -6.1572402054460467e+00 1 0 1 +930 1 5 4.2499999999999999e-01 -2.9938854981750351e+00 9.8555924277129918e+00 -7.9655379182989181e+00 1 0 1 +931 1 1 1.5750000000000000e+00 -1.5926650770635575e+00 1.4755512457435376e+01 9.1954904185503317e+00 1 0 0 +932 1 2 2.1000000000000001e+00 -2.6264551550247752e+00 1.3528730763598830e+01 -6.4446363551555761e+00 1 0 1 +933 1 2 2.1000000000000001e+00 -2.5443431324864640e+00 1.6483400816714376e+01 -6.4371099821502948e+00 1 0 1 +934 1 3 -1.0500000000000000e+00 -2.7706042230449031e+00 1.3599310469578739e+01 -8.0600990617377661e+00 1 0 1 +935 1 3 -1.0500000000000000e+00 -2.4056709693112541e+00 1.6387684207415344e+01 -8.0586985057548883e+00 1 0 1 +936 1 4 -9.4999999999999996e-01 1.1915501302319775e-01 
1.5204358528283802e+01 -8.1783667475430661e+00 1 0 1 +937 1 3 -1.0500000000000000e+00 -1.2381509555074448e+00 1.7183739215801442e+01 -5.9590963979213312e+00 1 0 1 +938 1 3 -1.0500000000000000e+00 -2.6614854614111429e+00 1.5009180546004234e+01 -5.9599164737856771e+00 1 0 1 +939 1 3 -1.0500000000000000e+00 -3.8072302104423459e+00 1.7340707015694367e+01 -6.1572708167991710e+00 1 0 1 +940 1 5 4.2499999999999999e-01 -3.8579405681622525e-01 1.4338293590901028e+01 -7.9661336443422401e+00 1 0 1 +941 1 1 1.5750000000000000e+00 -4.1836362691246727e+00 1.6261517742061802e+01 9.1909615661208726e+00 1 0 0 +942 1 2 2.1000000000000001e+00 -4.9723795732015592e+00 8.8860018630080155e+00 6.4377841413580192e+00 1 0 0 +943 1 2 2.1000000000000001e+00 -4.9982157003256944e+00 1.4897209688412953e+01 6.4302536770108674e+00 1 0 0 +944 1 3 -1.0500000000000000e+00 -4.9970994132512585e+00 -1.8082144569897800e+01 8.0531220029840256e+00 1 1 0 +945 1 3 -1.0500000000000000e+00 -5.1370522841439490e+00 1.4992984178461246e+01 8.0517327477920944e+00 1 0 0 +946 1 4 -9.4999999999999996e-01 -2.5019464819683837e+00 1.6175752244545148e+01 8.1709103734671338e+00 1 0 0 +947 1 3 -1.0500000000000000e+00 -1.1444992486448786e+00 1.4196941339209797e+01 5.9521560282594095e+00 1 0 0 +948 1 3 -1.0500000000000000e+00 -4.8809494637777346e+00 1.6371393724148003e+01 5.9529465455089419e+00 1 0 0 +949 1 3 -1.0500000000000000e+00 -3.7354864432116726e+00 1.4039633678663673e+01 6.1505502589862431e+00 1 0 0 +950 1 5 4.2499999999999999e-01 -1.9973215627103933e+00 1.7041195791149196e+01 7.9556716086143417e+00 1 0 0 +951 1 1 1.5750000000000000e+00 -6.7917924975806212e+00 1.1778578339587053e+01 9.1909763267374807e+00 1 0 0 +952 1 2 2.1000000000000001e+00 -2.3641992187272685e+00 1.3368937547031486e+01 6.4377908082496447e+00 1 0 0 +953 1 2 2.1000000000000001e+00 -2.4463826408850196e+00 1.0414285481815444e+01 6.4302606013691719e+00 1 0 0 +954 1 3 -1.0500000000000000e+00 -2.2199035877660513e+00 1.3298238522499428e+01 
8.0531376169618127e+00 1 0 0 +955 1 3 -1.0500000000000000e+00 -2.5852229421360589e+00 1.0510035033763266e+01 8.0517442164456270e+00 1 0 0 +956 1 4 -9.4999999999999996e-01 -5.1101559860180181e+00 1.1692734063814708e+01 8.1708104764498550e+00 1 0 0 +957 1 3 -1.0500000000000000e+00 -3.7526759502622120e+00 9.7140380134006072e+00 5.9521636643917653e+00 1 0 0 +958 1 3 -1.0500000000000000e+00 -2.3291309919069043e+00 1.1888448818777260e+01 5.9529792884851158e+00 1 0 0 +959 1 3 -1.0500000000000000e+00 -1.1836511455589331e+00 9.5567275335870292e+00 6.1505199645362669e+00 1 0 0 +960 1 5 4.2499999999999999e-01 -4.6056052997364061e+00 1.2558028327423553e+01 7.9549083626382711e+00 1 0 0 +961 1 1 1.5750000000000000e+00 7.9014613306237891e-01 -1.6624828059821887e+01 9.1954653122587686e+00 0 1 0 +962 1 2 2.1000000000000001e+00 4.9727020207313082e+00 -8.8858385658361030e+00 -6.4446346787343831e+00 0 1 1 +963 1 2 2.1000000000000001e+00 4.9984962573283767e+00 -1.4896980808338144e+01 -6.4371234345320723e+00 0 1 1 +964 1 3 -1.0500000000000000e+00 4.7721944899002082e+00 -1.7781095648285575e+01 -8.0601087106109883e+00 0 1 1 +965 1 3 -1.0500000000000000e+00 5.1371812278447404e+00 -1.4992684234570383e+01 -8.0587150921377120e+00 0 1 1 +966 1 4 -9.4999999999999996e-01 2.5020439078875452e+00 -1.6175924288916114e+01 -8.1782720714353765e+00 0 1 1 +967 1 3 -1.0500000000000000e+00 1.1446895668103814e+00 -1.4196658409646401e+01 -5.9591156185573206e+00 0 1 1 +968 1 3 -1.0500000000000000e+00 4.8813576501926015e+00 -1.6371146444977054e+01 -5.9599513342168366e+00 0 1 1 +969 1 3 -1.0500000000000000e+00 3.7356194709202537e+00 -1.4039668811309069e+01 -6.1572736391364105e+00 0 1 1 +970 1 5 4.2499999999999999e-01 1.9971503919299423e+00 -1.7041823176333580e+01 -7.9653929088053017e+00 0 1 1 +971 1 1 1.5750000000000000e+00 3.3983134983828833e+00 -1.2141942115268218e+01 9.1954834388209079e+00 0 1 0 +972 1 2 2.1000000000000001e+00 2.3645549216844337e+00 -1.3368728600498955e+01 -6.4446487804419128e+00 0 1 1 +973 
1 2 2.1000000000000001e+00 2.4466496933542512e+00 -1.0414052043112875e+01 -6.4371091863675458e+00 0 1 1 +974 1 3 -1.0500000000000000e+00 2.2203829025710782e+00 -1.3298142677608045e+01 -8.0601117272154124e+00 0 1 1 +975 1 3 -1.0500000000000000e+00 2.5853160747242718e+00 -1.0509749620232439e+01 -8.0587013655766029e+00 0 1 1 +976 1 4 -9.4999999999999996e-01 5.1101593885303913e+00 -1.1693068756148804e+01 -8.1783334517138577e+00 0 1 1 +977 1 3 -1.0500000000000000e+00 3.7528412982499759e+00 -9.7137276173481961e+00 -5.9590950284740796e+00 0 1 1 +978 1 3 -1.0500000000000000e+00 2.3295163638276914e+00 -1.1888281133590237e+01 -5.9599308169704548e+00 0 1 1 +979 1 3 -1.0500000000000000e+00 1.1837503114553147e+00 -9.5567680230443344e+00 -6.1572373214348515e+00 0 1 1 +980 1 5 4.2499999999999999e-01 4.6052195473971445e+00 -1.2559080874253979e+01 -7.9658803779595155e+00 0 1 1 +981 1 1 1.5750000000000000e+00 8.0736573778941612e-01 -1.0635975996148398e+01 9.1909753061810733e+00 0 1 0 +982 1 2 2.1000000000000001e+00 1.8628445256133830e-02 -1.8011447915235433e+01 6.4377718915943305e+00 0 1 0 +983 1 2 2.1000000000000001e+00 -7.2276874662620116e-03 -1.2000261733881192e+01 6.4302612474555989e+00 0 1 0 +984 1 3 -1.0500000000000000e+00 2.1925925755456710e-01 -9.1163097472082928e+00 8.0531309097051746e+00 0 1 0 +985 1 3 -1.0500000000000000e+00 -1.4607171300389510e-01 -1.1904502333090878e+01 8.0517435137832436e+00 0 1 0 +986 1 4 -9.4999999999999996e-01 2.4890026502506046e+00 -1.0721811475529716e+01 8.1708171771864926e+00 0 1 0 +987 1 3 -1.0500000000000000e+00 3.8464851927735317e+00 -1.2700516442874417e+01 5.9521673170375600e+00 0 1 0 +988 1 3 -1.0500000000000000e+00 1.1003656040682230e-01 -1.0526114309286720e+01 5.9529730815098585e+00 0 1 0 +989 1 3 -1.0500000000000000e+00 1.2554990680267775e+00 -1.2857838414621902e+01 6.1505457741886893e+00 0 1 0 +990 1 5 4.2499999999999999e-01 2.9935487410545569e+00 -9.8565089684446594e+00 7.9549502968255368e+00 0 1 0 +991 1 1 1.5750000000000000e+00 
-1.8007920015586318e+00 -1.5118868718316499e+01 9.1909616323537193e+00 0 1 0 +992 1 2 2.1000000000000001e+00 2.6267830700490862e+00 -1.3528553650636692e+01 6.4377889611924815e+00 0 1 0 +993 1 2 2.1000000000000001e+00 2.5446226083752919e+00 -1.6483194360994744e+01 6.4302480124363584e+00 0 1 0 +994 1 3 -1.0500000000000000e+00 2.7710754601895964e+00 -1.3599261821323303e+01 8.0531352851040801e+00 0 1 0 +995 1 3 -1.0500000000000000e+00 2.4057978135080607e+00 -1.6387440553216820e+01 8.0517316145122209e+00 0 1 0 +996 1 4 -9.4999999999999996e-01 -1.1912614015696477e-01 -1.5204689518349639e+01 8.1708584714428554e+00 0 1 0 +997 1 3 -1.0500000000000000e+00 1.2383377906357129e+00 -1.7183446415936316e+01 5.9521484947542458e+00 0 1 0 +998 1 3 -1.0500000000000000e+00 2.6618753214999806e+00 -1.5008977825648442e+01 5.9529572045667578e+00 0 1 0 +999 1 3 -1.0500000000000000e+00 3.8073691332916653e+00 -1.7340734809224180e+01 6.1505012851695469e+00 0 1 0 +1000 1 5 4.2499999999999999e-01 3.8544716658245193e-01 -1.4339299456480449e+01 7.9552681735473065e+00 0 1 0 +1001 1 1 1.5750000000000000e+00 5.9501534782574090e+00 -1.6624829967969383e+01 9.1954667439341904e+00 0 1 0 +1002 1 2 2.1000000000000001e+00 -1.0507298342314735e+01 -8.8858403670229400e+00 -6.4446316599145241e+00 1 1 1 +1003 1 2 2.1000000000000001e+00 -1.0481500260769259e+01 -1.4896981508692269e+01 -6.4371215581986849e+00 1 1 1 +1004 1 3 -1.0500000000000000e+00 9.9321894580003942e+00 -1.7781093582562203e+01 -8.0601067045635411e+00 0 1 1 +1005 1 3 -1.0500000000000000e+00 -1.0342818267007878e+01 -1.4992688390522170e+01 -8.0587131133336154e+00 1 1 1 +1006 1 4 -9.4999999999999996e-01 7.6620365971861197e+00 -1.6175930874377521e+01 -8.1782783790624585e+00 0 1 1 +1007 1 3 -1.0500000000000000e+00 6.3046951963703499e+00 -1.4196658135879822e+01 -5.9591147168331009e+00 0 1 1 +1008 1 3 -1.0500000000000000e+00 -1.0598645287750651e+01 -1.6371145636648091e+01 -5.9599491670546207e+00 1 1 1 +1009 1 3 -1.0500000000000000e+00 
8.8956218003342826e+00 -1.4039666968186262e+01 -6.1572783744964994e+00 0 1 1 +1010 1 5 4.2499999999999999e-01 7.1571337866779707e+00 -1.7041837629100414e+01 -7.9654566502107356e+00 0 1 1 +1011 1 1 1.5750000000000000e+00 -1.2081681588808072e+01 -1.2141945064834767e+01 9.1954858538142332e+00 1 1 0 +1012 1 2 2.1000000000000001e+00 7.5245498984114256e+00 -1.3368729330630948e+01 -6.4446501069430093e+00 0 1 1 +1013 1 2 2.1000000000000001e+00 7.6066470789041496e+00 -1.0414051110966987e+01 -6.4371091545821617e+00 0 1 1 +1014 1 3 -1.0500000000000000e+00 7.3803815000028088e+00 -1.3298144454433004e+01 -8.0601109104777926e+00 0 1 1 +1015 1 3 -1.0500000000000000e+00 7.7453123285148955e+00 -1.0509751071336581e+01 -8.0587012400951590e+00 0 1 1 +1016 1 4 -9.4999999999999996e-01 -1.0369835928182086e+01 -1.1693055457843840e+01 -8.1783191615471544e+00 1 1 1 +1017 1 3 -1.0500000000000000e+00 8.9128425434035599e+00 -9.7137284889883500e+00 -5.9590947471446691e+00 0 1 1 +1018 1 3 -1.0500000000000000e+00 7.4895160998307411e+00 -1.1888285480319359e+01 -5.9599317216137955e+00 0 1 1 +1019 1 3 -1.0500000000000000e+00 6.3437510422751444e+00 -9.5567716600651273e+00 -6.1572344950413971e+00 0 1 1 +1020 1 5 4.2499999999999999e-01 9.7652499011245943e+00 -1.2559058042019794e+01 -7.9657691425755282e+00 0 1 1 +1021 1 1 1.5750000000000000e+00 5.9673710268621605e+00 -1.0635979075429637e+01 9.1909780463641972e+00 0 1 0 +1022 1 2 2.1000000000000001e+00 5.1786324224706561e+00 -1.8011446897219908e+01 6.4377735217856547e+00 0 1 0 +1023 1 2 2.1000000000000001e+00 5.1527757056696668e+00 -1.2000262913350006e+01 6.4302615654366306e+00 0 1 0 +1024 1 3 -1.0500000000000000e+00 5.3792604215663040e+00 -9.1163084856211647e+00 8.0531305067542291e+00 0 1 0 +1025 1 3 -1.0500000000000000e+00 5.0139323929661863e+00 -1.1904500623282853e+01 8.0517434924964313e+00 0 1 0 +1026 1 4 -9.4999999999999996e-01 -1.2990992960142725e+01 -1.0721798911775450e+01 8.1708312033561299e+00 1 1 0 +1027 1 3 -1.0500000000000000e+00 
-1.1633514507876814e+01 -1.2700518084199729e+01 5.9521673705405220e+00 1 1 0 +1028 1 3 -1.0500000000000000e+00 5.2700358161820464e+00 -1.0526111294047354e+01 5.9529732606049226e+00 0 1 0 +1029 1 3 -1.0500000000000000e+00 6.4154992900391434e+00 -1.2857834730884534e+01 6.1505423546811979e+00 0 1 0 +1030 1 5 4.2499999999999999e-01 -1.2486421375705273e+01 -9.8564859567479530e+00 7.9550599164222504e+00 1 1 0 +1031 1 1 1.5750000000000000e+00 3.3592010037004201e+00 -1.5118866785118019e+01 9.1909601380801327e+00 0 1 0 +1032 1 2 2.1000000000000001e+00 -1.2853217324153643e+01 -1.3528555060001777e+01 6.4377910921695420e+00 1 1 0 +1033 1 2 2.1000000000000001e+00 -1.2935373395897837e+01 -1.6483194822573775e+01 6.4302497515585930e+00 1 1 0 +1034 1 3 -1.0500000000000000e+00 -1.2708929650106869e+01 -1.3599259177332932e+01 8.0531371914658507e+00 1 1 0 +1035 1 3 -1.0500000000000000e+00 -1.3074201659062052e+01 -1.6387445182261104e+01 8.0517334527674720e+00 1 1 0 +1036 1 4 -9.4999999999999996e-01 5.0408813035519113e+00 -1.5204682628331938e+01 8.1708660551808379e+00 0 1 0 +1037 1 3 -1.0500000000000000e+00 6.3983336015586403e+00 -1.7183446108267308e+01 5.9521474124245533e+00 0 1 0 +1038 1 3 -1.0500000000000000e+00 -1.2818127275342043e+01 -1.5008977873533983e+01 5.9529597817453528e+00 1 1 0 +1039 1 3 -1.0500000000000000e+00 -1.1672629205493738e+01 -1.7340732551870772e+01 6.1504965556458426e+00 1 1 0 +1040 1 5 4.2499999999999999e-01 5.5454661271361978e+00 -1.4339282916765702e+01 7.9553392571972665e+00 0 1 0 +1041 1 1 1.5750000000000000e+00 -9.5298540603204458e+00 -1.6624828197867142e+01 9.1954654289520192e+00 1 1 0 +1042 1 2 2.1000000000000001e+00 -5.3472966808288840e+00 -8.8858396116206695e+00 -6.4446344932339041e+00 1 1 1 +1043 1 2 2.1000000000000001e+00 -5.3215029610881182e+00 -1.4896981421392763e+01 -6.4371230371389112e+00 1 1 1 +1044 1 3 -1.0500000000000000e+00 -5.5478058143690685e+00 -1.7781095902577366e+01 -8.0601089646617385e+00 1 1 1 +1045 1 3 -1.0500000000000000e+00 
-5.1828186898750133e+00 -1.4992684223927771e+01 -8.0587144969042122e+00 1 1 1 +1046 1 4 -9.4999999999999996e-01 -7.8179561399721482e+00 -1.6175924811672513e+01 -8.1782720589106059e+00 1 1 1 +1047 1 3 -1.0500000000000000e+00 -9.1753085493851678e+00 -1.4196657228095635e+01 -5.9591157258489611e+00 1 1 1 +1048 1 3 -1.0500000000000000e+00 -5.4386424422348201e+00 -1.6371144430291693e+01 -5.9599515639333625e+00 1 1 1 +1049 1 3 -1.0500000000000000e+00 -6.5843804973133686e+00 -1.4039668592563981e+01 -6.1572734558794879e+00 1 1 1 +1050 1 5 4.2499999999999999e-01 -8.3228492405101608e+00 -1.7041822645328683e+01 -7.9653921925885740e+00 1 1 1 +1051 1 1 1.5750000000000000e+00 -6.9216864138919236e+00 -1.2141942251066947e+01 9.1954831994680930e+00 1 1 0 +1052 1 2 2.1000000000000001e+00 -7.9554462780630182e+00 -1.3368728161393772e+01 -6.4446483443793081e+00 1 1 1 +1053 1 2 2.1000000000000001e+00 -7.8733505961726316e+00 -1.0414051499920177e+01 -6.4371088734937656e+00 1 1 1 +1054 1 3 -1.0500000000000000e+00 -8.0996169734985859e+00 -1.3298142833743967e+01 -8.0601111408424195e+00 1 1 1 +1055 1 3 -1.0500000000000000e+00 -7.7346838862237659e+00 -1.0509749331655325e+01 -8.0587013087778416e+00 1 1 1 +1056 1 4 -9.4999999999999996e-01 -5.2098402258324183e+00 -1.1693068660258980e+01 -8.1783338515888548e+00 1 1 1 +1057 1 3 -1.0500000000000000e+00 -6.5671588505380107e+00 -9.7137273022640027e+00 -5.9590947424970375e+00 1 1 1 +1058 1 3 -1.0500000000000000e+00 -7.9904839189904369e+00 -1.1888282909204072e+01 -5.9599311259660048e+00 1 1 1 +1059 1 3 -1.0500000000000000e+00 -9.1362488600448213e+00 -9.5567679602290649e+00 -6.1572371300669877e+00 1 1 1 +1060 1 5 4.2499999999999999e-01 -5.7147808132948716e+00 -1.2559081489389445e+01 -7.9658807022744158e+00 1 1 1 +1061 1 1 1.5750000000000000e+00 -9.5126343226942840e+00 -1.0635976344345174e+01 9.1909751178689092e+00 1 1 0 +1062 1 2 2.1000000000000001e+00 -1.0301371444602418e+01 -1.8011447538981720e+01 6.4377720108397156e+00 1 1 0 +1063 1 2 
2.1000000000000001e+00 -1.0327226163200578e+01 -1.2000262398368617e+01 6.4302614654075185e+00 1 1 0 +1064 1 3 -1.0500000000000000e+00 -1.0100740918198079e+01 -9.1163100578818881e+00 8.0531308835390902e+00 1 1 0 +1065 1 3 -1.0500000000000000e+00 -1.0466071548753908e+01 -1.1904502092764059e+01 8.0517439789134144e+00 1 1 0 +1066 1 4 -9.4999999999999996e-01 -7.8309974311701129e+00 -1.0721811603640802e+01 8.1708170690204867e+00 1 1 0 +1067 1 3 -1.0500000000000000e+00 -6.4735145828088188e+00 -1.2700516099478254e+01 5.9521671446509004e+00 1 1 0 +1068 1 3 -1.0500000000000000e+00 -1.0209963543237151e+01 -1.0526113259939304e+01 5.9529726533375751e+00 1 1 0 +1069 1 3 -1.0500000000000000e+00 -9.0645012346420568e+00 -1.2857838100935414e+01 6.1505456631840669e+00 1 1 0 +1070 1 5 4.2499999999999999e-01 -7.3264511880101475e+00 -9.8565090043672878e+00 7.9549502005820187e+00 1 1 0 +1071 1 1 1.5750000000000000e+00 -1.2120791838981495e+01 -1.5118868930682885e+01 9.1909609001634287e+00 1 1 0 +1072 1 2 2.1000000000000001e+00 -7.6932177031264928e+00 -1.3528553408346209e+01 6.4377890763094090e+00 1 1 0 +1073 1 2 2.1000000000000001e+00 -7.7753776223234814e+00 -1.6483193005861636e+01 6.4302481263530531e+00 1 1 0 +1074 1 3 -1.0500000000000000e+00 -7.5489244914932039e+00 -1.3599261820971208e+01 8.0531355607228114e+00 1 1 0 +1075 1 3 -1.0500000000000000e+00 -7.9142020206664743e+00 -1.6387440549061097e+01 8.0517315712238542e+00 1 1 0 +1076 1 4 -9.4999999999999996e-01 -1.0439126601942455e+01 -1.5204689299647541e+01 8.1708582047045937e+00 1 1 0 +1077 1 3 -1.0500000000000000e+00 -9.0816619196054607e+00 -1.7183446894359871e+01 5.9521483635552528e+00 1 1 0 +1078 1 3 -1.0500000000000000e+00 -7.6581244905423622e+00 -1.5008979204713924e+01 5.9529570353984163e+00 1 1 0 +1079 1 3 -1.0500000000000000e+00 -6.5126300673439719e+00 -1.7340734949776287e+01 6.1505011625784114e+00 1 1 0 +1080 1 5 4.2499999999999999e-01 -9.9345529944712521e+00 -1.4339299609095498e+01 7.9552680464235195e+00 1 1 0 +1081 1 1 
1.5750000000000000e+00 -4.3698465507728370e+00 -1.6624830195129618e+01 9.1954664129414425e+00 1 1 0 +1082 1 2 2.1000000000000001e+00 -1.8729690580028091e-01 -8.8858402840281592e+00 -6.4446321463831335e+00 1 1 1 +1083 1 2 2.1000000000000001e+00 -1.6149869212405754e-01 -1.4896981184413754e+01 -6.4371216032033303e+00 1 1 1 +1084 1 3 -1.0500000000000000e+00 -3.8781072107300041e-01 -1.7781093773263681e+01 -8.0601065233489333e+00 1 1 1 +1085 1 3 -1.0500000000000000e+00 -2.2817988448505133e-02 -1.4992688410242105e+01 -8.0587131504128617e+00 1 1 1 +1086 1 4 -9.4999999999999996e-01 -2.6579633119623240e+00 -1.6175931116845067e+01 -8.1782784037223681e+00 1 1 1 +1087 1 3 -1.0500000000000000e+00 -4.0153061844639106e+00 -1.4196659127020242e+01 -5.9591144626296035e+00 1 1 1 +1088 1 3 -1.0500000000000000e+00 -2.7864533168977346e-01 -1.6371146104547211e+01 -5.9599491712132977e+00 1 1 1 +1089 1 3 -1.0500000000000000e+00 -1.4243790518188248e+00 -1.4039666239510284e+01 -6.1572782908827435e+00 1 1 1 +1090 1 5 4.2499999999999999e-01 -3.1628663016451819e+00 -1.7041837795964462e+01 -7.9654575744277851e+00 1 1 1 +1091 1 1 1.5750000000000000e+00 -1.7616812646750670e+00 -1.2141944760869407e+01 9.1954857091333579e+00 1 1 0 +1092 1 2 2.1000000000000001e+00 -2.7954488666628210e+00 -1.3368728697571713e+01 -6.4446500608326609e+00 1 1 1 +1093 1 2 2.1000000000000001e+00 -2.7133527815913725e+00 -1.0414050974412064e+01 -6.4371092551368125e+00 1 1 1 +1094 1 3 -1.0500000000000000e+00 -2.9396183120149280e+00 -1.3298144684109019e+01 -8.0601108293224346e+00 1 1 1 +1095 1 3 -1.0500000000000000e+00 -2.5746875206227164e+00 -1.0509751039714816e+01 -8.0587012400426286e+00 1 1 1 +1096 1 4 -9.4999999999999996e-01 -4.9836065876942115e-02 -1.1693055475253132e+01 -8.1783188965868909e+00 1 1 1 +1097 1 3 -1.0500000000000000e+00 -1.4071577661384467e+00 -9.7137287243612782e+00 -5.9590944987765981e+00 1 1 1 +1098 1 3 -1.0500000000000000e+00 -2.8304831874052363e+00 -1.1888285968982350e+01 -5.9599315915305215e+00 1 1 1 
+1099 1 3 -1.0500000000000000e+00 -3.9762496707077783e+00 -9.5567711446227541e+00 -6.1572345365326910e+00 1 1 1 +1100 1 5 4.2499999999999999e-01 -5.5475012598910567e-01 -1.2559058085924427e+01 -7.9657691107287523e+00 1 1 1 +1101 1 1 1.5750000000000000e+00 -4.3526289054242984e+00 -1.0635978834212278e+01 9.1909781859194304e+00 1 1 0 +1102 1 2 2.1000000000000001e+00 -5.1413676045368053e+00 -1.8011446654304045e+01 6.4377736130452288e+00 1 1 0 +1103 1 2 2.1000000000000001e+00 -5.1672245994445820e+00 -1.2000262598180228e+01 6.4302617433153380e+00 1 1 0 +1104 1 3 -1.0500000000000000e+00 -4.9407395975530388e+00 -9.1163082210535826e+00 8.0531302348575551e+00 1 1 0 +1105 1 3 -1.0500000000000000e+00 -5.3060677368685205e+00 -1.1904500593470591e+01 8.0517438416369416e+00 1 1 0 +1106 1 4 -9.4999999999999996e-01 -2.6709931280807089e+00 -1.0721798780591797e+01 8.1708316593728583e+00 1 1 0 +1107 1 3 -1.0500000000000000e+00 -1.3135120934193907e+00 -1.2700516981563499e+01 5.9521671022984215e+00 1 1 0 +1108 1 3 -1.0500000000000000e+00 -5.0499643062891941e+00 -1.0526110320807298e+01 5.9529734299645867e+00 1 1 0 +1109 1 3 -1.0500000000000000e+00 -3.9044997627426969e+00 -1.2857835432014063e+01 6.1505425081508758e+00 1 1 0 +1110 1 5 4.2499999999999999e-01 -2.1664211183641147e+00 -9.8564856863583152e+00 7.9550601086903363e+00 1 1 0 +1111 1 1 1.5750000000000000e+00 -6.9607989462374693e+00 -1.5118866754729385e+01 9.1909602457306789e+00 1 1 0 +1112 1 2 2.1000000000000001e+00 -2.5332191864745202e+00 -1.3528554728985561e+01 6.4377911479127725e+00 1 1 0 +1113 1 2 2.1000000000000001e+00 -2.6153745302543241e+00 -1.6483194537062502e+01 6.4302497060326793e+00 1 1 0 +1114 1 3 -1.0500000000000000e+00 -2.3889292855535356e+00 -1.3599259360919771e+01 8.0531374643776630e+00 1 1 0 +1115 1 3 -1.0500000000000000e+00 -2.7542016190129281e+00 -1.6387445154953969e+01 8.0517331746205230e+00 1 1 0 +1116 1 4 -9.4999999999999996e-01 -5.2791186550288964e+00 -1.5204682417425946e+01 8.1708659838454807e+00 1 1 0 +1117 1 
3 -1.0500000000000000e+00 -3.9216653036631381e+00 -1.7183445936730127e+01 5.9521473805842504e+00 1 1 0 +1118 1 3 -1.0500000000000000e+00 -2.4981275433346131e+00 -1.5008979011389789e+01 5.9529593741036173e+00 1 1 0 +1119 1 3 -1.0500000000000000e+00 -1.3526287687687368e+00 -1.7340732714304664e+01 6.1504965116596608e+00 1 1 0 +1120 1 5 4.2499999999999999e-01 -4.7745340143387986e+00 -1.4339283120392054e+01 7.9553395735268033e+00 1 1 0 +1121 1 1 1.5750000000000000e+00 8.4649265784556782e-01 -7.6590316478709717e+00 9.1954808147602378e+00 0 1 0 +1122 1 2 2.1000000000000001e+00 5.0290539851672360e+00 8.0006151898999178e-02 -6.4446435295181104e+00 0 1 1 +1123 1 2 2.1000000000000001e+00 5.0548273689359711e+00 -5.9311641214118893e+00 -6.4371163410349563e+00 0 1 1 +1124 1 3 -1.0500000000000000e+00 4.8285527009070641e+00 -8.8152641218335113e+00 -8.0600993164277526e+00 0 1 1 +1125 1 3 -1.0500000000000000e+00 5.1935081038409781e+00 -6.0268806310273124e+00 -8.0587047595272114e+00 0 1 1 +1126 1 4 -9.4999999999999996e-01 2.5583386908688013e+00 -7.2101837523739878e+00 -8.1783495610975780e+00 0 1 1 +1127 1 3 -1.0500000000000000e+00 1.2010155933116664e+00 -5.2308262552128042e+00 -5.9591057142334050e+00 0 1 1 +1128 1 3 -1.0500000000000000e+00 4.9376816126500405e+00 -7.4053547395612469e+00 -5.9599253307172955e+00 0 1 1 +1129 1 3 -1.0500000000000000e+00 3.7919477685959233e+00 -5.0738448792140893e+00 -6.1572864294774075e+00 0 1 1 +1130 1 5 4.2499999999999999e-01 2.0533933412457817e+00 -8.0762057852338245e+00 -7.9659924590863387e+00 0 1 1 +1131 1 1 1.5750000000000000e+00 3.4546438872086149e+00 -3.1760927300233561e+00 9.1954660928736232e+00 0 1 0 +1132 1 2 2.1000000000000001e+00 2.4208757240189911e+00 -4.4029274999915202e+00 -6.4446504165722978e+00 0 1 1 +1133 1 2 2.1000000000000001e+00 2.5029962220505464e+00 -1.4482376446371603e+00 -6.4371199971735953e+00 0 1 1 +1134 1 3 -1.0500000000000000e+00 2.2766942091831979e+00 -4.3323470564518445e+00 -8.0601137168075780e+00 0 1 1 +1135 1 3 
-1.0500000000000000e+00 2.6416744508286012e+00 -1.5439326545622158e+00 -8.0587130531082298e+00 0 1 1 +1136 1 4 -9.4999999999999996e-01 5.1665377903694445e+00 -2.7271842543862004e+00 -8.1782712780512163e+00 0 1 1 +1137 1 3 -1.0500000000000000e+00 3.8091896811936063e+00 -7.4791968577672208e-01 -5.9591108106198565e+00 0 1 1 +1138 1 3 -1.0500000000000000e+00 2.3858593295413364e+00 -2.9224163407654515e+00 -5.9599533128358946e+00 0 1 1 +1139 1 3 -1.0500000000000000e+00 1.2401095034911940e+00 -5.9093717191560913e-01 -6.1572556279808008e+00 0 1 1 +1140 1 5 4.2499999999999999e-01 4.6616362343769246e+00 -3.5930840308541416e+00 -7.9653957557873856e+00 0 1 1 +1141 1 1 1.5750000000000000e+00 8.6370765636898739e-01 -1.6701471798639602e+00 9.1909688371298337e+00 0 1 0 +1142 1 2 2.1000000000000001e+00 7.4968332550801264e-02 -9.0456118958105698e+00 6.4377818499810679e+00 0 1 0 +1143 1 2 2.1000000000000001e+00 4.9118954811554261e-02 -3.0344567042575790e+00 6.4302542853818707e+00 0 1 0 +1144 1 3 -1.0500000000000000e+00 2.7558834052513426e-01 -1.5051467137653063e-01 8.0531393149106840e+00 0 1 0 +1145 1 3 -1.0500000000000000e+00 -8.9712691241480513e-02 -2.9387102931554647e+00 8.0517388529870857e+00 0 1 0 +1146 1 4 -9.4999999999999996e-01 2.5453530524285419e+00 -1.7559972615381625e+00 8.1708152621431793e+00 0 1 0 +1147 1 3 -1.0500000000000000e+00 3.9028303555709520e+00 -3.7347032909157303e+00 5.9521561354213652e+00 0 1 0 +1148 1 3 -1.0500000000000000e+00 1.6637091060828268e-01 -1.5602693648514823e+00 5.9529716884712087e+00 0 1 0 +1149 1 3 -1.0500000000000000e+00 1.3118618400732025e+00 -3.8920040137780276e+00 6.1505054969671402e+00 0 1 0 +1150 1 5 4.2499999999999999e-01 3.0498930168659033e+00 -8.9067777084346034e-01 7.9549423433861826e+00 0 1 0 +1151 1 1 1.5750000000000000e+00 -1.7444701519919832e+00 -6.1530306614484331e+00 9.1909540682356905e+00 0 1 0 +1152 1 2 2.1000000000000001e+00 2.6831234082185826e+00 -4.5627254150988374e+00 6.4377751060812898e+00 0 1 0 +1153 1 2 
2.1000000000000001e+00 2.6009530528463074e+00 -7.5173549325685194e+00 6.4302474097262863e+00 0 1 0 +1154 1 3 -1.0500000000000000e+00 2.8273961193326116e+00 -4.6334287671637053e+00 8.0531236444779566e+00 0 1 0 +1155 1 3 -1.0500000000000000e+00 2.4621232686795853e+00 -7.4215844720777415e+00 8.0517272097361001e+00 0 1 0 +1156 1 4 -9.4999999999999996e-01 -6.2772558368610731e-02 -6.2388022627288375e+00 8.1709154350262914e+00 0 1 0 +1157 1 3 -1.0500000000000000e+00 1.2946722027750504e+00 -8.2176236909119194e+00 5.9521479683060470e+00 0 1 0 +1158 1 3 -1.0500000000000000e+00 2.7182175292796220e+00 -6.0431461611087478e+00 5.9529389603888099e+00 0 1 0 +1159 1 3 -1.0500000000000000e+00 3.8636918627839485e+00 -8.3749201878631396e+00 6.1505351359524703e+00 0 1 0 +1160 1 5 4.2499999999999999e-01 4.4184228266968972e-01 -5.3733326268332249e+00 7.9557049908859163e+00 0 1 0 +1161 1 1 1.5750000000000000e+00 6.0064998567600831e+00 -7.6590339440820880e+00 9.1954821624359404e+00 0 1 0 +1162 1 2 2.1000000000000001e+00 -1.0450948424185741e+01 8.0004848248361071e-02 -6.4446406642160960e+00 1 1 1 +1163 1 2 2.1000000000000001e+00 -1.0425170913363983e+01 -5.9311641103256250e+00 -6.4371145056714001e+00 1 1 1 +1164 1 3 -1.0500000000000000e+00 9.9885479439016400e+00 -8.8152614483975729e+00 -8.0600969622139811e+00 0 1 1 +1165 1 3 -1.0500000000000000e+00 -1.0286491248736608e+01 -6.0268849719205448e+00 -8.0587031275419516e+00 1 1 1 +1166 1 4 -9.4999999999999996e-01 7.7183312238007424e+00 -7.2101900633404732e+00 -8.1783563981195133e+00 0 1 1 +1167 1 3 -1.0500000000000000e+00 6.3610204286830054e+00 -5.2308260122533348e+00 -5.9591048242285387e+00 0 1 1 +1168 1 3 -1.0500000000000000e+00 -1.0542321244713614e+01 -7.4053546166208886e+00 -5.9599228518814780e+00 1 1 1 +1169 1 3 -1.0500000000000000e+00 8.9519511809135253e+00 -5.0738434227322244e+00 -6.1572913182233906e+00 0 1 1 +1170 1 5 4.2499999999999999e-01 7.2133763495672767e+00 -8.0762207879772347e+00 -7.9660569135885888e+00 0 1 1 +1171 1 1 
1.5750000000000000e+00 -1.2025351447948214e+01 -3.1760951501436132e+00 9.1954683565253923e+00 1 1 0 +1172 1 2 2.1000000000000001e+00 7.5808710018807162e+00 -4.4029286662727607e+00 -6.4446516063961274e+00 0 1 1 +1173 1 2 2.1000000000000001e+00 7.6629929340387335e+00 -1.4482359015170587e+00 -6.4371203024730663e+00 0 1 1 +1174 1 3 -1.0500000000000000e+00 7.4366927585676663e+00 -4.3323482541491742e+00 -8.0601129467341472e+00 0 1 1 +1175 1 3 -1.0500000000000000e+00 7.8016709253314289e+00 -1.5439345943044600e+00 -8.0587132144372866e+00 0 1 1 +1176 1 4 -9.4999999999999996e-01 -1.0313457976792096e+01 -2.7271711372396084e+00 -8.1782564794637711e+00 1 1 1 +1177 1 3 -1.0500000000000000e+00 8.9691934072995494e+00 -7.4791928988931033e-01 -5.9591107614026733e+00 0 1 1 +1178 1 3 -1.0500000000000000e+00 7.5458599796578838e+00 -2.9224210845033021e+00 -5.9599534951439104e+00 0 1 1 +1179 1 3 -1.0500000000000000e+00 6.4001097143598891e+00 -5.9094112388466868e-01 -6.1572528614400470e+00 0 1 1 +1180 1 5 4.2499999999999999e-01 9.8216674588499444e+00 -3.5930599056773893e+00 -7.9652833961409772e+00 0 1 1 +1181 1 1 1.5750000000000000e+00 6.0237130315742604e+00 -1.6701498651223297e+00 9.1909716499225347e+00 0 1 0 +1182 1 2 2.1000000000000001e+00 5.2349727460189612e+00 -9.0456103685362041e+00 6.4377832886283670e+00 0 1 0 +1183 1 2 2.1000000000000001e+00 5.2091224429492211e+00 -3.0344570159094602e+00 6.4302542213077380e+00 0 1 0 +1184 1 3 -1.0500000000000000e+00 5.4355896772184735e+00 -1.5051288779739380e-01 8.0531391187706660e+00 0 1 0 +1185 1 3 -1.0500000000000000e+00 5.0702912833048757e+00 -2.9387089180316366e+00 8.0517385819353926e+00 0 1 0 +1186 1 4 -9.4999999999999996e-01 -1.2934642949314187e+01 -1.7559847257218912e+00 8.1708294256262910e+00 1 1 0 +1187 1 3 -1.0500000000000000e+00 -1.1577169237580188e+01 -3.7347054902069914e+00 5.9521561789272894e+00 1 1 0 +1188 1 3 -1.0500000000000000e+00 5.3263701125563951e+00 -1.5602673756645373e+00 5.9529722419497961e+00 0 1 0 +1189 1 3 
-1.0500000000000000e+00 6.4718616516398733e+00 -3.8920001734332761e+00 6.1505018577191866e+00 0 1 0 +1190 1 5 4.2499999999999999e-01 -1.2430076990380384e+01 -8.9065439882470088e-01 7.9550515178028682e+00 1 1 0 +1191 1 1 1.5750000000000000e+00 3.4155223831010773e+00 -6.1530285563734282e+00 9.1909531664907718e+00 0 1 0 +1192 1 2 2.1000000000000001e+00 -1.2796877131928996e+01 -4.5627257148470246e+00 6.4377772089420890e+00 1 1 0 +1193 1 2 2.1000000000000001e+00 -1.2879045192336523e+01 -7.5173566238746297e+00 6.4302490413513969e+00 1 1 0 +1194 1 3 -1.0500000000000000e+00 -1.2652608503021799e+01 -4.6334262443131635e+00 8.0531255486597075e+00 1 1 0 +1195 1 3 -1.0500000000000000e+00 -1.3017875954152798e+01 -7.4215892024841494e+00 8.0517289252332702e+00 1 1 0 +1196 1 4 -9.4999999999999996e-01 5.0972349006688784e+00 -6.2387956913977813e+00 8.1709230060825391e+00 0 1 0 +1197 1 3 -1.0500000000000000e+00 6.4546696317000993e+00 -8.2176229159463166e+00 5.9521472766377492e+00 0 1 0 +1198 1 3 -1.0500000000000000e+00 -1.2761784849308587e+01 -6.0431463762646498e+00 5.9529414065664668e+00 1 1 0 +1199 1 3 -1.0500000000000000e+00 -1.1616306331965955e+01 -8.3749177026671298e+00 6.1505302303580649e+00 1 1 0 +1200 1 5 4.2499999999999999e-01 5.6018615694711507e+00 -5.3733155900706073e+00 7.9557768215295965e+00 0 1 0 +1201 1 1 1.5750000000000000e+00 -9.4735072448817164e+00 -7.6590320527705416e+00 9.1954807697554948e+00 1 1 0 +1202 1 2 2.1000000000000001e+00 -5.2909468100058090e+00 8.0006086419366795e-02 -6.4446434257288043e+00 1 1 1 +1203 1 2 2.1000000000000001e+00 -5.2651718235824800e+00 -5.9311642224177845e+00 -6.4371160339732629e+00 1 1 1 +1204 1 3 -1.0500000000000000e+00 -5.4914476266061882e+00 -8.8152638243211481e+00 -8.0600993043027991e+00 1 1 1 +1205 1 3 -1.0500000000000000e+00 -5.1264920790767334e+00 -6.0268806796169763e+00 -8.0587043233764355e+00 1 1 1 +1206 1 4 -9.4999999999999996e-01 -7.7616619124912027e+00 -7.2101841048075848e+00 -8.1783499013596455e+00 1 1 1 +1207 1 3 
-1.0500000000000000e+00 -9.1189817113699370e+00 -5.2308247892081638e+00 -5.9591059692751394e+00 1 1 1 +1208 1 3 -1.0500000000000000e+00 -5.3823180938576183e+00 -7.4053530600915156e+00 -5.9599252905400046e+00 1 1 1 +1209 1 3 -1.0500000000000000e+00 -6.5280518649480630e+00 -5.0738449133653170e+00 -6.1572866581095234e+00 1 1 1 +1210 1 5 4.2499999999999999e-01 -8.2666065027514168e+00 -8.0762056833355214e+00 -7.9659923921764788e+00 1 1 1 +1211 1 1 1.5750000000000000e+00 -6.8653560402119123e+00 -3.1760925260783193e+00 9.1954657688653327e+00 1 1 0 +1212 1 2 2.1000000000000001e+00 -7.8991254538789946e+00 -4.4029272845113034e+00 -6.4446496932287829e+00 1 1 1 +1213 1 2 2.1000000000000001e+00 -7.8170036470663806e+00 -1.4482365817121234e+00 -6.4371196776902897e+00 1 1 1 +1214 1 3 -1.0500000000000000e+00 -8.0433056588119936e+00 -4.3323464610830431e+00 -8.0601131080047796e+00 1 1 1 +1215 1 3 -1.0500000000000000e+00 -7.6783256377514784e+00 -1.5439327877615590e+00 -8.0587129502816790e+00 1 1 1 +1216 1 4 -9.4999999999999996e-01 -5.1534620689021686e+00 -2.7271839043705395e+00 -8.1782711517777749e+00 1 1 1 +1217 1 3 -1.0500000000000000e+00 -6.5108097814779358e+00 -7.4791939622507897e-01 -5.9591109774105018e+00 1 1 1 +1218 1 3 -1.0500000000000000e+00 -7.9341405083582082e+00 -2.9224179954767937e+00 -5.9599529405810499e+00 1 1 1 +1219 1 3 -1.0500000000000000e+00 -9.0798894526966674e+00 -5.9093777920264401e-01 -6.1572554885342869e+00 1 1 1 +1220 1 5 4.2499999999999999e-01 -5.6583636191276270e+00 -3.5930837460965979e+00 -7.9653947537777077e+00 1 1 1 +1221 1 1 1.5750000000000000e+00 -9.4562923299278676e+00 -1.6701472584663328e+00 9.1909689973534192e+00 1 1 0 +1222 1 2 2.1000000000000001e+00 -1.0245030508048330e+01 -9.0456125290389782e+00 6.4377814795475814e+00 1 1 0 +1223 1 2 2.1000000000000001e+00 -1.0270879978381927e+01 -3.0344571957024122e+00 6.4302543086834234e+00 1 1 0 +1224 1 3 -1.0500000000000000e+00 -1.0044411694842136e+01 -1.5051458256098726e-01 8.0531389993797653e+00 1 1 0 +1225 
1 3 -1.0500000000000000e+00 -1.0409712568783553e+01 -2.9387104997912452e+00 8.0517390105640203e+00 1 1 0 +1226 1 4 -9.4999999999999996e-01 -7.7746474912235151e+00 -1.7559975886962746e+00 8.1708154354551148e+00 1 1 0 +1227 1 3 -1.0500000000000000e+00 -6.4171698099417593e+00 -3.7347037166950336e+00 5.9521560911919309e+00 1 1 0 +1228 1 3 -1.0500000000000000e+00 -1.0153628880270464e+01 -1.5602674701572106e+00 5.9529718667608673e+00 1 1 0 +1229 1 3 -1.0500000000000000e+00 -9.0081392343640783e+00 -3.8920037268779861e+00 6.1505051098750130e+00 1 1 0 +1230 1 5 4.2499999999999999e-01 -7.2701067791174978e+00 -8.9067744365669910e-01 7.9549423301271212e+00 1 1 0 +1231 1 1 1.5750000000000000e+00 -1.2064470429331175e+01 -6.1530305067105981e+00 9.1909538872342047e+00 1 1 0 +1232 1 2 2.1000000000000001e+00 -7.6368757659277016e+00 -4.5627240917832879e+00 6.4377749829785653e+00 1 1 0 +1233 1 2 2.1000000000000001e+00 -7.7190472239103940e+00 -7.5173556591111748e+00 6.4302476099465196e+00 1 1 0 +1234 1 3 -1.0500000000000000e+00 -7.4926037543967574e+00 -4.6334286527773223e+00 8.0531236823653174e+00 1 1 0 +1235 1 3 -1.0500000000000000e+00 -7.8578765245588285e+00 -7.4215843280687963e+00 8.0517273536543144e+00 1 1 0 +1236 1 4 -9.4999999999999996e-01 -1.0382772430413246e+01 -6.2388022271827168e+00 8.1709151448373092e+00 1 1 0 +1237 1 3 -1.0500000000000000e+00 -9.0253276302112635e+00 -8.2176234848898737e+00 5.9521483749140618e+00 1 1 0 +1238 1 3 -1.0500000000000000e+00 -7.6017823605271708e+00 -6.0431471295933612e+00 5.9529387905739597e+00 1 1 0 +1239 1 3 -1.0500000000000000e+00 -6.4563079316317067e+00 -8.3749199855971863e+00 6.1505351418746486e+00 1 1 0 +1240 1 5 4.2499999999999999e-01 -9.8781576605607420e+00 -5.3733323048345270e+00 7.9557059773781234e+00 1 1 0 +1241 1 1 1.5750000000000000e+00 -4.3135000855046055e+00 -7.6590339693892009e+00 9.1954817955725900e+00 1 1 0 +1242 1 2 2.1000000000000001e+00 -1.3094691450599782e-01 8.0004740134636876e-02 -6.4446408791692233e+00 1 1 1 +1243 1 2 
2.1000000000000001e+00 -1.0516854306323076e-01 -5.9311654543700811e+00 -6.4371145620142682e+00 1 1 1 +1244 1 3 -1.0500000000000000e+00 -3.3145209907989148e-01 -8.8152614697341019e+00 -8.0600970943252204e+00 1 1 1 +1245 1 3 -1.0500000000000000e+00 3.3508572606210763e-02 -6.0268849233645678e+00 -8.0587028575970621e+00 1 1 1 +1246 1 4 -9.4999999999999996e-01 -2.6016689202920009e+00 -7.2101904280977287e+00 -8.1783565194823176e+00 1 1 1 +1247 1 3 -1.0500000000000000e+00 -3.9589808878459936e+00 -5.2308271594891291e+00 -5.9591050078849062e+00 1 1 1 +1248 1 3 -1.0500000000000000e+00 -2.2232094021939552e-01 -7.4053534196232693e+00 -5.9599229648807235e+00 1 1 1 +1249 1 3 -1.0500000000000000e+00 -1.3680507968195190e+00 -5.0738421086798464e+00 -6.1572913750279854e+00 1 1 1 +1250 1 5 4.2499999999999999e-01 -3.1066237816822548e+00 -8.0762210551214011e+00 -7.9660571815827614e+00 1 1 1 +1251 1 1 1.5750000000000000e+00 -1.7053514018791915e+00 -3.1760953908506444e+00 9.1954688510690019e+00 1 1 0 +1252 1 2 2.1000000000000001e+00 -2.7391277521302797e+00 -4.4029281799131130e+00 -6.4446516849181856e+00 1 1 1 +1253 1 2 2.1000000000000001e+00 -2.6570062176136480e+00 -1.4482368017731595e+00 -6.4371198376661711e+00 1 1 1 +1254 1 3 -1.0500000000000000e+00 -2.8833069316405657e+00 -4.3323482694340889e+00 -8.0601128886293285e+00 1 1 1 +1255 1 3 -1.0500000000000000e+00 -2.5183293201372461e+00 -1.5439346879234463e+00 -8.0587127401182030e+00 1 1 1 +1256 1 4 -9.4999999999999996e-01 6.5426352732096404e-03 -2.7271712260870409e+00 -8.1782561824668178e+00 1 1 1 +1257 1 3 -1.0500000000000000e+00 -1.3508085115115946e+00 -7.4792031814002158e-01 -5.9591107981633940e+00 1 1 1 +1258 1 3 -1.0500000000000000e+00 -2.7741401374419166e+00 -2.9224201599098958e+00 -5.9599535785954778e+00 1 1 1 +1259 1 3 -1.0500000000000000e+00 -3.9198904415596063e+00 -5.9094049862414266e-01 -6.1572527738358414e+00 1 1 1 +1260 1 5 4.2499999999999999e-01 -4.9833254279335470e-01 -3.5930601836879887e+00 -7.9652835806511133e+00 1 1 1 
+1261 1 1 1.5750000000000000e+00 -4.2962866835193996e+00 -1.6701494789166276e+00 9.1909716389655571e+00 1 1 0 +1262 1 2 2.1000000000000001e+00 -5.0850277639623469e+00 -9.0456113789439101e+00 6.4377830715059510e+00 1 1 0 +1263 1 2 2.1000000000000001e+00 -5.1108781670878898e+00 -3.0344584767582976e+00 6.4302545389906083e+00 1 1 0 +1264 1 3 -1.0500000000000000e+00 -4.8844104262402723e+00 -1.5051319371847072e-01 8.0531387480607250e+00 1 1 0 +1265 1 3 -1.0500000000000000e+00 -5.2497088053993499e+00 -2.9387087618171179e+00 8.0517388016512115e+00 1 1 0 +1266 1 4 -9.4999999999999996e-01 -2.6146430353073615e+00 -1.7559844122091626e+00 8.1708296280722621e+00 1 1 0 +1267 1 3 -1.0500000000000000e+00 -1.2571679725492881e+00 -3.7347046579073737e+00 5.9521562532732517e+00 1 1 0 +1268 1 3 -1.0500000000000000e+00 -4.9936296641621345e+00 -1.5602648346284838e+00 5.9529722843018806e+00 1 1 0 +1269 1 3 -1.0500000000000000e+00 -3.8481382592454132e+00 -3.8920003398680887e+00 6.1505022239019134e+00 1 1 0 +1270 1 5 4.2499999999999999e-01 -2.1100771447180460e+00 -8.9065468536213999e-01 7.9550509588833762e+00 1 1 0 +1271 1 1 1.5750000000000000e+00 -6.9044771207900748e+00 -6.1530285609072077e+00 9.1909529998647947e+00 1 1 0 +1272 1 2 2.1000000000000001e+00 -2.4768770148283359e+00 -4.5627259243055889e+00 6.4377774382641846e+00 1 1 0 +1273 1 2 2.1000000000000001e+00 -2.5590447071763158e+00 -7.5173559192857837e+00 6.4302490105772705e+00 1 1 0 +1274 1 3 -1.0500000000000000e+00 -2.3326085320454037e+00 -4.6334262514317039e+00 8.0531257226497672e+00 1 1 0 +1275 1 3 -1.0500000000000000e+00 -2.6978763623638589e+00 -7.4215890661865789e+00 8.0517287762570824e+00 1 1 0 +1276 1 4 -9.4999999999999996e-01 -5.2227648392043768e+00 -6.2387952984820210e+00 8.1709227994702012e+00 1 1 0 +1277 1 3 -1.0500000000000000e+00 -3.8653309004748362e+00 -8.2176227464587015e+00 5.9521469951053056e+00 1 1 0 +1278 1 3 -1.0500000000000000e+00 -2.4417851934069699e+00 -6.0431476214369226e+00 5.9529412766360750e+00 1 1 0 +1279 1 
3 -1.0500000000000000e+00 -1.2963058055537360e+00 -8.3749181707502132e+00 6.1505305588248493e+00 1 1 0 +1280 1 5 4.2499999999999999e-01 -4.7181387966027248e+00 -5.3733160384255498e+00 7.9557765758026413e+00 1 1 0 Velocities diff --git a/tools/msi2lmp/test/runtests.sh b/tools/msi2lmp/test/runtests.sh index 2003fda146..f83a3ec6a7 100755 --- a/tools/msi2lmp/test/runtests.sh +++ b/tools/msi2lmp/test/runtests.sh @@ -29,7 +29,7 @@ counter=0 errors=0 # Class1 tests with cvff -for m in hydrogen water h2-h2o ethane benzene naphthalene crambin nylon phen3_cff97 hap_crystal +for m in hydrogen water h2-h2o ethane benzene naphthalene cnt-hexagonal crambin nylon phen3_cff97 hap_crystal do \ before=$errors vglog=${m}-class1.chk @@ -104,7 +104,7 @@ do \ done # Class2 tests with compass -for m in hydrogen ethane benzene naphthalene +for m in hydrogen ethane benzene naphthalene cnt-hexagonal do \ before=$errors vglog=${m}-class2a.chk @@ -129,7 +129,7 @@ do \ done # Class2 tests with pcff -for m in water h2-h2o ethane benzene naphthalene hap_crystal +for m in water h2-h2o ethane benzene naphthalene cnt-hexagonal hap_crystal do \ before=$errors vglog=${m}-class2b.chk From 0c22e8696abcc773ae67c625f651b204c9d6f795 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 14:59:08 +0000 Subject: [PATCH 15/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11606 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/fix_temp_csvr.cpp | 302 ++++++++++++++++++++++++++++++++++++++++++ src/fix_temp_csvr.h | 104 +++++++++++++++ 2 files changed, 406 insertions(+) create mode 100644 src/fix_temp_csvr.cpp create mode 100644 src/fix_temp_csvr.h diff --git a/src/fix_temp_csvr.cpp b/src/fix_temp_csvr.cpp new file mode 100644 index 0000000000..c540e675ce --- /dev/null +++ b/src/fix_temp_csvr.cpp @@ -0,0 +1,302 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National 
Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (ICTP, Italy) +------------------------------------------------------------------------- */ + +#include "string.h" +#include "stdlib.h" +#include "math.h" +#include "fix_temp_csvr.h" +#include "atom.h" +#include "force.h" +#include "comm.h" +#include "input.h" +#include "variable.h" +#include "group.h" +#include "update.h" +#include "modify.h" +#include "compute.h" +#include "random_mars.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{NOBIAS,BIAS}; +enum{CONSTANT,EQUAL}; + +/* ---------------------------------------------------------------------- */ + +FixTempCSVR::FixTempCSVR(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg) +{ + if (narg != 7) error->all(FLERR,"Illegal fix temp/csvr command"); + + // CSVR thermostat should be applied every step + + nevery = 1; + scalar_flag = 1; + global_freq = nevery; + extscalar = 1; + + tstr = NULL; + if (strstr(arg[3],"v_") == arg[3]) { + int n = strlen(&arg[3][2]) + 1; + tstr = new char[n]; + strcpy(tstr,&arg[3][2]); + tstyle = EQUAL; + } else { + t_start = force->numeric(FLERR,arg[3]); + t_target = t_start; + tstyle = CONSTANT; + } + + t_stop = force->numeric(FLERR,arg[4]); + t_period = force->numeric(FLERR,arg[5]); + int seed = force->inumeric(FLERR,arg[6]); + + // error checks + + if (t_period <= 0.0) error->all(FLERR,"Fix temp/csvr period must be > 0.0"); + if (seed <= 0) error->all(FLERR,"Illegal fix temp/csvr random seed"); + + // 
create a new compute temp style + // id = fix-ID + temp, compute group = fix group + + int n = strlen(id) + 6; + id_temp = new char[n]; + strcpy(id_temp,id); + strcat(id_temp,"_temp"); + + char **newarg = new char*[3]; + newarg[0] = id_temp; + newarg[1] = group->names[igroup]; + newarg[2] = (char *) "temp"; + modify->add_compute(3,newarg); + delete [] newarg; + tflag = 1; + + random = new RanMars(lmp,seed + comm->me); + + energy = 0.0; +} + +/* ---------------------------------------------------------------------- */ + +FixTempCSVR::~FixTempCSVR() +{ + delete [] tstr; + + // delete temperature if fix created it + + if (tflag) modify->delete_compute(id_temp); + delete [] id_temp; + + delete random; +} + +/* ---------------------------------------------------------------------- */ + +int FixTempCSVR::setmask() +{ + int mask = 0; + mask |= END_OF_STEP; + mask |= THERMO_ENERGY; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixTempCSVR::init() +{ + // check variable + + if (tstr) { + tvar = input->variable->find(tstr); + if (tvar < 0) + error->all(FLERR,"Variable name for fix temp/csvr does not exist"); + if (input->variable->equalstyle(tvar)) tstyle = EQUAL; + else error->all(FLERR,"Variable for fix temp/csvr is invalid style"); + } + + int icompute = modify->find_compute(id_temp); + if (icompute < 0) + error->all(FLERR,"Temperature ID for fix temp/csvr does not exist"); + temperature = modify->compute[icompute]; + + if (temperature->tempbias) which = BIAS; + else which = NOBIAS; +} + +/* ---------------------------------------------------------------------- */ + +void FixTempCSVR::end_of_step() +{ + double t_current = temperature->compute_scalar(); + + double delta = update->ntimestep - update->beginstep; + if (delta != 0.0) delta /= update->endstep - update->beginstep; + + // set current t_target + // if variable temp, evaluate variable, wrap with clear/add + + if (tstyle == CONSTANT) + t_target = t_start + 
delta * (t_stop-t_start); + else { + modify->clearstep_compute(); + t_target = input->variable->compute_equal(tvar); + if (t_target < 0.0) + error->one(FLERR, + "Fix temp/csvr variable returned negative temperature"); + modify->addstep_compute(update->ntimestep + nevery); + } + + // Langevin thermostat, implemented as decribed in + // Bussi and Parrinello, Phys. Rev. E (2007). + // it is a linear combination of old velocities and new, + // randomly chosen, velocity, with proper coefficients + + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + const double c1 = exp(-update->dt/t_period); + + if (atom->rmass_flag) { // per atom masses + const double * const rmass = atom->rmass; + + if (which == NOBIAS) { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + const double m = rmass[i]; + const double c2 = sqrt((1.0-c1*c1)*t_target/m); + for (int j = 0; j < 3; ++j) { + energy += 0.5*m*v[i][j]*v[i][j]; + v[i][j] *= c1; + v[i][j] += c2*random->gaussian(); + energy -= 0.5*m*v[i][j]*v[i][j]; + } + } + } + } else { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + const double m = rmass[i]; + const double c2 = sqrt((1.0-c1*c1)*t_target/m); + temperature->remove_bias(i,v[i]); + for (int j = 0; j < 3; ++j) { + energy += 0.5*rmass[i]*v[i][j]*v[i][j]; + v[i][j] *= c1; + v[i][j] += c2*random->gaussian(); + energy -= 0.5*rmass[i]*v[i][j]*v[i][j]; + } + temperature->restore_bias(i,v[i]); + } + } + } + } else { // per atom type masses + + const double * const mass = atom->mass; + const int * const type = atom->type; + + if (which == NOBIAS) { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + const double m = mass[type[i]]; + const double c2 = sqrt((1.0-c1*c1)*t_target/m); + + for (int j = 0; j < 3; ++j) { + energy += 0.5*m*v[i][j]*v[i][j]; + v[i][j] *= c1; + v[i][j] += c2*random->gaussian(); + energy -= 0.5*m*v[i][j]*v[i][j]; + } + } + } + } else { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & 
groupbit) { + const double m = mass[type[i]]; + const double c2 = sqrt((1.0-c1*c1)*t_target/m); + + temperature->remove_bias(i,v[i]); + for (int j = 0; j < 3; ++j) { + energy += 0.5*m*v[i][j]*v[i][j]; + v[i][j] *= c1; + v[i][j] += c2*random->gaussian(); + energy -= 0.5*m*v[i][j]*v[i][j]; + } + temperature->restore_bias(i,v[i]); + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixTempCSVR::modify_param(int narg, char **arg) +{ + if (strcmp(arg[0],"temp") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); + if (tflag) { + modify->delete_compute(id_temp); + tflag = 0; + } + delete [] id_temp; + int n = strlen(arg[1]) + 1; + id_temp = new char[n]; + strcpy(id_temp,arg[1]); + + int icompute = modify->find_compute(id_temp); + if (icompute < 0) + error->all(FLERR,"Could not find fix_modify temperature ID"); + temperature = modify->compute[icompute]; + + if (temperature->tempflag == 0) + error->all(FLERR, + "Fix_modify temperature ID does not compute temperature"); + if (temperature->igroup != igroup && comm->me == 0) + error->warning(FLERR,"Group for fix_modify temp != fix group"); + return 2; + } + return 0; +} + +/* ---------------------------------------------------------------------- */ + +void FixTempCSVR::reset_target(double t_new) +{ + t_target = t_start = t_stop = t_new; +} + +/* ---------------------------------------------------------------------- */ + +double FixTempCSVR::compute_scalar() +{ + return energy; +} + +/* ---------------------------------------------------------------------- + extract thermostat properties +------------------------------------------------------------------------- */ + +void *FixTempCSVR::extract(const char *str, int &dim) +{ + dim=0; + if (strcmp(str,"t_target") == 0) { + return &t_target; + } + return NULL; +} diff --git a/src/fix_temp_csvr.h b/src/fix_temp_csvr.h new file mode 100644 index 0000000000..601b032af6 --- /dev/null +++ b/src/fix_temp_csvr.h @@ 
-0,0 +1,104 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(temp/csvr,FixTempCSVR) + +#else + +#ifndef LMP_FIX_TEMP_CSVR_H +#define LMP_FIX_TEMP_CSVR_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixTempCSVR : public Fix { + public: + FixTempCSVR(class LAMMPS *, int, char **); + ~FixTempCSVR(); + int setmask(); + void init(); + void end_of_step(); + int modify_param(int, char **); + void reset_target(double); + double compute_scalar(); + virtual void *extract(const char *, int &); + + private: + int which; + double t_start,t_stop,t_period,t_target; + double energy; + int tstyle,tvar; + char *tstr; + + char *id_temp; + class Compute *temperature; + int tflag; + + class RanMars *random; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Fix temp/csvr period must be > 0.0 + +Self-explanatory. + +E: Variable name for fix temp/csvr does not exist + +Self-explanatory. + +E: Variable for fix temp/csvr is invalid style + +Only equal-style variables can be used. + +E: Temperature ID for fix temp/csvr does not exist + +Self-explanatory. + +E: Computed temperature for fix temp/csvr cannot be 0.0 + +Self-explanatory. 
+ +E: Fix temp/csvr variable returned negative temperature + +Self-explanatory. + +E: Could not find fix_modify temperature ID + +The compute ID for computing temperature does not exist. + +E: Fix_modify temperature ID does not compute temperature + +The compute ID assigned to the fix must compute temperature. + +W: Group for fix_modify temp != fix group + +The fix_modify command is specifying a temperature computation that +computes a temperature on a different group of atoms than the fix +itself operates on. This is probably not what you want to do. + +*/ From ac6f5afc2ef68533315dbdf534b630c01002eca4 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 14:59:14 +0000 Subject: [PATCH 16/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11607 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/Section_commands.html | 6 +- doc/Section_commands.txt | 1 + doc/Section_howto.html | 9 ++- doc/Section_howto.txt | 9 ++- doc/fix_temp_csvr.html | 161 ++++++++++++++++++++++++++++++++++++++ doc/fix_temp_csvr.txt | 151 +++++++++++++++++++++++++++++++++++ 6 files changed, 326 insertions(+), 11 deletions(-) create mode 100644 doc/fix_temp_csvr.html create mode 100644 doc/fix_temp_csvr.txt diff --git a/doc/Section_commands.html b/doc/Section_commands.html index ae49190355..c8a3f4c630 100644 --- a/doc/Section_commands.html +++ b/doc/Section_commands.html @@ -403,9 +403,9 @@ of each style or click on the style itself for a full description: poemspourpress/berendsenprintproperty/atomqeq/combreax/bondsrecenter restrainrigidrigid/nphrigid/nptrigid/nverigid/nvtrigid/smallsetforce shakespringspring/rgspring/selfsrdstore/forcestore/statetemp/berendsen -temp/rescalethermal/conductivitytmdttmtune/kspacevectorviscosityviscous -wall/colloidwall/granwall/harmonicwall/lj1043wall/lj126wall/lj93wall/pistonwall/reflect -wall/regionwall/srd +temp/csvrtemp/rescalethermal/conductivitytmdttmtune/kspacevectorviscosity 
+viscouswall/colloidwall/granwall/harmonicwall/lj1043wall/lj126wall/lj93wall/piston +wall/reflectwall/regionwall/srd

These are fix styles contributed by users, which can be used if diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt index 63f593af1d..b6251e6f92 100644 --- a/doc/Section_commands.txt +++ b/doc/Section_commands.txt @@ -539,6 +539,7 @@ of each style or click on the style itself for a full description: "store/force"_fix_store_force.html, "store/state"_fix_store_state.html, "temp/berendsen"_fix_temp_berendsen.html, +"temp/csvr"_fix_temp_csvr.html, "temp/rescale"_fix_temp_rescale.html, "thermal/conductivity"_fix_thermal_conductivity.html, "tmd"_fix_tmd.html, diff --git a/doc/Section_howto.html b/doc/Section_howto.html index db12c18d4c..21994ed9a3 100644 --- a/doc/Section_howto.html +++ b/doc/Section_howto.html @@ -1613,16 +1613,17 @@ velocity of spherical or aspherical particles to be adjusted in prescribed ways.

Thermostatting in LAMMPS is performed by fixes, or in one -case by a pair style. Four thermostatting fixes are currently -available: Nose-Hoover (nvt), Berendsen, Langevin, and direct -rescaling (temp/rescale). Dissipative particle dynamics (DPD) -thermostatting can be invoked via the dpd/tstat pair style: +case by a pair style. Several thermostatting fixes are available: +Nose-Hoover (nvt), Berendsen, CSVR, Langevin, and direct rescaling +(temp/rescale). Dissipative particle dynamics (DPD) thermostatting +can be invoked via the dpd/tstat pair style:

  • fix nvt
  • fix nvt/sphere
  • fix nvt/asphere
  • fix nvt/sllod
  • fix temp/berendsen +
  • fix temp/csvr
  • fix langevin
  • fix temp/rescale
  • pair_style dpd/tstat diff --git a/doc/Section_howto.txt b/doc/Section_howto.txt index fedf94d53d..5480516e5c 100644 --- a/doc/Section_howto.txt +++ b/doc/Section_howto.txt @@ -1600,16 +1600,17 @@ velocity of spherical or aspherical particles to be adjusted in prescribed ways. Thermostatting in LAMMPS is performed by "fixes"_fix.html, or in one -case by a pair style. Four thermostatting fixes are currently -available: Nose-Hoover (nvt), Berendsen, Langevin, and direct -rescaling (temp/rescale). Dissipative particle dynamics (DPD) -thermostatting can be invoked via the {dpd/tstat} pair style: +case by a pair style. Several thermostatting fixes are available: +Nose-Hoover (nvt), Berendsen, CSVR, Langevin, and direct rescaling +(temp/rescale). Dissipative particle dynamics (DPD) thermostatting +can be invoked via the {dpd/tstat} pair style: "fix nvt"_fix_nh.html "fix nvt/sphere"_fix_nvt_sphere.html "fix nvt/asphere"_fix_nvt_asphere.html "fix nvt/sllod"_fix_nvt_sllod.html "fix temp/berendsen"_fix_temp_berendsen.html +"fix temp/csvr"_fix_temp_csvr.html "fix langevin"_fix_langevin.html "fix temp/rescale"_fix_temp_rescale.html "pair_style dpd/tstat"_pair_dpd.html :ul diff --git a/doc/fix_temp_csvr.html b/doc/fix_temp_csvr.html new file mode 100644 index 0000000000..e04add593c --- /dev/null +++ b/doc/fix_temp_csvr.html @@ -0,0 +1,161 @@ + +
    LAMMPS WWW Site - LAMMPS Documentation - LAMMPS Commands +
    + + + + + + +
    + +

    fix temp/csvr command +

    +

    Syntax: +

    +
    fix ID group-ID temp/csvr Tstart Tstop Tdamp seed 
    +
    +
    • ID, group-ID are documented in fix command + +
    • temp/csvr = style name of this fix command + +
    • Tstart,Tstop = desired temperature at start/end of run + +
        Tstart can be a variable (see below) 
      +
      +
    • Tdamp = temperature damping parameter (time units) + +
    • seed = random number seed to use for white noise (positive integer) +
    +

    Examples: +

    +
    fix 1 all temp/csvr 300.0 300.0 100.0 54324 
    +
    +

    Description: +

    +

    Adjust the temperature with a CSVR thermostat (Bussi), which +updates the velocities to a linear combination of the current +velocities and velocities drawn from a Gaussian distribution at the desired temperature in +every timestep. +

    +

    The thermostat is applied to only the translational degrees of freedom +for the particles, which is an important consideration if finite-size +particles, which have rotational degrees of freedom, are being +thermostatted with this fix. The translational degrees of freedom can +also have a bias velocity removed from them before thermostatting +takes place; see the description below. +

    +

    The desired temperature at each timestep is a ramped value during the +run from Tstart to Tstop. The Tdamp parameter is specified in +time units and determines how rapidly the temperature is relaxed. For +example, a value of 100.0 means to relax the temperature in a timespan +of (roughly) 100 time units (tau or fmsec or psec - see the +units command). +

    +

    Tstart can be specified as an equal-style variable. +In this case, the Tstop setting is ignored. If the value is a +variable, it should be specified as v_name, where name is the variable +name. In this case, the variable will be evaluated each timestep, and +its value used to determine the target temperature. +

    +

    Equal-style variables can specify formulas with various mathematical +functions, and include thermo_style command +keywords for the simulation box parameters and timestep and elapsed +time. Thus it is easy to specify a time-dependent temperature. +

    +

    IMPORTANT NOTE: Unlike the fix nvt command which +performs Nose/Hoover thermostatting AND time integration, this fix +does NOT perform time integration. It only modifies velocities to +effect thermostatting. Thus you must use a separate time integration +fix, like fix nve to actually update the positions of +atoms using the modified velocities. Likewise, this fix should not +normally be used on atoms that also have their temperature controlled +by another fix - e.g. by fix nvt or fix +langevin commands. +

    +

    See this howto section of the manual for +a discussion of different ways to compute temperature and perform +thermostatting. +

    +

    This fix computes a temperature each timestep. To do this, the fix +creates its own compute of style "temp", as if this command had been +issued: +

    +
    compute fix-ID_temp group-ID temp 
    +
    +

    See the compute temp command for details. Note +that the ID of the new compute is the fix-ID + underscore + "temp", +and the group for the new compute is the same as the fix group. +

    +

    Note that this is NOT the compute used by thermodynamic output (see +the thermo_style command) with ID = thermo_temp. +This means you can change the attributes of this fix's temperature +(e.g. its degrees-of-freedom) via the +compute_modify command or print this temperature +during thermodynamic output via the thermo_style +custom command using the appropriate compute-ID. +It also means that changing attributes of thermo_temp will have no +effect on this fix. +

    +

    Like other fixes that perform thermostatting, this fix can be used +with compute commands that calculate a temperature +after removing a "bias" from the atom velocities. E.g. removing the +center-of-mass velocity from a group of atoms or only calculating +temperature on the x-component of velocity or only calculating +temperature for atoms in a geometric region. This is not done by +default, but only if the fix_modify command is used +to assign a temperature compute to this fix that includes such a bias +term. See the doc pages for individual compute +commands to determine which ones include a bias. In +this case, the thermostat works in the following manner: the current +temperature is calculated taking the bias into account, bias is +removed from each atom, thermostatting is performed on the remaining +thermal degrees of freedom, and the bias is added back in. +

    +
    + +

    Restart, fix_modify, output, run start/stop, minimize info: +

    +

    No information about this fix is written to binary restart +files. +

    +

    The fix_modify temp option is supported by this +fix. You can use it to assign a temperature compute +you have defined to this fix which will be used in its thermostatting +procedure, as described above. For consistency, the group used by +this fix and by the compute should be the same. +

    +

    The fix_modify energy option is supported by this +fix to add the energy change implied by a velocity rescaling to the +system's potential energy as part of thermodynamic +output. +

    +

    This fix computes a global scalar which can be accessed by various +output commands. The scalar is the +cumulative energy change due to this fix. The scalar value +calculated by this fix is "extensive". +

    +

    This fix can ramp its target temperature over multiple runs, using the +start and stop keywords of the run command. See the +run command for details of how to do this. +

    +

    This fix is not invoked during energy minimization. +

    +

    Restrictions: none +

    +

    Related commands: +

    +

    fix nve, fix nvt, fix +temp/rescale, fix langevin, +fix_modify, compute temp, +fix temp/berendsen +

    +

    Default: none +

    +
    + + + +

    (Bussi) Bussi and Parrinello, Phys. Rev. E (2007) +

    + diff --git a/doc/fix_temp_csvr.txt b/doc/fix_temp_csvr.txt new file mode 100644 index 0000000000..872c304618 --- /dev/null +++ b/doc/fix_temp_csvr.txt @@ -0,0 +1,151 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix temp/csvr command :h3 + +[Syntax:] + +fix ID group-ID temp/csvr Tstart Tstop Tdamp seed :pre + +ID, group-ID are documented in "fix"_fix.html command :ulb,l +temp/csvr = style name of this fix command :l +Tstart,Tstop = desired temperature at start/end of run :l + Tstart can be a variable (see below) :pre +Tdamp = temperature damping parameter (time units) :l +seed = random number seed to use for white noise (positive integer) :l,ule + +[Examples:] + +fix 1 all temp/csvr 300.0 300.0 100.0 54324 :pre + +[Description:] + +Adjust the temperature with a CSVR thermostat "(Bussi)"_#Bussi, which +updates the velocities to a linear combination of the current +velocities with a gaussian distribution at the desired temperature in +every timestep. + +The thermostat is applied to only the translational degrees of freedom +for the particles, which is an important consideration for finite-size +particles which have rotational degrees of freedom are being +thermostatted with this fix. The translational degrees of freedom can +also have a bias velocity removed from them before thermostatting +takes place; see the description below. + +The desired temperature at each timestep is a ramped value during the +run from {Tstart} to {Tstop}. The {Tdamp} parameter is specified in +time units and determines how rapidly the temperature is relaxed. For +example, a value of 100.0 means to relax the temperature in a timespan +of (roughly) 100 time units (tau or fmsec or psec - see the +"units"_units.html command). + +{Tstart} can be specified as an equal-style "variable"_variable.html. +In this case, the {Tstop} setting is ignored. 
If the value is a +variable, it should be specified as v_name, where name is the variable +name. In this case, the variable will be evaluated each timestep, and +its value used to determine the target temperature. + +Equal-style variables can specify formulas with various mathematical +functions, and include "thermo_style"_thermo_style.html command +keywords for the simulation box parameters and timestep and elapsed +time. Thus it is easy to specify a time-dependent temperature. + +IMPORTANT NOTE: Unlike the "fix nvt"_fix_nh.html command which +performs Nose/Hoover thermostatting AND time integration, this fix +does NOT perform time integration. It only modifies velocities to +effect thermostatting. Thus you must use a separate time integration +fix, like "fix nve"_fix_nve.html to actually update the positions of +atoms using the modified velocities. Likewise, this fix should not +normally be used on atoms that also have their temperature controlled +by another fix - e.g. by "fix nvt"_fix_nh.html or "fix +langevin"_fix_langevin.html commands. + +See "this howto section"_Section_howto.html#howto_16 of the manual for +a discussion of different ways to compute temperature and perform +thermostatting. + +This fix computes a temperature each timestep. To do this, the fix +creates its own compute of style "temp", as if this command had been +issued: + +compute fix-ID_temp group-ID temp :pre + +See the "compute temp"_compute_temp.html command for details. Note +that the ID of the new compute is the fix-ID + underscore + "temp", +and the group for the new compute is the same as the fix group. + +Note that this is NOT the compute used by thermodynamic output (see +the "thermo_style"_thermo_style.html command) with ID = {thermo_temp}. +This means you can change the attributes of this fix's temperature +(e.g. 
its degrees-of-freedom) via the +"compute_modify"_compute_modify.html command or print this temperature +during thermodynamic output via the "thermo_style +custom"_thermo_style.html command using the appropriate compute-ID. +It also means that changing attributes of {thermo_temp} will have no +effect on this fix. + +Like other fixes that perform thermostatting, this fix can be used +with "compute commands"_compute.html that calculate a temperature +after removing a "bias" from the atom velocities. E.g. removing the +center-of-mass velocity from a group of atoms or only calculating +temperature on the x-component of velocity or only calculating +temperature for atoms in a geometric region. This is not done by +default, but only if the "fix_modify"_fix_modify.html command is used +to assign a temperature compute to this fix that includes such a bias +term. See the doc pages for individual "compute +commands"_compute.html to determine which ones include a bias. In +this case, the thermostat works in the following manner: the current +temperature is calculated taking the bias into account, bias is +removed from each atom, thermostatting is performed on the remaining +thermal degrees of freedom, and the bias is added back in. + +:line + +[Restart, fix_modify, output, run start/stop, minimize info:] + +No information about this fix is written to "binary restart +files"_restart.html. + +The "fix_modify"_fix_modify.html {temp} option is supported by this +fix. You can use it to assign a temperature "compute"_compute.html +you have defined to this fix which will be used in its thermostatting +procedure, as described above. For consistency, the group used by +this fix and by the compute should be the same. + +The "fix_modify"_fix_modify.html {energy} option is supported by this +fix to add the energy change implied by a velocity rescaling to the +system's potential energy as part of "thermodynamic +output"_thermo_style.html. 
+ +This fix computes a global scalar which can be accessed by various +"output commands"_Section_howto.html#howto_15. The scalar is the +cumulative energy change due to this fix. The scalar value +calculated by this fix is "extensive". + +This fix can ramp its target temperature over multiple runs, using the +{start} and {stop} keywords of the "run"_run.html command. See the +"run"_run.html command for details of how to do this. + +This fix is not invoked during "energy minimization"_minimize.html. + +[Restrictions:] none + +[Related commands:] + +"fix nve"_fix_nve.html, "fix nvt"_fix_nh.html, "fix +temp/rescale"_fix_temp_rescale.html, "fix langevin"_fix_langevin.html, +"fix_modify"_fix_modify.html, "compute temp"_compute_temp.html, +"fix temp/berendsen"_fix_temp_berendsen.html + +[Default:] none + +:line + +:link(Bussi) + +[(Bussi)] Bussi and Parrinello, Phys. Rev. E (2007) From 481778cb31ea3433fde48491e560ebcfb50fafcd Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:18:54 +0000 Subject: [PATCH 17/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11608 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/MISC/fix_oneway.cpp | 109 ++++++++++++++++++++++++++++++++++++++++ src/MISC/fix_oneway.h | 54 ++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/MISC/fix_oneway.cpp create mode 100644 src/MISC/fix_oneway.h diff --git a/src/MISC/fix_oneway.cpp b/src/MISC/fix_oneway.cpp new file mode 100644 index 0000000000..e0345de3fd --- /dev/null +++ b/src/MISC/fix_oneway.cpp @@ -0,0 +1,109 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (ICTP, Italy) +------------------------------------------------------------------------- */ + +#include "string.h" +#include "fix_oneway.h" +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "region.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{NONE=-1,X=0,Y=1,Z=2,XYZMASK=3,MINUS=4,PLUS=0}; + +/* ---------------------------------------------------------------------- */ + +FixOneWay::FixOneWay(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) +{ + direction = NONE; + regionidx = 0; + regionstr = NULL; + + if (narg < 6) error->all(FLERR,"Illegal fix oneway command"); + + nevery = force->inumeric(FLERR,arg[3]); + if (nevery < 1) error->all(FLERR,"Illegal nevery value for fix oneway"); + + int len = strlen(arg[4]); + regionstr = new char[len+1]; // room for the NUL terminator written by strcpy + strcpy(regionstr,arg[4]); + + if (strcmp(arg[5], "x") == 0) direction = X|PLUS; + if (strcmp(arg[5], "X") == 0) direction = X|PLUS; + if (strcmp(arg[5], "y") == 0) direction = Y|PLUS; + if (strcmp(arg[5], "Y") == 0) direction = Y|PLUS; + if (strcmp(arg[5], "z") == 0) direction = Z|PLUS; + if (strcmp(arg[5], "Z") == 0) direction = Z|PLUS; + if (strcmp(arg[5],"-x") == 0) direction = X|MINUS; + if (strcmp(arg[5],"-X") == 0) direction = X|MINUS; + if (strcmp(arg[5],"-y") == 0) direction = Y|MINUS; + if (strcmp(arg[5],"-Y") == 0) direction = Y|MINUS; + if (strcmp(arg[5],"-z") == 0) direction = Z|MINUS; + if (strcmp(arg[5],"-Z") == 0) direction = Z|MINUS; + if (direction == NONE) error->all(FLERR,"Illegal fix oneway command"); // unrecognized direction keyword: NONE & XYZMASK would index v[i][3] + global_freq = nevery; +} + +/* ---------------------------------------------------------------------- */ + +FixOneWay::~FixOneWay() +{ + if (regionstr) delete[] regionstr; +} + +/* 
---------------------------------------------------------------------- */ + +int FixOneWay::setmask() +{ + return END_OF_STEP; +} + +/* ---------------------------------------------------------------------- */ + +void FixOneWay::init() +{ + regionidx = domain->find_region(regionstr); + if (regionidx < 0) + error->all(FLERR,"Region for fix oneway does not exist"); // fatal: end_of_step() indexes domain->regions[regionidx] +} + +/* ---------------------------------------------------------------------- */ + +void FixOneWay::end_of_step() +{ + Region *region = domain->regions[regionidx]; + const int idx = direction & XYZMASK; + + const double * const * const x = atom->x; + double * const * const v = atom->v; + const int *mask = atom->mask; + const int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; ++i) { + if ((mask[i] & groupbit) && region->match(x[i][0],x[i][1],x[i][2])) { + if (direction & MINUS) { + if (v[i][idx] > 0.0) v[i][idx] = -v[i][idx]; + } else { + if (v[i][idx] < 0.0) v[i][idx] = -v[i][idx]; + } + } + } +} + diff --git a/src/MISC/fix_oneway.h b/src/MISC/fix_oneway.h new file mode 100644 index 0000000000..4d760c73ec --- /dev/null +++ b/src/MISC/fix_oneway.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(oneway,FixOneWay) + +#else + +#ifndef LMP_FIX_ONEWAY_H +#define LMP_FIX_ONEWAY_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixOneWay : public Fix { + public: + FixOneWay(class LAMMPS *, int, char **); + virtual ~FixOneWay(); + int setmask(); + virtual void init(); + virtual void end_of_step(); + + protected: + int direction; + int regionidx; + char *regionstr; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +*/ From 6a0abc7bfb45fffad00d20d4efa838a49f95b94e Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:21:49 +0000 Subject: [PATCH 18/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11609 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/Section_commands.html | 14 ++++---- doc/Section_commands.txt | 1 + doc/fix_oneway.html | 69 +++++++++++++++++++++++++++++++++++++++ doc/fix_oneway.txt | 60 ++++++++++++++++++++++++++++++++++ doc/fix_wall_reflect.html | 2 +- doc/fix_wall_reflect.txt | 2 +- 6 files changed, 139 insertions(+), 9 deletions(-) create mode 100644 doc/fix_oneway.html create mode 100644 doc/fix_oneway.txt diff --git a/doc/Section_commands.html b/doc/Section_commands.html index c8a3f4c630..b847c19c7e 100644 --- a/doc/Section_commands.html +++ b/doc/Section_commands.html @@ -399,13 +399,13 @@ of each style or click on the style itself for a full description: gldgravityheatindentlangevinlineforcemomentummove msstnebnphnphugnph/aspherenph/spherenptnpt/asphere npt/spherenvenve/aspherenve/asphere/noforcenve/bodynve/limitnve/linenve/noforce -nve/spherenve/trinvtnvt/aspherenvt/sllodnvt/sphereorient/fccplaneforce -poemspourpress/berendsenprintproperty/atomqeq/combreax/bondsrecenter 
-restrainrigidrigid/nphrigid/nptrigid/nverigid/nvtrigid/smallsetforce -shakespringspring/rgspring/selfsrdstore/forcestore/statetemp/berendsen -temp/csvrtemp/rescalethermal/conductivitytmdttmtune/kspacevectorviscosity -viscouswall/colloidwall/granwall/harmonicwall/lj1043wall/lj126wall/lj93wall/piston -wall/reflectwall/regionwall/srd +nve/spherenve/trinvtnvt/aspherenvt/sllodnvt/sphereonewayorient/fcc +planeforcepoemspourpress/berendsenprintproperty/atomqeq/combreax/bonds +recenterrestrainrigidrigid/nphrigid/nptrigid/nverigid/nvtrigid/small +setforceshakespringspring/rgspring/selfsrdstore/forcestore/state +temp/berendsentemp/csvrtemp/rescalethermal/conductivitytmdttmtune/kspacevector +viscosityviscouswall/colloidwall/granwall/harmonicwall/lj1043wall/lj126wall/lj93 +wall/pistonwall/reflectwall/regionwall/srd

    These are fix styles contributed by users, which can be used if diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt index b6251e6f92..56331a9e29 100644 --- a/doc/Section_commands.txt +++ b/doc/Section_commands.txt @@ -513,6 +513,7 @@ of each style or click on the style itself for a full description: "nvt/asphere"_fix_nvt_asphere.html, "nvt/sllod"_fix_nvt_sllod.html, "nvt/sphere"_fix_nvt_sphere.html, +"oneway"_fix_oneway.html, "orient/fcc"_fix_orient_fcc.html, "planeforce"_fix_planeforce.html, "poems"_fix_poems.html, diff --git a/doc/fix_oneway.html b/doc/fix_oneway.html new file mode 100644 index 0000000000..02f7a2d358 --- /dev/null +++ b/doc/fix_oneway.html @@ -0,0 +1,69 @@ + +

    LAMMPS WWW Site - LAMMPS Documentation - LAMMPS Commands +
    + + + + + + +
    + +

    fix oneway command +

    +

    Syntax: +

    +
    fix ID group-ID oneway N region-ID direction 
    +
    +
    • ID, group-ID are documented in fix command + +
    • oneway = style name of this fix command + +
    • N = apply this fix every this many timesteps + +
    • region-ID = ID of region where fix is active + +
    • direction = x or -x or y or -y or z or -z = coordinate and direction of the oneway constraint + + +
    +

    Examples: +

    +
    fix 1 ions oneway 10 semi -x
    +fix 2 all oneway 1 left -z
    +fix 3 all oneway 1 right z 
    +
    +

    Description: +

    +

    Enforce that particles in the group and in a given region can only +move in one direction. This is done by reversing a particle's +velocity component, if it has the wrong sign in the specified +dimension. The effect is that the particle moves in one direction +only. +

    +

    This can be used, for example, as a simple model of a semi-permeable +membrane, or as an implementation of Maxwell's demon. +

    +
    + +

    Restart, fix_modify, output, run start/stop, minimize info: +

    +

    No information about this fix is written to binary restart +files. None of the fix_modify options +are relevant to this fix. No global or per-atom quantities are stored +by this fix for access by various output +commands. No parameter of this fix can +be used with the start/stop keywords of the run command. +This fix is not invoked during energy minimization. +

    +

    Restrictions: none +

    +

    Related commands: +

    +

    fix wall/reflect command +

    +

    Default: none +

    +
    + + diff --git a/doc/fix_oneway.txt b/doc/fix_oneway.txt new file mode 100644 index 0000000000..4f0b4ae279 --- /dev/null +++ b/doc/fix_oneway.txt @@ -0,0 +1,60 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix oneway command :h3 + +[Syntax:] + +fix ID group-ID oneway N region-ID direction :pre + +ID, group-ID are documented in "fix"_fix.html command :ulb,l +oneway = style name of this fix command :l +N = apply this fix every this many timesteps :l +region-ID = ID of region where fix is active :l +direction = {x} or {-x} or {y} or {-y} or {z} or {-z} = coordinate and direction of the oneway constraint :l +:ule + +[Examples:] + +fix 1 ions oneway 10 semi -x +fix 2 all oneway 1 left -z +fix 3 all oneway 1 right z :pre + +[Description:] + +Enforce that particles in the group and in a given region can only +move in one direction. This is done by reversing a particle's +velocity component, if it has the wrong sign in the specified +dimension. The effect is that the particle moves in one direction +only. + +This can be used, for example, as a simple model of a semi-permeable +membrane, or as an implementation of Maxwell's demon. + +:line + +[Restart, fix_modify, output, run start/stop, minimize info:] + +No information about this fix is written to "binary restart +files"_restart.html. None of the "fix_modify"_fix_modify.html options +are relevant to this fix. No global or per-atom quantities are stored +by this fix for access by various "output +commands"_Section_howto.html#howto_15. No parameter of this fix can +be used with the {start/stop} keywords of the "run"_run.html command. +This fix is not invoked during "energy minimization"_minimize.html. 
+ +[Restrictions:] none + +[Related commands:] + +"fix wall/reflect"_fix_wall_reflect.html command + +[Default:] none + +:line + diff --git a/doc/fix_wall_reflect.html b/doc/fix_wall_reflect.html index 47fb6c1e86..8e87e80db5 100644 --- a/doc/fix_wall_reflect.html +++ b/doc/fix_wall_reflect.html @@ -161,7 +161,7 @@ error.

    Related commands:

    -

    fix wall/lj93 command +

    fix wall/lj93, fix oneway

    Default: none

    diff --git a/doc/fix_wall_reflect.txt b/doc/fix_wall_reflect.txt index 1087fe733d..df3a5481bc 100644 --- a/doc/fix_wall_reflect.txt +++ b/doc/fix_wall_reflect.txt @@ -150,7 +150,7 @@ error. [Related commands:] -"fix wall/lj93"_fix_wall.html command +"fix wall/lj93"_fix_wall.html, "fix oneway"_fix_oneway.html [Default:] none From 99e6ed4554670963843577240a86fe789b41bd15 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:23:48 +0000 Subject: [PATCH 19/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11610 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 4340fe74b2..6371867d57 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "6 Mar 2014" +#define LAMMPS_VERSION "10 Mar 2014" From 8fc989a6b1b357af086d35869a362d2177b007d9 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:23:49 +0000 Subject: [PATCH 20/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11611 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/Manual.html | 4 ++-- doc/Manual.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/Manual.html b/doc/Manual.html index 5cce482cc9..f3eaba5d7b 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -1,7 +1,7 @@ LAMMPS Users Manual - + @@ -22,7 +22,7 @@

    LAMMPS Documentation

    -

    6 Mar 2014 version +

    10 Mar 2014 version

    Version info:

    diff --git a/doc/Manual.txt b/doc/Manual.txt index a639cc4967..5e61d73f8f 100644 --- a/doc/Manual.txt +++ b/doc/Manual.txt @@ -1,6 +1,6 @@ LAMMPS Users Manual - + @@ -18,7 +18,7 @@

    LAMMPS Documentation :c,h3 -6 Mar 2014 version :c,h4 +10 Mar 2014 version :c,h4 Version info: :h4 From de4737af3ae54f61b5f606626fe3d52d23625253 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:40:58 +0000 Subject: [PATCH 21/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11614 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/memory.h | 4 ++-- src/my_page.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/memory.h b/src/memory.h index 48b7c8f3aa..bcd59ab68f 100644 --- a/src/memory.h +++ b/src/memory.h @@ -410,7 +410,7 @@ class Memory : protected Pointers { array = (TYPE ****) smalloc(nbytes,name); int i,j,k; - bigint m1,m2,m3; + bigint m1,m2; bigint n = 0; for (i = 0; i < n1; i++) { m2 = ((bigint) i) * n2; @@ -515,7 +515,7 @@ class Memory : protected Pointers { array = (TYPE *****) smalloc(nbytes,name); int i,j,k,l; - bigint m1,m2,m3,m4,m5; + bigint m1,m2; bigint n = 0; for (i = 0; i < n1; i++) { m2 = ((bigint) i) * n2; diff --git a/src/my_page.h b/src/my_page.h index 5fdf1e1351..8add279494 100644 --- a/src/my_page.h +++ b/src/my_page.h @@ -213,9 +213,9 @@ class MyPage { return; } - void *ptr; for (int i = npage-pagedelta; i < npage; i++) { #if defined(LAMMPS_MEMALIGN) + void *ptr; if (posix_memalign(&ptr, LAMMPS_MEMALIGN, pagesize*sizeof(T))) errorflag = 2; pages[i] = (T *) ptr; From df0c8d061fa35ded90fef5220313caceffe7d583 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:42:05 +0000 Subject: [PATCH 22/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11615 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/molecule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/molecule.h b/src/molecule.h index e45e7b61dc..45aae371a0 100644 --- a/src/molecule.h +++ b/src/molecule.h @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ #ifndef LMP_ONE_MOLECULE_H -#define LMP_ONE_MOLEUCULE_H +#define LMP_ONE_MOLECULE_H #include "pointers.h" From 
3d6ac732ad59db0a2c02ff62182a67518dcda4f0 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:45:25 +0000 Subject: [PATCH 23/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11616 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/atom.cpp | 2 +- src/dump.cpp | 2 +- src/dump_image.cpp | 8 ++++---- src/fix_move.cpp | 9 ++++++--- src/fix_nh.cpp | 15 ++++++++++----- src/molecule.cpp | 4 ++-- src/neigh_respa.cpp | 12 ++++++------ src/procmap.cpp | 2 +- src/read_data.cpp | 4 ++-- src/read_restart.cpp | 4 ++-- src/universe.cpp | 2 +- src/variable.cpp | 4 ++-- src/write_data.cpp | 6 +++--- src/write_restart.cpp | 2 +- 14 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/atom.cpp b/src/atom.cpp index a850c66f58..2280943a33 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -620,7 +620,7 @@ int Atom::count_words(const char *line) strcpy(copy,line); char *ptr; - if (ptr = strchr(copy,'#')) *ptr = '\0'; + if ((ptr = strchr(copy,'#'))) *ptr = '\0'; if (strtok(copy," \t\n\r\f") == NULL) { memory->destroy(copy); diff --git a/src/dump.cpp b/src/dump.cpp index a94ba4af92..878304addf 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -105,7 +105,7 @@ Dump::Dump(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) multiname = NULL; char *ptr; - if (ptr = strchr(filename,'%')) { + if ((ptr = strchr(filename,'%'))) { if (strstr(style,"mpiio")) error->all(FLERR, "Dump file MPI-IO output not allowed with % in filename"); diff --git a/src/dump_image.cpp b/src/dump_image.cpp index c7bfd868d4..1c2326b474 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -969,14 +969,14 @@ int DumpImage::modify_param(int narg, char **arg) int ncount = 1; char *nextptr; char *ptr = arg[2]; - while (nextptr = strchr(ptr,'/')) { + while ((nextptr = strchr(ptr,'/'))) { ptr = nextptr + 1; ncount++; } char **ptrs = new char*[ncount+1]; ncount = 0; ptrs[ncount++] = strtok(arg[2],"/"); - while (ptrs[ncount++] = strtok(NULL,"/")); + while ((ptrs[ncount++] = strtok(NULL,"/"))); 
ncount--; // assign each of ncount colors in round-robin fashion to types @@ -1029,14 +1029,14 @@ int DumpImage::modify_param(int narg, char **arg) int ncount = 1; char *nextptr; char *ptr = arg[2]; - while (nextptr = strchr(ptr,'/')) { + while ((nextptr = strchr(ptr,'/'))) { ptr = nextptr + 1; ncount++; } char **ptrs = new char*[ncount+1]; ncount = 0; ptrs[ncount++] = strtok(arg[2],"/"); - while (ptrs[ncount++] = strtok(NULL,"/")); + while ((ptrs[ncount++] = strtok(NULL,"/"))); ncount--; // assign each of ncount colors in round-robin fashion to types diff --git a/src/fix_move.cpp b/src/fix_move.cpp index ca0f84cdca..c39646fee0 100644 --- a/src/fix_move.cpp +++ b/src/fix_move.cpp @@ -752,7 +752,7 @@ void FixMove::final_integrate() for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { - if (xflag) + if (xflag) { if (rmass) { dtfm = dtf / rmass[i]; v[i][0] += dtfm * f[i][0]; @@ -760,8 +760,9 @@ void FixMove::final_integrate() dtfm = dtf / mass[type[i]]; v[i][0] += dtfm * f[i][0]; } + } - if (yflag) + if (yflag) { if (rmass) { dtfm = dtf / rmass[i]; v[i][1] += dtfm * f[i][1]; @@ -769,8 +770,9 @@ void FixMove::final_integrate() dtfm = dtf / mass[type[i]]; v[i][1] += dtfm * f[i][1]; } + } - if (zflag) + if (zflag) { if (rmass) { dtfm = dtf / rmass[i]; v[i][2] += dtfm * f[i][2]; @@ -778,6 +780,7 @@ void FixMove::final_integrate() dtfm = dtf / mass[type[i]]; v[i][2] += dtfm * f[i][2]; } + } } } } diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index 0d456cd04e..a63f3d322b 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -1534,20 +1534,22 @@ double FixNH::compute_vector(int n) n -= ilen; } else if (pstyle == ANISO) { ilen = 3; - if (n < ilen) + if (n < ilen) { if (p_flag[n]) return p_hydro*(volume-vol0) / (pdim*nktv2p); else return 0.0; + } n -= ilen; } else { ilen = 6; - if (n < ilen) + if (n < ilen) { if (n > 2) return 0.0; else if (p_flag[n]) return p_hydro*(volume-vol0) / (pdim*nktv2p); else return 0.0; + } n -= ilen; } @@ -1558,17 +1560,19 @@ double 
FixNH::compute_vector(int n) n -= ilen; } else if (pstyle == ANISO) { ilen = 3; - if (n < ilen) + if (n < ilen) { if (p_flag[n]) return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n]; else return 0.0; + } n -= ilen; } else { ilen = 6; - if (n < ilen) + if (n < ilen) { if (p_flag[n]) return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n]; else return 0.0; + } n -= ilen; } @@ -2139,7 +2143,7 @@ void FixNH::nh_omega_dot() if (deviatoric_flag) compute_deviatoric(); mtk_term1 = 0.0; - if (mtk_flag) + if (mtk_flag) { if (pstyle == ISO) { mtk_term1 = tdof * boltz * t_current; mtk_term1 /= pdim * atom->natoms; @@ -2150,6 +2154,7 @@ void FixNH::nh_omega_dot() mtk_term1 += mvv_current[i]; mtk_term1 /= pdim * atom->natoms; } + } for (int i = 0; i < 3; i++) if (p_flag[i]) { diff --git a/src/molecule.cpp b/src/molecule.cpp index ea1fa6b73a..3fe25e6195 100644 --- a/src/molecule.cpp +++ b/src/molecule.cpp @@ -313,7 +313,7 @@ void Molecule::read(int flag) // trim anything from '#' onward // if line is blank, continue - if (ptr = strchr(line,'#')) *ptr = '\0'; + if ((ptr = strchr(line,'#'))) *ptr = '\0'; if (strspn(line," \t\n\r") == strlen(line)) continue; // search line for header keywords and set corresponding variable @@ -1325,7 +1325,7 @@ int Molecule::parse(char *line, char **words, int max) int nwords = 0; words[nwords++] = strtok(line," \t\n\r\f"); - while (ptr = strtok(NULL," \t\n\r\f")) { + while ((ptr = strtok(NULL," \t\n\r\f"))) { if (nwords < max) words[nwords] = ptr; nwords++; } diff --git a/src/neigh_respa.cpp b/src/neigh_respa.cpp index 239db4b249..91f85de6ea 100644 --- a/src/neigh_respa.cpp +++ b/src/neigh_respa.cpp @@ -131,7 +131,7 @@ void Neighbor::respa_nsq_no_newton(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else 
neighptr[n++] = j; @@ -309,7 +309,7 @@ void Neighbor::respa_nsq_newton(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else neighptr[n++] = j; @@ -478,7 +478,7 @@ void Neighbor::respa_bin_no_newton(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else neighptr[n++] = j; @@ -651,7 +651,7 @@ void Neighbor::respa_bin_newton(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else neighptr[n++] = j; @@ -695,7 +695,7 @@ void Neighbor::respa_bin_newton(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else neighptr[n++] = j; @@ -873,7 +873,7 @@ void Neighbor::respa_bin_newton_tri(NeighList *list) tag[j]-tagprev); else which = 0; if (which == 0) neighptr[n++] = j; - else if (minchange = domain->minimum_image_check(delx,dely,delz)) + else if ((minchange = domain->minimum_image_check(delx,dely,delz))) neighptr[n++] = j; else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); } else neighptr[n++] = j; diff --git a/src/procmap.cpp b/src/procmap.cpp index 6703b08d25..b24e59421b 
100644 --- a/src/procmap.cpp +++ b/src/procmap.cpp @@ -291,7 +291,7 @@ void ProcMap::custom_grid(char *cfile, int nprocs, if (!fgets(line,MAXLINE,fp)) error->one(FLERR,"Unexpected end of custom file"); while (1) { - if (ptr = strchr(line,'#')) *ptr = '\0'; + if ((ptr = strchr(line,'#'))) *ptr = '\0'; if (strspn(line," \t\n\r") != strlen(line)) break; if (!fgets(line,MAXLINE,fp)) error->one(FLERR,"Unexpected end of custom file"); diff --git a/src/read_data.cpp b/src/read_data.cpp index 44def025df..5f3633ddbd 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -619,7 +619,7 @@ void ReadData::header() // trim anything from '#' onward // if line is blank, continue - if (ptr = strchr(line,'#')) *ptr = '\0'; + if ((ptr = strchr(line,'#'))) *ptr = '\0'; if (strspn(line," \t\n\r") == strlen(line)) continue; // allow special fixes first chance to match and process the line @@ -1556,7 +1556,7 @@ void ReadData::skip_lines(bigint n) void ReadData::parse_coeffs(char *line, const char *addstr, int dupflag) { char *ptr; - if (ptr = strchr(line,'#')) *ptr = '\0'; + if ((ptr = strchr(line,'#'))) *ptr = '\0'; narg = 0; char *word = strtok(line," \t\n\r\f"); diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 978b2d2bc9..aeeec98e26 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -549,7 +549,7 @@ void ReadRestart::file_search(char *infile, char *outfile) char *pattern = new char[strlen(filename) + 16]; - if (ptr = strchr(filename,'%')) { + if ((ptr = strchr(filename,'%'))) { *ptr = '\0'; sprintf(pattern,"%s%s%s",filename,"base",ptr+1); *ptr = '%'; @@ -574,7 +574,7 @@ void ReadRestart::file_search(char *infile, char *outfile) DIR *dp = opendir(dirname); if (dp == NULL) error->one(FLERR,"Cannot open dir to search for restart file"); - while (ep = readdir(dp)) { + while ((ep = readdir(dp))) { if (strstr(ep->d_name,begin) != ep->d_name) continue; if ((ptr = strstr(&ep->d_name[nbegin],end)) == NULL) continue; if (strlen(end) == 0) ptr = ep->d_name + 
strlen(ep->d_name); diff --git a/src/universe.cpp b/src/universe.cpp index 35d8f2cf24..d1665599cf 100644 --- a/src/universe.cpp +++ b/src/universe.cpp @@ -100,7 +100,7 @@ void Universe::reorder(char *style, char *arg) if (!fgets(line,MAXLINE,fp)) error->one(FLERR,"Unexpected end of -reorder file"); while (1) { - if (ptr = strchr(line,'#')) *ptr = '\0'; + if ((ptr = strchr(line,'#'))) *ptr = '\0'; if (strspn(line," \t\n\r") != strlen(line)) break; if (!fgets(line,MAXLINE,fp)) error->one(FLERR,"Unexpected end of -reorder file"); diff --git a/src/variable.cpp b/src/variable.cpp index a6f5c69158..b13b4d9515 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -4088,7 +4088,7 @@ int VarReader::read_scalar(char *str) else n = strlen(str); if (n == 0) break; // end of file str[n-1] = '\0'; // strip newline - if (ptr = strchr(str,'#')) *ptr = '\0'; // strip comment + if ((ptr = strchr(str,'#'))) *ptr = '\0'; // strip comment if (strtok(str," \t\n\r\f") == NULL) continue; // skip if blank n = strlen(str) + 1; break; @@ -4131,7 +4131,7 @@ int VarReader::read_peratom() else n = strlen(str); if (n == 0) break; // end of file str[n-1] = '\0'; // strip newline - if (ptr = strchr(str,'#')) *ptr = '\0'; // strip comment + if ((ptr = strchr(str,'#'))) *ptr = '\0'; // strip comment if (strtok(str," \t\n\r\f") == NULL) continue; // skip if blank n = strlen(str) + 1; break; diff --git a/src/write_data.cpp b/src/write_data.cpp index c3b038ee41..899edacdc2 100644 --- a/src/write_data.cpp +++ b/src/write_data.cpp @@ -65,7 +65,7 @@ void WriteData::command(int narg, char **arg) int n = strlen(arg[0]) + 16; char *file = new char[n]; - if (ptr = strchr(arg[0],'*')) { + if ((ptr = strchr(arg[0],'*'))) { *ptr = '\0'; sprintf(file,"%s" BIGINT_FORMAT "%s",arg[0],update->ntimestep,ptr+1); } else strcpy(file,arg[0]); @@ -152,11 +152,11 @@ void WriteData::write(char *file) // sum up bond,angle counts // may be different than atom->nbonds,nangles if broken/turned-off - if (atom->molecular == 1 && 
atom->nbonds || atom->nbondtypes) { + if (atom->molecular == 1 && (atom->nbonds || atom->nbondtypes)) { nbonds_local = atom->avec->pack_bond(NULL); MPI_Allreduce(&nbonds_local,&nbonds,1,MPI_LMP_BIGINT,MPI_SUM,world); } - if (atom->molecular == 1 && atom->nangles || atom->nangletypes) { + if (atom->molecular == 1 && (atom->nangles || atom->nangletypes)) { nangles_local = atom->avec->pack_angle(NULL); MPI_Allreduce(&nangles_local,&nangles,1,MPI_LMP_BIGINT,MPI_SUM,world); } diff --git a/src/write_restart.cpp b/src/write_restart.cpp index 422462c2bd..5c5bd066ed 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -89,7 +89,7 @@ void WriteRestart::command(int narg, char **arg) int n = strlen(arg[0]) + 16; char *file = new char[n]; - if (ptr = strchr(arg[0],'*')) { + if ((ptr = strchr(arg[0],'*'))) { *ptr = '\0'; sprintf(file,"%s" BIGINT_FORMAT "%s",arg[0],update->ntimestep,ptr+1); } else strcpy(file,arg[0]); From 5e1d34a23d50f9986b5041844fd12c1183d0a46a Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 15:56:49 +0000 Subject: [PATCH 24/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11617 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/atom_vec_body.cpp | 15 ++++++--------- src/balance.cpp | 2 +- src/change_box.cpp | 1 - src/compute_angle_local.cpp | 2 -- src/compute_bond_local.cpp | 1 - src/compute_cluster_atom.cpp | 3 +-- src/compute_dihedral_local.cpp | 9 +++++---- src/compute_gyration_molecule.cpp | 2 +- src/compute_improper_local.cpp | 2 -- src/compute_property_local.cpp | 2 -- src/compute_temp_profile.cpp | 1 - src/domain.cpp | 2 +- src/fix_box_relax.cpp | 6 +++--- src/fix_nh.cpp | 2 +- src/fix_press_berendsen.cpp | 4 ++-- src/image.cpp | 13 +++++++------ src/irregular.cpp | 5 ++--- src/min.cpp | 4 ---- src/min_linesearch.cpp | 4 ++-- src/molecule.cpp | 1 - src/neigh_stencil.cpp | 5 ++--- src/pair.cpp | 19 ++++++++----------- src/pair_born_coul_wolf.cpp | 3 +-- src/pair_coul_wolf.cpp | 14 +++++--------- src/pair_gauss.cpp | 2 +- 
src/pair_hybrid.cpp | 6 ++---- src/pair_lj_smooth_linear.cpp | 1 - src/read_data.cpp | 9 ++++----- src/read_restart.cpp | 3 --- src/set.cpp | 4 +--- src/variable.cpp | 18 +++++++++--------- src/write_data.cpp | 2 +- 32 files changed, 66 insertions(+), 101 deletions(-) diff --git a/src/atom_vec_body.cpp b/src/atom_vec_body.cpp index 53808be5b4..39f20272c2 100644 --- a/src/atom_vec_body.cpp +++ b/src/atom_vec_body.cpp @@ -562,7 +562,7 @@ int AtomVecBody::pack_border(int n, int *list, double *buf, { int i,j,m; double dx,dy,dz; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; if (pbc_flag == 0) { @@ -642,7 +642,7 @@ int AtomVecBody::pack_border_vel(int n, int *list, double *buf, { int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; if (pbc_flag == 0) { @@ -775,7 +775,7 @@ int AtomVecBody::pack_border_vel(int n, int *list, double *buf, int AtomVecBody::pack_border_hybrid(int n, int *list, double *buf) { int i,j,m; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; for (i = 0; i < n; i++) { @@ -805,7 +805,7 @@ int AtomVecBody::pack_border_hybrid(int n, int *list, double *buf) void AtomVecBody::unpack_border(int n, int first, double *buf) { int i,j,m,last; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; last = first + n; @@ -853,7 +853,7 @@ void AtomVecBody::unpack_border(int n, int first, double *buf) void AtomVecBody::unpack_border_vel(int n, int first, double *buf) { int i,j,m,last; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; last = first + n; @@ -907,7 +907,7 @@ void AtomVecBody::unpack_border_vel(int n, int first, double *buf) int AtomVecBody::unpack_border_hybrid(int n, int first, double *buf) { int i,j,m,last; - double *quat,*c1,*c2,*c3,*inertia; + double *quat,*inertia; m = 0; last = first + n; @@ -1346,9 +1346,6 @@ int AtomVecBody::data_vel_hybrid(int m, char **values) void AtomVecBody::pack_data(double **buf) { - double 
c2mc1[2],c3mc1[3],norm[3]; - double area; - int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { buf[i][0] = ubuf(tag[i]).d; diff --git a/src/balance.cpp b/src/balance.cpp index e1bcf36924..010c2e8740 100644 --- a/src/balance.cpp +++ b/src/balance.cpp @@ -781,7 +781,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp) int nx = comm->procgrid[0] + 1; int ny = comm->procgrid[1] + 1; - int nz = comm->procgrid[2] + 1; + //int nz = comm->procgrid[2] + 1; if (dimension == 2) { int m = 0; diff --git a/src/change_box.cpp b/src/change_box.cpp index e0fb7d20ab..fcc90b7512 100644 --- a/src/change_box.cpp +++ b/src/change_box.cpp @@ -70,7 +70,6 @@ void ChangeBox::command(int narg, char **arg) memset(ops,0,(narg-1)*sizeof(Operation)); nops = 0; - int index; int iarg = 1; while (iarg < narg) { if (strcmp(arg[iarg],"x") == 0 || strcmp(arg[iarg],"y") == 0 || diff --git a/src/compute_angle_local.cpp b/src/compute_angle_local.cpp index b4f080f86c..049887e0d5 100644 --- a/src/compute_angle_local.cpp +++ b/src/compute_angle_local.cpp @@ -48,9 +48,7 @@ ComputeAngleLocal::ComputeAngleLocal(LAMMPS *lmp, int narg, char **arg) : tflag = eflag = -1; nvalues = 0; - int i; for (int iarg = 3; iarg < narg; iarg++) { - i = iarg-3; if (strcmp(arg[iarg],"theta") == 0) tflag = nvalues++; else if (strcmp(arg[iarg],"eng") == 0) eflag = nvalues++; else error->all(FLERR,"Invalid keyword in compute angle/local command"); diff --git a/src/compute_bond_local.cpp b/src/compute_bond_local.cpp index 00aefeda3a..826e1aeeb9 100644 --- a/src/compute_bond_local.cpp +++ b/src/compute_bond_local.cpp @@ -119,7 +119,6 @@ int ComputeBondLocal::compute_bonds(int flag) int i,m,n,nb,atom1,atom2,imol,iatom,btype; tagint tagprev; double delx,dely,delz,rsq; - double *dbuf,*ebuf,*fbuf; double *ptr; double **x = atom->x; diff --git a/src/compute_cluster_atom.cpp b/src/compute_cluster_atom.cpp index b1713873b2..c217142e33 100644 --- a/src/compute_cluster_atom.cpp +++ b/src/compute_cluster_atom.cpp @@ -94,7 +94,7 @@ 
void ComputeClusterAtom::init_list(int id, NeighList *ptr) void ComputeClusterAtom::compute_peratom() { - int i,j,ii,jj,inum,jnum,n; + int i,j,ii,jj,inum,jnum; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *ilist,*jlist,*numneigh,**firstneigh; @@ -156,7 +156,6 @@ void ComputeClusterAtom::compute_peratom() jlist = firstneigh[i]; jnum = numneigh[i]; - n = 0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; diff --git a/src/compute_dihedral_local.cpp b/src/compute_dihedral_local.cpp index a43681c8f3..258772e1a5 100644 --- a/src/compute_dihedral_local.cpp +++ b/src/compute_dihedral_local.cpp @@ -50,9 +50,7 @@ ComputeDihedralLocal::ComputeDihedralLocal(LAMMPS *lmp, int narg, char **arg) : pflag = -1; nvalues = 0; - int i; for (int iarg = 3; iarg < narg; iarg++) { - i = iarg-3; if (strcmp(arg[iarg],"phi") == 0) pflag = nvalues++; else error->all(FLERR,"Invalid keyword in compute dihedral/local command"); } @@ -111,7 +109,7 @@ int ComputeDihedralLocal::compute_dihedrals(int flag) int i,m,n,nd,atom1,atom2,atom3,atom4,imol,iatom; tagint tagprev; double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; - double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,ra2inv,rb2inv,rabinv; double s,c; double *pbuf; @@ -207,8 +205,11 @@ int ComputeDihedralLocal::compute_dihedrals(int flag) rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; rg = sqrt(rgsq); - rginv = ra2inv = rb2inv = 0.0; + ra2inv = rb2inv = 0.0; +/* DEAD CODE + rginv = 0.0; if (rg > 0) rginv = 1.0/rg; +*/ if (rasq > 0) ra2inv = 1.0/rasq; if (rbsq > 0) rb2inv = 1.0/rbsq; rabinv = sqrt(ra2inv*rb2inv); diff --git a/src/compute_gyration_molecule.cpp b/src/compute_gyration_molecule.cpp index f81a373830..c1fc54c5dc 100644 --- a/src/compute_gyration_molecule.cpp +++ b/src/compute_gyration_molecule.cpp @@ -223,7 +223,7 @@ void ComputeGyrationMolecule::compute_array() void ComputeGyrationMolecule::molcom() { tagint imol; - double 
dx,dy,dz,massone; + double massone; double unwrap[3]; for (int i = 0; i < nmolecules; i++) diff --git a/src/compute_improper_local.cpp b/src/compute_improper_local.cpp index 7726781439..33dcff53bb 100644 --- a/src/compute_improper_local.cpp +++ b/src/compute_improper_local.cpp @@ -51,9 +51,7 @@ ComputeImproperLocal::ComputeImproperLocal(LAMMPS *lmp, int narg, char **arg) : cflag = -1; nvalues = 0; - int i; for (int iarg = 3; iarg < narg; iarg++) { - i = iarg-3; if (strcmp(arg[iarg],"chi") == 0) cflag = nvalues++; else error->all(FLERR,"Invalid keyword in compute improper/local command"); } diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp index 675ca7e58c..da34de08ae 100644 --- a/src/compute_property_local.cpp +++ b/src/compute_property_local.cpp @@ -342,8 +342,6 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag) int *type = atom->type; int *mask = atom->mask; int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; int newton_pair = force->newton_pair; // invoke half neighbor list (will copy or build if necessary) diff --git a/src/compute_temp_profile.cpp b/src/compute_temp_profile.cpp index f9150907fe..fd759ed773 100644 --- a/src/compute_temp_profile.cpp +++ b/src/compute_temp_profile.cpp @@ -55,7 +55,6 @@ ComputeTempProfile::ComputeTempProfile(LAMMPS *lmp, int narg, char **arg) : ncount += 2; nbinx = nbiny = nbinz = 1; - int lastarg; int iarg = 6; if (strcmp(arg[iarg],"x") == 0) { diff --git a/src/domain.cpp b/src/domain.cpp index c636ec64cc..54ef21fc02 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -693,7 +693,7 @@ void Domain::box_too_small_check() Molecule **onemols = atom->avec->onemols; int nlocal = atom->nlocal; - double delx,dely,delz,rsq,r; + double delx,dely,delz,rsq; double maxbondme = 0.0; int lostbond = output->thermo->lostbond; diff --git a/src/fix_box_relax.cpp b/src/fix_box_relax.cpp index 8f2a1bf253..4a9febf403 100644 --- 
a/src/fix_box_relax.cpp +++ b/src/fix_box_relax.cpp @@ -812,7 +812,7 @@ int FixBoxRelax::modify_param(int narg, char **arg) void FixBoxRelax::compute_sigma() { - double pdeviatoric[3][3],htmp[3][3]; + double pdeviatoric[3][3];//,htmp[3][3]; double tmp1[3][3],sigma_tensor[3][3],h_invtmp[3][3]; // reset reference box dimensions @@ -829,7 +829,7 @@ void FixBoxRelax::compute_sigma() h0_inv[3] = domain->h_inv[3]; h0_inv[4] = domain->h_inv[4]; h0_inv[5] = domain->h_inv[5]; - +/* DEAD CODE htmp[0][0] = h0[0]; htmp[1][1] = h0[1]; htmp[2][2] = h0[2]; @@ -839,7 +839,7 @@ void FixBoxRelax::compute_sigma() htmp[2][1] = 0.0; htmp[2][0] = 0.0; htmp[1][0] = 0.0; - +*/ h_invtmp[0][0] = h0_inv[0]; h_invtmp[1][1] = h0_inv[1]; h_invtmp[2][2] = h0_inv[2]; diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index a63f3d322b..139b9ae105 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -1172,7 +1172,7 @@ void FixNH::write_restart(FILE *fp) double *list; memory->create(list,nsize,"nh:list"); - int n = pack_restart_data(list); + pack_restart_data(list); if (comm->me == 0) { int size = nsize * sizeof(double); diff --git a/src/fix_press_berendsen.cpp b/src/fix_press_berendsen.cpp index f4e86f74c9..25e0f9a44c 100644 --- a/src/fix_press_berendsen.cpp +++ b/src/fix_press_berendsen.cpp @@ -347,8 +347,8 @@ void FixPressBerendsen::end_of_step() // compute new T,P if (pstyle == ISO) { - double tmp = temperature->compute_scalar(); - tmp = pressure->compute_scalar(); + temperature->compute_scalar(); + pressure->compute_scalar(); } else { temperature->compute_vector(); pressure->compute_vector(); diff --git a/src/image.cpp b/src/image.cpp index 8f88433c48..e152a9afa8 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -692,7 +692,7 @@ void Image::draw_triangle(double *x, double *y, double *z, double *surfaceColor) { double d1[3], d1len, d2[3], d2len, normal[3], invndotd; double xlocal[3], ylocal[3], zlocal[3]; - double center[3], bounds[6]; + //double center[3]; double surface[3]; double depth; @@ -722,10 
+722,11 @@ void Image::draw_triangle(double *x, double *y, double *z, double *surfaceColor) if (invndotd == 0) return; double r[3],u[3]; - +/* DEAD CODE center[0] = (xlocal[0] + ylocal[0] + zlocal[0]) / 3; center[1] = (xlocal[1] + ylocal[1] + zlocal[1]) / 3; center[2] = (xlocal[2] + ylocal[2] + zlocal[2]) / 3; +*/ r[0] = MathExtra::dot3(camRight,xlocal); r[1] = MathExtra::dot3(camRight,ylocal); @@ -1088,7 +1089,7 @@ void Image::write_PPM(FILE *fp) { fprintf (fp,"P6\n%d %d\n255\n",width,height); - int x,y; + int y; for (y = height-1; y >= 0; y --) fwrite(&writeBuffer[y*width*3],3,width,fp); } @@ -1878,7 +1879,7 @@ int ColorMap::minmax(double mindynamic, double maxdynamic) double *ColorMap::value2color(double value) { - double lo,hi; + double lo;//,hi; value = MAX(value,locurrent); value = MIN(value,hicurrent); @@ -1887,10 +1888,10 @@ double *ColorMap::value2color(double value) if (locurrent == hicurrent) value = 0.0; else value = (value-locurrent) / (hicurrent-locurrent); lo = 0.0; - hi = 1.0; + //hi = 1.0; } else { lo = locurrent; - hi = hicurrent; + //hi = hicurrent; } if (mstyle == CONTINUOUS) { diff --git a/src/irregular.cpp b/src/irregular.cpp index 656aa33870..679428fb48 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -193,21 +193,20 @@ int Irregular::migrate_check() // this check needs to observe PBC // cannot check via comm->procneigh since it ignores PBC - AtomVec *avec = atom->avec; double **x = atom->x; int nlocal = atom->nlocal; int *periodicity = domain->periodicity; int *myloc = comm->myloc; int *procgrid = comm->procgrid; - int newproc,igx,igy,igz,glo,ghi; + int igx,igy,igz,glo,ghi; int flag = 0; for (int i = 0; i < nlocal; i++) { if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] || x[i][1] < sublo[1] || x[i][1] >= subhi[1] || x[i][2] < sublo[2] || x[i][2] >= subhi[2]) { - newproc = coord2proc(x[i],igx,igy,igz); + coord2proc(x[i],igx,igy,igz); glo = myloc[0] - 1; ghi = myloc[0] + 1; diff --git a/src/min.cpp b/src/min.cpp index 
a158fb9739..7093af4e41 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -547,10 +547,6 @@ void Min::force_clear() { if (external_force_clear) return; - int i; - - if (external_force_clear) return; - // clear global force array // nall includes ghosts only if either newton flag is set diff --git a/src/min_linesearch.cpp b/src/min_linesearch.cpp index eda07c67a5..b0f22ce096 100644 --- a/src/min_linesearch.cpp +++ b/src/min_linesearch.cpp @@ -586,7 +586,7 @@ int MinLineSearch::linemin_forcezero(double eoriginal, double &alpha) { int i,m,n; double fdothall,fdothme,hme,hmax,hmaxall; - double de_ideal,de; + double de; double *xatom,*x0atom,*fatom,*hatom; double alpha_max, alpha_init, alpha_del; @@ -884,7 +884,7 @@ double MinLineSearch::alpha_step(double alpha, int resetflag) double MinLineSearch::compute_dir_deriv(double &ff) { int i,m,n; - double *xatom,*hatom, *fatom; + double *hatom, *fatom; double dot[2],dotall[2]; double fh; diff --git a/src/molecule.cpp b/src/molecule.cpp index 3fe25e6195..5258eb7ad4 100644 --- a/src/molecule.cpp +++ b/src/molecule.cpp @@ -231,7 +231,6 @@ void Molecule::compute_inertia() // inertia = 3 eigenvalues = principal moments of inertia // evectors and exzy = 3 evectors = principal axes of rigid body - int ierror; double cross[3]; double tensor[3][3],evectors[3][3]; diff --git a/src/neigh_stencil.cpp b/src/neigh_stencil.cpp index a39ebf66bb..65fd860d2b 100644 --- a/src/neigh_stencil.cpp +++ b/src/neigh_stencil.cpp @@ -63,13 +63,12 @@ void Neighbor::stencil_half_bin_2d_no_newton(NeighList *list, void Neighbor::stencil_half_ghost_bin_2d_no_newton(NeighList *list, int sx, int sy, int sz) { - int i,j,k; int *stencil = list->stencil; int **stencilxyz = list->stencilxyz; int nstencil = 0; - for (j = -sy; j <= sy; j++) - for (i = -sx; i <= sx; i++) + for (int j = -sy; j <= sy; j++) + for (int i = -sx; i <= sx; i++) if (bin_distance(i,j,0) < cutneighmaxsq) { stencilxyz[nstencil][0] = i; stencilxyz[nstencil][1] = j; diff --git a/src/pair.cpp 
b/src/pair.cpp index a9062e315e..335436fcbc 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -233,14 +233,11 @@ void Pair::init() void Pair::reinit() { - int i,j; - double tmp; - etail = ptail = 0.0; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - tmp = init_one(i,j); + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) { + init_one(i,j); if (tail_flag) { etail += etail_ij; ptail += ptail_ij; @@ -494,7 +491,7 @@ void Pair::init_tables(double cut_coul, double *cut_respa) void Pair::init_tables_disp(double cut_lj_global) { int masklo,maskhi; - double r, rsq, r2inv, force_coul, force_lj; + double rsq; double g_ewald_6 = force->kspace->g_ewald_6; double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; @@ -531,7 +528,7 @@ void Pair::init_tables_disp(double cut_lj_global) rsq_lookup.i = i << ndispshiftbits; rsq_lookup.i |= maskhi; } - r = sqrtf(rsq_lookup.f); + // r = sqrtf(rsq_lookup.f); DEAD CODE rsq = rsq_lookup.f; register double x2 = g2*rsq, a2 = 1.0/x2; x2 = a2*exp(-x2); @@ -567,7 +564,7 @@ void Pair::init_tables_disp(double cut_lj_global) // deltas at itablemax only needed if corresponding rsq < cut*cut // if so, compute deltas between rsq and cut*cut - double f_tmp,c_tmp,e_tmp,p_tmp = 0.0,v_tmp = 0.0; + double f_tmp,e_tmp; double cut_lj_globalsq; itablemin = minrsq_lookup.i & ndispmask; itablemin >>= ndispshiftbits; @@ -578,7 +575,7 @@ void Pair::init_tables_disp(double cut_lj_global) if (rsq_lookup.f < (cut_lj_globalsq = cut_lj_global * cut_lj_global)) { rsq_lookup.f = cut_lj_globalsq; - r = sqrtf(rsq_lookup.f); + // r = sqrtf(rsq_lookup.f); DEAD CODE register double x2 = g2*rsq, a2 = 1.0/x2; x2 = a2*exp(-x2); @@ -1152,7 +1149,7 @@ void Pair::ev_tally4(int i, int j, int k, int m, double evdwl, void Pair::ev_tally_tip4p(int key, int *list, double *v, double ecoul, double alpha) { - int i,j; + int i; if (eflag_either) { if (eflag_global) eng_coul += ecoul; diff --git a/src/pair_born_coul_wolf.cpp 
b/src/pair_born_coul_wolf.cpp index 4ecaa88340..6829401926 100644 --- a/src/pair_born_coul_wolf.cpp +++ b/src/pair_born_coul_wolf.cpp @@ -84,7 +84,6 @@ void PairBornCoulWolf::compute(int eflag, int vflag) double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; double *special_coul = force->special_coul; double *special_lj = force->special_lj; int newton_pair = force->newton_pair; @@ -285,7 +284,7 @@ void PairBornCoulWolf::init_style() if (!atom->q_flag) error->all(FLERR,"Pair style born/coul/wolf requires atom attribute q"); - int irequest = neighbor->request(this); + neighbor->request(this); cut_coulsq = cut_coul * cut_coul; } diff --git a/src/pair_coul_wolf.cpp b/src/pair_coul_wolf.cpp index 973d1e2c2d..250a8bb1d4 100644 --- a/src/pair_coul_wolf.cpp +++ b/src/pair_coul_wolf.cpp @@ -53,11 +53,11 @@ PairCoulWolf::~PairCoulWolf() void PairCoulWolf::compute(int eflag, int vflag) { - int i,j,ii,jj,inum,jnum,itype,jtype; + int i,j,ii,jj,inum,jnum; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; double rsq,forcecoul,factor_coul; double prefactor; - double r,rexp; + double r; int *ilist,*jlist,*numneigh,**firstneigh; double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq; @@ -68,9 +68,7 @@ void PairCoulWolf::compute(int eflag, int vflag) double **x = atom->x; double **f = atom->f; double *q = atom->q; - int *type = atom->type; int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; double *special_coul = force->special_coul; int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; @@ -95,7 +93,6 @@ void PairCoulWolf::compute(int eflag, int vflag) xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; - itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; @@ -112,7 +109,6 @@ void PairCoulWolf::compute(int eflag, int vflag) dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; if (rsq < cut_coulsq) { r = sqrt(rsq); @@ -214,7 +210,7 @@ void 
PairCoulWolf::init_style() if (!atom->q_flag) error->all(FLERR,"Pair coul/wolf requires atom attribute q"); - int irequest = neighbor->request(this); + neighbor->request(this); cut_coulsq = cut_coul*cut_coul; } @@ -298,8 +294,8 @@ double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, double factor_lj, double &fforce) { - double r6inv,r,prefactor,rexp; - double forcecoul,forceborn,phicoul; + double r,prefactor; + double forcecoul,phicoul; double e_shift,f_shift,dvdrr,erfcc,erfcd; e_shift = erfc(alf*cut_coul) / cut_coul; diff --git a/src/pair_gauss.cpp b/src/pair_gauss.cpp index 94d74d3c67..80b3d3a79c 100644 --- a/src/pair_gauss.cpp +++ b/src/pair_gauss.cpp @@ -63,7 +63,7 @@ void PairGauss::compute(int eflag, int vflag) { int i,j,ii,jj,inum,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double r,rsq; + double rsq; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index b2e8cf3bef..e15d4b00d8 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -189,16 +189,14 @@ void PairHybrid::allocate() void PairHybrid::settings(int narg, char **arg) { - int i,m,istyle; - if (narg < 1) error->all(FLERR,"Illegal pair_style command"); // delete old lists, since cannot just change settings if (nstyles) { - for (m = 0; m < nstyles; m++) delete styles[m]; + for (int m = 0; m < nstyles; m++) delete styles[m]; delete [] styles; - for (m = 0; m < nstyles; m++) delete [] keywords[m]; + for (int m = 0; m < nstyles; m++) delete [] keywords[m]; delete [] keywords; } diff --git a/src/pair_lj_smooth_linear.cpp b/src/pair_lj_smooth_linear.cpp index ed0eeeb6bc..a2cf43e50f 100644 --- a/src/pair_lj_smooth_linear.cpp +++ b/src/pair_lj_smooth_linear.cpp @@ -69,7 +69,6 @@ void PairLJSmoothLinear::compute(int eflag, int vflag) double **f = atom->f; int *type = atom->type; int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; double *special_lj = 
force->special_lj; int newton_pair = force->newton_pair; diff --git a/src/read_data.cpp b/src/read_data.cpp index 5f3633ddbd..280beb5543 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -757,7 +757,7 @@ void ReadData::header() void ReadData::atoms() { - int i,m,nchunk,eof; + int nchunk,eof; if (me == 0) { if (screen) fprintf(screen," reading atoms ...\n"); @@ -807,7 +807,7 @@ void ReadData::atoms() void ReadData::velocities() { - int i,m,nchunk,eof; + int nchunk,eof; if (me == 0) { if (screen) fprintf(screen," reading velocities ...\n"); @@ -1146,7 +1146,7 @@ void ReadData::impropers(int firstpass) void ReadData::bonus(bigint nbonus, AtomVec *ptr, const char *type) { - int i,m,nchunk,eof; + int nchunk,eof; int mapflag = 0; if (atom->map_style == 0) { @@ -1261,7 +1261,6 @@ void ReadData::bodies(int firstpass) void ReadData::mass() { - int i,m; char *next; char *buf = new char[atom->ntypes*MAXLINE]; @@ -1269,7 +1268,7 @@ void ReadData::mass() if (eof) error->all(FLERR,"Unexpected end of data file"); char *original = buf; - for (i = 0; i < atom->ntypes; i++) { + for (int i = 0; i < atom->ntypes; i++) { next = strchr(buf,'\n'); *next = '\0'; atom->set_mass(buf); diff --git a/src/read_restart.cpp b/src/read_restart.cpp index aeeec98e26..fb8cb02e31 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -611,9 +611,7 @@ void ReadRestart::file_search(char *infile, char *outfile) void ReadRestart::header(int incompatible) { - int px,py,pz; int xperiodic,yperiodic,zperiodic; - int boundary[3][2]; // read flags and fields until flag = -1 @@ -859,7 +857,6 @@ void ReadRestart::type_arrays() void ReadRestart::force_fields() { - int n; char *style; int flag = read_int(); diff --git a/src/set.cpp b/src/set.cpp index c221fbc3e0..ada5c8a04e 100644 --- a/src/set.cpp +++ b/src/set.cpp @@ -723,7 +723,7 @@ void Set::setrandom(int keyword) AtomVecEllipsoid *avec_ellipsoid = (AtomVecEllipsoid *) atom->style_match("ellipsoid"); - AtomVecLine *avec_line = (AtomVecLine *) 
atom->style_match("line"); + atom->style_match("line"); // DEAD CODE? AtomVecTri *avec_tri = (AtomVecTri *) atom->style_match("tri"); RanPark *random = new RanPark(lmp,1); @@ -787,8 +787,6 @@ void Set::setrandom(int keyword) // set quaternions to random orientations in 3d or 2d } else if (keyword == QUAT_RANDOM) { - int *ellipsoid = atom->ellipsoid; - int *tri = atom->tri; int nlocal = atom->nlocal; double *quat; diff --git a/src/variable.cpp b/src/variable.cpp index b13b4d9515..04dbd81d3b 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -680,8 +680,8 @@ void Variable::compute_atom(int ivar, int igroup, double *vstore; if (style[ivar] == ATOM) { - double tmp = evaluate(data[ivar][0],&tree); - tmp = collapse_tree(tree); + evaluate(data[ivar][0],&tree); + collapse_tree(tree); } else vstore = reader[ivar]->fix->vstore; int groupbit = group->bitmask[igroup]; @@ -2543,27 +2543,27 @@ int Variable::math_function(char *word, char *contents, Tree **tree, // evaluate args Tree *newtree; - double tmp,value1,value2,value3; + double value1,value2,value3; if (tree) { newtree = new Tree(); Tree *argtree; if (narg == 1) { - tmp = evaluate(arg1,&argtree); + evaluate(arg1,&argtree); newtree->left = argtree; newtree->middle = newtree->right = NULL; } else if (narg == 2) { - tmp = evaluate(arg1,&argtree); + evaluate(arg1,&argtree); newtree->left = argtree; newtree->middle = NULL; - tmp = evaluate(arg2,&argtree); + evaluate(arg2,&argtree); newtree->right = argtree; } else if (narg == 3) { - tmp = evaluate(arg1,&argtree); + evaluate(arg1,&argtree); newtree->left = argtree; - tmp = evaluate(arg2,&argtree); + evaluate(arg2,&argtree); newtree->middle = argtree; - tmp = evaluate(arg3,&argtree); + evaluate(arg3,&argtree); newtree->right = argtree; } treestack[ntreestack++] = newtree; diff --git a/src/write_data.cpp b/src/write_data.cpp index 899edacdc2..b4471aa895 100644 --- a/src/write_data.cpp +++ b/src/write_data.cpp @@ -422,7 +422,7 @@ void WriteData::bonds() // pack my bond data 
into buf - int foo = atom->avec->pack_bond(buf); + atom->avec->pack_bond(buf); // write one chunk of info per proc to file // proc 0 pings each proc, receives its chunk, writes to file From bfc2b92e320256ea1d0aa5db147362dda92c9285 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 16:02:06 +0000 Subject: [PATCH 25/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11618 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/compute_dihedral_local.cpp | 4 ---- src/image.cpp | 6 ------ src/set.cpp | 1 - 3 files changed, 11 deletions(-) diff --git a/src/compute_dihedral_local.cpp b/src/compute_dihedral_local.cpp index 258772e1a5..5565a6b289 100644 --- a/src/compute_dihedral_local.cpp +++ b/src/compute_dihedral_local.cpp @@ -206,10 +206,6 @@ int ComputeDihedralLocal::compute_dihedrals(int flag) rg = sqrt(rgsq); ra2inv = rb2inv = 0.0; -/* DEAD CODE - rginv = 0.0; - if (rg > 0) rginv = 1.0/rg; -*/ if (rasq > 0) ra2inv = 1.0/rasq; if (rbsq > 0) rb2inv = 1.0/rbsq; rabinv = sqrt(ra2inv*rb2inv); diff --git a/src/image.cpp b/src/image.cpp index e152a9afa8..80ea5293dd 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -692,7 +692,6 @@ void Image::draw_triangle(double *x, double *y, double *z, double *surfaceColor) { double d1[3], d1len, d2[3], d2len, normal[3], invndotd; double xlocal[3], ylocal[3], zlocal[3]; - //double center[3]; double surface[3]; double depth; @@ -722,11 +721,6 @@ void Image::draw_triangle(double *x, double *y, double *z, double *surfaceColor) if (invndotd == 0) return; double r[3],u[3]; -/* DEAD CODE - center[0] = (xlocal[0] + ylocal[0] + zlocal[0]) / 3; - center[1] = (xlocal[1] + ylocal[1] + zlocal[1]) / 3; - center[2] = (xlocal[2] + ylocal[2] + zlocal[2]) / 3; -*/ r[0] = MathExtra::dot3(camRight,xlocal); r[1] = MathExtra::dot3(camRight,ylocal); diff --git a/src/set.cpp b/src/set.cpp index ada5c8a04e..23a654af67 100644 --- a/src/set.cpp +++ b/src/set.cpp @@ -723,7 +723,6 @@ void Set::setrandom(int keyword) AtomVecEllipsoid *avec_ellipsoid = 
(AtomVecEllipsoid *) atom->style_match("ellipsoid"); - atom->style_match("line"); // DEAD CODE? AtomVecTri *avec_tri = (AtomVecTri *) atom->style_match("tri"); RanPark *random = new RanPark(lmp,1); From eeb3c26ddd5cdbac36b47fef884f18d45a994463 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 16:04:02 +0000 Subject: [PATCH 26/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11619 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/fix_box_relax.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/fix_box_relax.cpp b/src/fix_box_relax.cpp index 4a9febf403..0842ea01de 100644 --- a/src/fix_box_relax.cpp +++ b/src/fix_box_relax.cpp @@ -812,7 +812,7 @@ int FixBoxRelax::modify_param(int narg, char **arg) void FixBoxRelax::compute_sigma() { - double pdeviatoric[3][3];//,htmp[3][3]; + double pdeviatoric[3][3]; double tmp1[3][3],sigma_tensor[3][3],h_invtmp[3][3]; // reset reference box dimensions @@ -829,17 +829,7 @@ void FixBoxRelax::compute_sigma() h0_inv[3] = domain->h_inv[3]; h0_inv[4] = domain->h_inv[4]; h0_inv[5] = domain->h_inv[5]; -/* DEAD CODE - htmp[0][0] = h0[0]; - htmp[1][1] = h0[1]; - htmp[2][2] = h0[2]; - htmp[1][2] = h0[3]; - htmp[0][2] = h0[4]; - htmp[0][1] = h0[5]; - htmp[2][1] = 0.0; - htmp[2][0] = 0.0; - htmp[1][0] = 0.0; -*/ + h_invtmp[0][0] = h0_inv[0]; h_invtmp[1][1] = h0_inv[1]; h_invtmp[2][2] = h0_inv[2]; From f7a3672c367bdc24be9e49d17456bc12f0b08c8d Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 16:04:47 +0000 Subject: [PATCH 27/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11620 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/pair.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pair.cpp b/src/pair.cpp index 335436fcbc..617f85e108 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -528,7 +528,6 @@ void Pair::init_tables_disp(double cut_lj_global) rsq_lookup.i = i << ndispshiftbits; rsq_lookup.i |= maskhi; } - // r = sqrtf(rsq_lookup.f); DEAD CODE rsq = rsq_lookup.f; 
register double x2 = g2*rsq, a2 = 1.0/x2; x2 = a2*exp(-x2); @@ -575,7 +574,6 @@ void Pair::init_tables_disp(double cut_lj_global) if (rsq_lookup.f < (cut_lj_globalsq = cut_lj_global * cut_lj_global)) { rsq_lookup.f = cut_lj_globalsq; - // r = sqrtf(rsq_lookup.f); DEAD CODE register double x2 = g2*rsq, a2 = 1.0/x2; x2 = a2*exp(-x2); From 91360a80a85d569317a8168e651183f877567920 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 16:43:25 +0000 Subject: [PATCH 28/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11621 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/read_data.cpp | 115 +++++++++++++++++++++++++++++++++++++++------ src/read_data.h | 8 ++-- src/write_data.cpp | 14 +++--- 3 files changed, 111 insertions(+), 26 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index 280beb5543..65dff7301c 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -50,6 +50,14 @@ using namespace LAMMPS_NS; // customize for new sections #define NSECTIONS 25 // change when add to header::section_keywords +// pair style suffixes to ignore +// when matching Pair Coeffs comment to currently-defined pair style + +const char *suffixes[] = {"/cuda","/gpu","/opt","/omp","/kk", + "/coul/cut","/coul/long","/coul/msm", + "/coul/dsf","/coul/debye","/coul/charmm", + NULL}; + /* ---------------------------------------------------------------------- */ ReadData::ReadData(LAMMPS *lmp) : Pointers(lmp) @@ -57,6 +65,7 @@ ReadData::ReadData(LAMMPS *lmp) : Pointers(lmp) MPI_Comm_rank(world,&me); line = new char[MAXLINE]; keyword = new char[MAXLINE]; + style = new char[MAXLINE]; buffer = new char[CHUNK*MAXLINE]; narg = maxarg = 0; arg = NULL; @@ -80,6 +89,7 @@ ReadData::~ReadData() { delete [] line; delete [] keyword; + delete [] style; delete [] buffer; memory->sfree(arg); @@ -218,8 +228,12 @@ void ReadData::command(int narg, char **arg) if (strcmp(keyword,"Atoms") == 0) { atomflag = 1; - if (firstpass) atoms(); - else skip_lines(atom->natoms); + if (firstpass) { + 
if (me == 0 && !style_match(style,atom->atom_style)) + error->warning(FLERR,"Atom style in data file differs " + "from currently defined atom style"); + atoms(); + } else skip_lines(atom->natoms); } else if (strcmp(keyword,"Velocities") == 0) { if (atomflag == 0) error->all(FLERR,"Must read Atoms before Velocities"); @@ -287,41 +301,65 @@ void ReadData::command(int narg, char **arg) } else if (strcmp(keyword,"Pair Coeffs") == 0) { if (force->pair == NULL) error->all(FLERR,"Must define pair_style before Pair Coeffs"); - if (firstpass) paircoeffs(); - else skip_lines(atom->ntypes); + if (firstpass) { + if (me == 0 && !style_match(style,force->pair_style)) + error->warning(FLERR,"Pair style in data file differs " + "from currently defined pair style"); + paircoeffs(); + } else skip_lines(atom->ntypes); } else if (strcmp(keyword,"PairIJ Coeffs") == 0) { if (force->pair == NULL) error->all(FLERR,"Must define pair_style before PairIJ Coeffs"); - if (firstpass) pairIJcoeffs(); - else skip_lines(atom->ntypes*(atom->ntypes+1)/2); + if (firstpass) { + if (me == 0 && !style_match(style,force->pair_style)) + error->warning(FLERR,"Pair style in data file differs " + "from currently defined pair style"); + pairIJcoeffs(); + } else skip_lines(atom->ntypes*(atom->ntypes+1)/2); } else if (strcmp(keyword,"Bond Coeffs") == 0) { if (atom->avec->bonds_allow == 0) error->all(FLERR,"Invalid data file section: Bond Coeffs"); if (force->bond == NULL) error->all(FLERR,"Must define bond_style before Bond Coeffs"); - if (firstpass) bondcoeffs(); - else skip_lines(atom->nbondtypes); + if (firstpass) { + if (me == 0 && !style_match(style,force->bond_style)) + error->warning(FLERR,"Bond style in data file differs " + "from currently defined bond style"); + bondcoeffs(); + } else skip_lines(atom->nbondtypes); } else if (strcmp(keyword,"Angle Coeffs") == 0) { if (atom->avec->angles_allow == 0) error->all(FLERR,"Invalid data file section: Angle Coeffs"); if (force->angle == NULL) 
error->all(FLERR,"Must define angle_style before Angle Coeffs"); - if (firstpass) anglecoeffs(0); - else skip_lines(atom->nangletypes); + if (firstpass) { + if (me == 0 && !style_match(style,force->angle_style)) + error->warning(FLERR,"Angle style in data file differs " + "from currently defined angle style"); + anglecoeffs(0); + } else skip_lines(atom->nangletypes); } else if (strcmp(keyword,"Dihedral Coeffs") == 0) { if (atom->avec->dihedrals_allow == 0) error->all(FLERR,"Invalid data file section: Dihedral Coeffs"); if (force->dihedral == NULL) error->all(FLERR,"Must define dihedral_style before Dihedral Coeffs"); - if (firstpass) dihedralcoeffs(0); - else skip_lines(atom->ndihedraltypes); + if (firstpass) { + if (me == 0 && !style_match(style,force->dihedral_style)) + error->warning(FLERR,"Dihedral style in data file differs " + "from currently defined dihedral style"); + dihedralcoeffs(0); + } else skip_lines(atom->ndihedraltypes); } else if (strcmp(keyword,"Improper Coeffs") == 0) { if (atom->avec->impropers_allow == 0) error->all(FLERR,"Invalid data file section: Improper Coeffs"); if (force->improper == NULL) error->all(FLERR,"Must define improper_style before Improper Coeffs"); - if (firstpass) impropercoeffs(0); - else skip_lines(atom->nimpropertypes); + if (firstpass) { + if (me == 0 && !style_match(style,force->improper_style)) + error->warning(FLERR,"Improper style in data file differs " + "from currently defined improper style"); + impropercoeffs(0); + } else skip_lines(atom->nimpropertypes); } else if (strcmp(keyword,"BondBond Coeffs") == 0) { if (atom->avec->angles_allow == 0) @@ -1464,8 +1502,14 @@ void ReadData::open(char *file) else { #ifdef LAMMPS_GZIP char gunzip[128]; - sprintf(gunzip,"gunzip -c %s",file); + sprintf(gunzip,"gzip -c -d %s",file); + +#ifdef _WIN32 + fp = _popen(gunzip,"rb"); +#else fp = popen(gunzip,"r"); +#endif + #else error->one(FLERR,"Cannot open gzipped file"); #endif @@ -1482,6 +1526,7 @@ void ReadData::open(char *file) 
grab next keyword read lines until one is non-blank keyword is all text on line w/out leading & trailing white space + optional style can be appended after comment char '#' read one additional line (assumed blank) if any read hits EOF, set keyword to empty if first = 1, line variable holds non-blank line that ended header @@ -1519,6 +1564,19 @@ void ReadData::parse_keyword(int first) MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(line,n,MPI_CHAR,0,world); + // store optional "style" following comment char '#' after keyword + + char *ptr; + if ((ptr = strchr(line,'#'))) { + *ptr++ = '\0'; + while (*ptr == ' ' || *ptr == '\t') ptr++; + int stop = strlen(ptr) - 1; + while (ptr[stop] == ' ' || ptr[stop] == '\t' + || ptr[stop] == '\n' || ptr[stop] == '\r') stop--; + ptr[stop+1] = '\0'; + strcpy(style,ptr); + } else style[0] = '\0'; + // copy non-whitespace portion of line into keyword int start = strspn(line," \t\n\r"); @@ -1572,3 +1630,30 @@ void ReadData::parse_coeffs(char *line, const char *addstr, int dupflag) word = strtok(NULL," \t\n\r\f"); } } + +/* ---------------------------------------------------------------------- + compare two style strings if they both exist + one = comment in data file section, two = currently-defined style + ignore suffixes listed in suffixes array at top of file +------------------------------------------------------------------------- */ + +int ReadData::style_match(const char *one, const char *two) +{ + int i,delta,len,len1,len2; + + if ((one == NULL) || (two == NULL)) return 1; + + len1 = strlen(one); + len2 = strlen(two); + + for (i = 0; suffixes[i] != NULL; i++) { + len = strlen(suffixes[i]); + if ((delta = len1 - len) > 0) + if (strcmp(one+delta,suffixes[i]) == 0) len1 = delta; + if ((delta = len2 - len) > 0) + if (strcmp(two+delta,suffixes[i]) == 0) len2 = delta; + } + + if ((len1 == 0) || (len1 == len2) || (strncmp(one,two,len1) == 0)) return 1; + return 0; +} diff --git a/src/read_data.h b/src/read_data.h index 
8d83cfeb06..7026efcc67 100644 --- a/src/read_data.h +++ b/src/read_data.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -32,11 +32,10 @@ class ReadData : protected Pointers { void command(int, char **); private: - int me; - char *line,*keyword,*buffer; + char *line,*keyword,*buffer,*style; FILE *fp; - int narg,maxarg,compressed; char **arg; + int me,narg,maxarg,compressed; int nfix; // # of extra fixes that process/store info in data file int *fix_index; @@ -59,6 +58,7 @@ class ReadData : protected Pointers { void parse_keyword(int); void skip_lines(bigint); void parse_coeffs(char *, const char *, int); + int style_match(const char *, const char *); void atoms(); void velocities(); diff --git a/src/write_data.cpp b/src/write_data.cpp index b4471aa895..e9809692c9 100644 --- a/src/write_data.cpp +++ b/src/write_data.cpp @@ -272,27 +272,27 @@ void WriteData::force_fields() { if (force->pair && force->pair->writedata) { if (pairflag == II) { - fprintf(fp,"\nPair Coeffs\n\n"); + fprintf(fp,"\nPair Coeffs # %s\n\n", force->pair_style); force->pair->write_data(fp); } else if (pairflag == IJ) { - fprintf(fp,"\nPairIJ Coeffs\n\n"); + fprintf(fp,"\nPairIJ Coeffs # %s\n\n", force->pair_style); force->pair->write_data_all(fp); } } if (force->bond && force->bond->writedata) { - fprintf(fp,"\nBond Coeffs\n\n"); + fprintf(fp,"\nBond Coeffs # %s\n\n", force->bond_style); force->bond->write_data(fp); } if (force->angle && force->angle->writedata) { - fprintf(fp,"\nAngle Coeffs\n\n"); + fprintf(fp,"\nAngle Coeffs # %s\n\n", force->angle_style); force->angle->write_data(fp); } if (force->dihedral && force->dihedral->writedata) { - fprintf(fp,"\nDihedral Coeffs\n\n"); + fprintf(fp,"\nDihedral Coeffs # %s\n\n", 
force->dihedral_style); force->dihedral->write_data(fp); } if (force->improper && force->improper->writedata) { - fprintf(fp,"\nImproper Coeffs\n\n"); + fprintf(fp,"\nImproper Coeffs # %s\n\n", force->improper_style); force->improper->write_data(fp); } } @@ -329,7 +329,7 @@ void WriteData::atoms() MPI_Request request; if (me == 0) { - fprintf(fp,"\nAtoms\n\n"); + fprintf(fp,"\nAtoms # %s\n\n",atom->atom_style); for (int iproc = 0; iproc < nprocs; iproc++) { if (iproc) { MPI_Irecv(&buf[0][0],maxrow*ncol,MPI_DOUBLE,iproc,0,world,&request); From 82532b94bbcfad1349d0d7f0652384cdcb526c80 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 16:53:22 +0000 Subject: [PATCH 29/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11622 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- doc/read_data.html | 28 +++++++++++++++++++++++----- doc/read_data.txt | 28 +++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/doc/read_data.html b/doc/read_data.html index 290a6797c9..8577c9f3b3 100644 --- a/doc/read_data.html +++ b/doc/read_data.html @@ -56,11 +56,13 @@ value(s) is read from the line. If it doesn't contain a header keyword, the line begins the body of the file.

    The body of the file contains zero or more sections. The first line -of a section has only a keyword. The next line is skipped. The -remaining lines of the section contain values. The number of lines -depends on the section keyword as described below. Zero or more blank -lines can be used between sections. Sections can appear in any order, -with a few exceptions as noted below. +of a section has only a keyword. This line can have a trailing +comment starting with '#' that is either ignored or can be used to +check for a style match, as described below. The next line is +skipped. The remaining lines of the section contain values. The +number of lines depends on the section keyword as described below. +Zero or more blank lines can be used between sections. Sections can +appear in any order, with a few exceptions as noted below.

    The keyword fix can be used one or more times. Each usage specifies a fix that will be used to process a specific portion of the data @@ -226,6 +228,22 @@ the maximum values defined in any of the template molecules.

  • Pair Coeffs, PairIJ Coeffs, Bond Coeffs, Angle Coeffs, Dihedral Coeffs, Improper Coeffs = force field sections
  • BondBond Coeffs, BondAngle Coeffs, MiddleBondTorsion Coeffs, EndBondTorsion Coeffs, AngleTorsion Coeffs, AngleAngleTorsion Coeffs, BondBond13 Coeffs, AngleAngle Coeffs = class 2 force field sections
+

These keywords will check an appended comment for a match with the +currently defined style: +

+
  • Atoms, Pair Coeffs, PairIJ Coeffs, Bond Coeffs, Angle Coeffs, Dihedral Coeffs, Improper Coeffs +
+

For example, these lines: +

+
Atoms # sphere
+Pair Coeffs # lj/cut 
+
+

will check if the currently-defined atom_style is +sphere, and the current pair_style is lj/cut. If +not, LAMMPS will issue a warning to indicate that the data file +section likely does not contain the correct number or type of +parameters expected for the currently-defined style. +

Each section is listed below in alphabetic order. The format of each section is described including the number of lines it must contain and rules (if any) for where it can appear in the data file. diff --git a/doc/read_data.txt b/doc/read_data.txt index 59bd1e131a..4ff5159bf8 100644 --- a/doc/read_data.txt +++ b/doc/read_data.txt @@ -50,11 +50,13 @@ value(s) is read from the line. If it doesn't contain a header keyword, the line begins the body of the file. The body of the file contains zero or more sections. The first line -of a section has only a keyword. The next line is skipped. The -remaining lines of the section contain values. The number of lines -depends on the section keyword as described below. Zero or more blank -lines can be used between sections. Sections can appear in any order, -with a few exceptions as noted below. +of a section has only a keyword. This line can have a trailing +comment starting with '#' that is either ignored or can be used to +check for a style match, as described below. The next line is +skipped. The remaining lines of the section contain values. The +number of lines depends on the section keyword as described below. +Zero or more blank lines can be used between sections. Sections can +appear in any order, with a few exceptions as noted below. The keyword {fix} can be used one or more times. Each usage specifies a fix that will be used to process a specific portion of the data @@ -223,6 +225,22 @@ These are the section keywords for the body of the file. 
EndBondTorsion Coeffs, AngleTorsion Coeffs, AngleAngleTorsion Coeffs, \ BondBond13 Coeffs, AngleAngle Coeffs} = class 2 force field sections :ul +These keywords will check an appended comment for a match with the +currently defined style: + +{Atoms, Pair Coeffs, PairIJ Coeffs, Bond Coeffs, Angle Coeffs, Dihedral Coeffs, Improper Coeffs} :ul + +For example, these lines: + +Atoms # sphere +Pair Coeffs # lj/cut :pre + +will check if the currently-defined "atom_style"_atom_style.html is +{sphere}, and the current "pair_style"_pair_style.html is {lj/cut}. If +not, LAMMPS will issue a warning to indicate that the data file +section likely does not contain the correct number or type of +parameters expected for the currently-defined style. + Each section is listed below in alphabetic order. The format of each section is described including the number of lines it must contain and rules (if any) for where it can appear in the data file. From d2b44d8c3b6c36170b19680d8834ff4c475d9aa6 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 19:24:05 +0000 Subject: [PATCH 30/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11624 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/read_data.cpp | 40 +++++++++++++++++++++++++++++++--------- src/read_data.h | 6 +++++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index 65dff7301c..085603be54 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -23,6 +23,7 @@ #include "atom_vec_ellipsoid.h" #include "atom_vec_line.h" #include "atom_vec_tri.h" +#include "force.h" #include "molecule.h" #include "comm.h" #include "update.h" @@ -108,13 +109,10 @@ void ReadData::command(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal read_data command"); - if (domain->box_exist) - error->all(FLERR,"Cannot read_data after simulation box is defined"); - if (domain->dimension == 2 && domain->zperiodic == 0) - error->all(FLERR,"Cannot run 2d simulation with nonperiodic Z 
dimension"); - - // fixes that process data file info + // optional args + addflag = mergeflag = 0; + offset[0] = offset[1] = offset[2] = 0.0; nfix = 0; fix_index = NULL; fix_header = NULL; @@ -122,7 +120,20 @@ void ReadData::command(int narg, char **arg) int iarg = 1; while (iarg < narg) { - if (strcmp(arg[iarg],"fix") == 0) { + if (strcmp(arg[iarg],"add") == 0) { + addflag = 1; + iarg++; + } else if (strcmp(arg[iarg],"merge") == 0) { + mergeflag = 1; + iarg++; + } else if (strcmp(arg[iarg],"offset") == 0) { + if (iarg+4 > narg) + error->all(FLERR,"Illegal read_data command"); + offset[0] = force->numeric(FLERR,arg[iarg+1]); + offset[1] = force->numeric(FLERR,arg[iarg+2]); + offset[2] = force->numeric(FLERR,arg[iarg+3]); + iarg += 4; + } else if (strcmp(arg[iarg],"fix") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal read_data command"); memory->grow(fix_index,nfix+1,"read_data:fix_index"); @@ -149,6 +160,18 @@ void ReadData::command(int narg, char **arg) } else error->all(FLERR,"Illegal read_data command"); } + // error checks + + if (domain->box_exist && !addflag && !mergeflag) + error->all(FLERR,"Cannot read_data after simulation box is defined"); + if (addflag && mergeflag) error->all(FLERR,"Cannot read_data add and merge"); + if (domain->dimension == 2 && offset[2] != 0.0) + error->all(FLERR,"Cannot use non-zero z offset in read_data " + "for 2d simulation"); + + if (domain->dimension == 2 && domain->zperiodic == 0) + error->all(FLERR,"Cannot run 2d simulation with nonperiodic Z dimension"); + // perform 1-pass read if no molecular topoogy in file // perform 2-pass read if molecular topology, // first pass calculates max topology/atom @@ -633,8 +656,6 @@ void ReadData::header() if (eof == NULL) error->one(FLERR,"Unexpected end of data file"); } - // customize for new header lines - while (1) { // read a line and bcast length if flag is set @@ -675,6 +696,7 @@ void ReadData::header() } // search line for header keyword and set corresponding variable + // 
customize for new header lines if (strstr(line,"atoms")) { sscanf(line,BIGINT_FORMAT,&atom->natoms); diff --git a/src/read_data.h b/src/read_data.h index 7026efcc67..685c707b8e 100644 --- a/src/read_data.h +++ b/src/read_data.h @@ -37,7 +37,11 @@ class ReadData : protected Pointers { char **arg; int me,narg,maxarg,compressed; - int nfix; // # of extra fixes that process/store info in data file + // optional args + + int addflag,mergeflag; + double offset[3]; + int nfix; int *fix_index; char **fix_header; char **fix_section; From 2b40fd23496656d7f41cd8096bc974e132482209 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 22:16:18 +0000 Subject: [PATCH 31/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11625 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/fix.h | 3 +-- src/lmptype.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fix.h b/src/fix.h index bf2aa7e46a..5b1f5e2dea 100644 --- a/src/fix.h +++ b/src/fix.h @@ -87,8 +87,7 @@ class Fix : protected Pointers { // KOKKOS host/device flag and per-fix data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_read_ext; - unsigned int datamask_modify, datamask_modify_ext; + unsigned int datamask_read, datamask_modify; // USER-CUDA per-fix data masks diff --git a/src/lmptype.h b/src/lmptype.h index 1390d32477..a03c8601ad 100644 --- a/src/lmptype.h +++ b/src/lmptype.h @@ -48,7 +48,7 @@ namespace LAMMPS_NS { -// enum used for KOKKOS host/device flag +// enum used for KOKKOS host/device flags enum ExecutionSpace{Host,Device}; From 039af550a04f2766b9b95bb5382915bf65e02348 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 10 Mar 2014 22:16:34 +0000 Subject: [PATCH 32/32] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11626 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/variable.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index 04dbd81d3b..b9c5152d1e 100644 --- 
a/src/variable.cpp +++ b/src/variable.cpp @@ -3941,7 +3941,7 @@ unsigned int Variable::data_mask(char *str) { unsigned int datamask = EMPTY_MASK; - for (unsigned int i=0; i < strlen(str)-2; i++) { + for (unsigned int i = 0; i < strlen(str)-2; i++) { int istart = i; while (isalnum(str[i]) || str[i] == '_') i++; int istop = i-1; @@ -3974,21 +3974,21 @@ unsigned int Variable::data_mask(char *str) } if ((strncmp(word,"f_",2) == 0) && (i>0) && (!isalnum(str[i-1]))) { - if (domain->box_exist == 0) - error->all(FLERR, - "Variable evaluation before simulation box is defined"); - - n = strlen(word) - 2 + 1; - char *id = new char[n]; - strcpy(id,&word[2]); - - int ifix = modify->find_fix(id); - if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula"); - - datamask &= modify->fix[ifix]->data_mask(); - delete [] id; + if (domain->box_exist == 0) + error->all(FLERR, + "Variable evaluation before simulation box is defined"); + + n = strlen(word) - 2 + 1; + char *id = new char[n]; + strcpy(id,&word[2]); + + int ifix = modify->find_fix(id); + if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula"); + + datamask &= modify->fix[ifix]->data_mask(); + delete [] id; } - + if ((strncmp(word,"v_",2) == 0) && (i>0) && (!isalnum(str[i-1]))) { int ivar = find(word); datamask &= data_mask(ivar);