From cf16cc1dd2e72c0b008042e6cab202a73e7d0c83 Mon Sep 17 00:00:00 2001 From: pscrozi Date: Sat, 7 Jul 2012 17:46:24 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8482 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/pppm_cg.cpp | 1224 ++++++++++++++++++++-------------------- 1 file changed, 612 insertions(+), 612 deletions(-) diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp index 36945ac987..a2ad349b48 100644 --- a/src/KSPACE/pppm_cg.cpp +++ b/src/KSPACE/pppm_cg.cpp @@ -1,612 +1,612 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "math.h" -#include "stdlib.h" - -#include "atom.h" -#include "domain.h" -#include "error.h" -#include "force.h" -#include "memory.h" -#include "pppm_cg.h" - -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define OFFSET 16384 -#define SMALLQ 0.00001 -#if defined(FFT_SINGLE) -#define ZEROF 0.0f -#else -#define ZEROF 0.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) -{ - if ((narg < 1) || (narg > 2)) - error->all(FLERR,"Illegal kspace_style pppm/cg command"); - - if (narg == 2) - smallq = atof(arg[1]); - else - smallq = SMALLQ; - - num_charged = -1; - is_charged = NULL; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMCG::~PPPMCG() -{ - memory->destroy(is_charged); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMCG::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - peratom_allocate_flag = 1; - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - memory->destroy(is_charged); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - memory->create(is_charged,nmax,"pppm/cg:is_charged"); - } - - // one time setup message - - if (num_charged < 0) { - bigint charged_all, charged_num; - double charged_frac, charged_fmax, charged_fmin; - - num_charged=0; - for (i=0; i < atom->nlocal; ++i) - if (fabs(atom->q[i]) > smallq) - ++num_charged; - - // get fraction of charged particles per domain - - if (atom->nlocal > 0) - charged_frac = static_cast(num_charged) * 100.0 - / static_cast(atom->nlocal); - else - charged_frac = 0.0; - MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world); - MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world); - - // get fraction of charged particles overall - - charged_num = num_charged; - MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world); - charged_frac = static_cast(charged_all) * 100.0 - / static_cast(atom->natoms); - - if (me == 0) { - if (screen) - fprintf(screen, - " PPPM/cg optimization cutoff: %g\n" - " Total charged atoms: %.1f%%\n" - " Min/max charged atoms/proc: %.1f%% %.1f%%\n", - smallq,charged_frac,charged_fmin,charged_fmax); - if (logfile) - fprintf(logfile, - " PPPM/cg optimization cutoff: %g\n" - " Total charged atoms: %.1f%%\n" - " Min/max charged atoms/proc: %.1f%% %.1f%%\n", - smallq,charged_frac,charged_fmin,charged_fmax); - } - } - - num_charged = 0; - for (i = 0; i < atom->nlocal; ++i) - if (fabs(atom->q[i]) > smallq) { - is_charged[num_charged] = i; - ++num_charged; - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - if (differentiation_flag == 1) { - poisson_ad(); - fillbrick_ad(); - fieldforce_ad(); - if (vflag_atom) fillbrick_peratom_ad(); - } else { - poisson_ik(); - fillbrick_ik(); - fieldforce_ik(); - if (evflag_atom) fillbrick_peratom_ik(); - } - - if (evflag_atom) fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - - if (eflag_atom) { - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) { - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale; - } - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMCG::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - - int flag = 0; - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMCG::make_rho() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (i = 0; i < ngrid; i++) vec[i] = ZEROF; - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMCG::fieldforce_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - for (int j = 0; j < num_charged; j++) { - i = is_charged[j]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMCG::fieldforce_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (int j = 0; j < num_charged; j++) { - i = is_charged[j]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - compute_drho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - // convert E-field to force and substract self forces - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial - ------------------------------------------------------------------------- */ - -void PPPMCG::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - for (int j = 0; j < num_charged; j++) { - i = is_charged[j]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += v0; - vatom[i][1] += v1; - vatom[i][2] += v2; - vatom[i][3] += v3; - vatom[i][4] += v4; - vatom[i][5] += v5; - } - } -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. -------------------------------------------------------------------------- */ - -void PPPMCG::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - - double dipole = 0.0; - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - dipole += q[i]*x[i][2]; - } - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // compute corrections - - const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - //per-atom energy - - if (eflag_atom) { - double efact = 2.0*MY_PI*dipole_all/volume; - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - eatom[i] += qscale * q[i]*x[i][2]*efact; - } - } - - // add on force corrections - - const double ffact = -4.0*MY_PI*dipole_all/volume * qscale; - double **f = atom->f; - - for (int j = 0; j < num_charged; j++) { - int i = is_charged[j]; - f[i][2] += q[i]*ffact; - } -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMCG::memory_usage() -{ - double bytes = PPPM::memory_usage(); - bytes += nmax * sizeof(int); - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "stdlib.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "pppm_cg.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define OFFSET 16384 +#define SMALLQ 0.00001 +#if defined(FFT_SINGLE) +#define ZEROF 0.0f +#else +#define ZEROF 0.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) +{ + if ((narg < 1) || (narg > 2)) + error->all(FLERR,"Illegal kspace_style pppm/cg command"); + + if (narg == 2) + smallq = atof(arg[1]); + else + smallq = SMALLQ; + + num_charged = -1; + is_charged = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMCG::~PPPMCG() +{ + memory->destroy(is_charged); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMCG::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + peratom_allocate_flag = 1; + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + memory->destroy(is_charged); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + memory->create(is_charged,nmax,"pppm/cg:is_charged"); + } + + // one time setup message + + if (num_charged < 0) { + bigint charged_all, charged_num; + double charged_frac, charged_fmax, charged_fmin; + + num_charged=0; + for (i=0; i < atom->nlocal; ++i) + if (fabs(atom->q[i]) > smallq) + ++num_charged; + + // get fraction of charged particles per domain + + if (atom->nlocal > 0) + charged_frac = static_cast(num_charged) * 100.0 + / static_cast(atom->nlocal); + else + charged_frac = 0.0; + MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world); + MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world); + + // get fraction of charged particles overall + + charged_num = num_charged; + MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world); + charged_frac = static_cast(charged_all) * 100.0 + / static_cast(atom->natoms); + + if (me == 0) { + if (screen) + fprintf(screen, + " PPPM/cg optimization cutoff: %g\n" + " Total charged atoms: %.1f%%\n" + " Min/max charged atoms/proc: %.1f%% %.1f%%\n", + smallq,charged_frac,charged_fmin,charged_fmax); + if (logfile) + fprintf(logfile, + " PPPM/cg optimization cutoff: %g\n" + " Total charged atoms: %.1f%%\n" + " Min/max charged atoms/proc: %.1f%% %.1f%%\n", + smallq,charged_frac,charged_fmin,charged_fmax); + } + } + + num_charged = 0; + for (i = 0; i < atom->nlocal; ++i) + if (fabs(atom->q[i]) > smallq) { + is_charged[num_charged] = i; + ++num_charged; + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + if (differentiation_flag == 1) { + poisson_ad(); + fillbrick_ad(); + fieldforce_ad(); + if (vflag_atom) fillbrick_peratom_ad(); + } else { + poisson_ik(); + fillbrick_ik(); + fieldforce_ik(); + if (evflag_atom) fillbrick_peratom_ik(); + } + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale; + } + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMCG::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + + int flag = 0; + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMCG::make_rho() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (i = 0; i < ngrid; i++) vec[i] = ZEROF; + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMCG::fieldforce_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMCG::fieldforce_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + compute_drho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + // convert E-field to force and substract self forces + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMCG::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. +------------------------------------------------------------------------- */ + +void PPPMCG::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + + double dipole = 0.0; + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + dipole += q[i]*x[i][2]; + } + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // compute corrections + + const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + //per-atom energy + + if (eflag_atom) { + double efact = 2.0*MY_PI*dipole_all/volume; + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + eatom[i] += qscale * q[i]*x[i][2]*efact; + } + } + + // add on force corrections + + const double ffact = -4.0*MY_PI*dipole_all/volume * qscale; + double **f = atom->f; + + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + f[i][2] += q[i]*ffact; + } +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMCG::memory_usage() +{ + double bytes = PPPM::memory_usage(); + bytes += nmax * sizeof(int); + return bytes; +}