git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9449 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -19,8 +19,10 @@
|
|||||||
#include "mpi.h"
|
#include "mpi.h"
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
#include "stdlib.h"
|
#include "stdlib.h"
|
||||||
|
#include "string.h"
|
||||||
|
|
||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
|
#include "commgrid.h"
|
||||||
#include "domain.h"
|
#include "domain.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
@ -34,7 +36,11 @@ using namespace MathConst;
|
|||||||
|
|
||||||
#define OFFSET 16384
|
#define OFFSET 16384
|
||||||
#define SMALLQ 0.00001
|
#define SMALLQ 0.00001
|
||||||
#if defined(FFT_SINGLE)
|
|
||||||
|
enum{REVERSE_RHO};
|
||||||
|
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
|
||||||
|
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
#define ZEROF 0.0f
|
#define ZEROF 0.0f
|
||||||
#else
|
#else
|
||||||
#define ZEROF 0.0
|
#define ZEROF 0.0
|
||||||
@ -42,7 +48,7 @@ using namespace MathConst;
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPMOld(lmp, narg, arg)
|
PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
|
||||||
{
|
{
|
||||||
if ((narg < 1) || (narg > 2))
|
if ((narg < 1) || (narg > 2))
|
||||||
error->all(FLERR,"Illegal kspace_style pppm/cg command");
|
error->all(FLERR,"Illegal kspace_style pppm/cg command");
|
||||||
@ -54,6 +60,7 @@ PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPMOld(lmp, narg, arg)
|
|||||||
|
|
||||||
num_charged = -1;
|
num_charged = -1;
|
||||||
is_charged = NULL;
|
is_charged = NULL;
|
||||||
|
group_group_enable = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -71,7 +78,6 @@ PPPMCG::~PPPMCG()
|
|||||||
|
|
||||||
void PPPMCG::compute(int eflag, int vflag)
|
void PPPMCG::compute(int eflag, int vflag)
|
||||||
{
|
{
|
||||||
int i,j;
|
|
||||||
|
|
||||||
// set energy/virial flags
|
// set energy/virial flags
|
||||||
// invoke allocate_peratom() if needed for first time
|
// invoke allocate_peratom() if needed for first time
|
||||||
@ -82,6 +88,8 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
|
|
||||||
if (evflag_atom && !peratom_allocate_flag) {
|
if (evflag_atom && !peratom_allocate_flag) {
|
||||||
allocate_peratom();
|
allocate_peratom();
|
||||||
|
cg_peratom->ghost_notify();
|
||||||
|
cg_peratom->setup();
|
||||||
peratom_allocate_flag = 1;
|
peratom_allocate_flag = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,7 +118,7 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
double charged_frac, charged_fmax, charged_fmin;
|
double charged_frac, charged_fmax, charged_fmin;
|
||||||
|
|
||||||
num_charged=0;
|
num_charged=0;
|
||||||
for (i=0; i < atom->nlocal; ++i)
|
for (int i=0; i < atom->nlocal; ++i)
|
||||||
if (fabs(atom->q[i]) > smallq)
|
if (fabs(atom->q[i]) > smallq)
|
||||||
++num_charged;
|
++num_charged;
|
||||||
|
|
||||||
@ -148,7 +156,7 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
}
|
}
|
||||||
|
|
||||||
num_charged = 0;
|
num_charged = 0;
|
||||||
for (i = 0; i < atom->nlocal; ++i)
|
for (int i = 0; i < atom->nlocal; ++i)
|
||||||
if (fabs(atom->q[i]) > smallq) {
|
if (fabs(atom->q[i]) > smallq) {
|
||||||
is_charged[num_charged] = i;
|
is_charged[num_charged] = i;
|
||||||
++num_charged;
|
++num_charged;
|
||||||
@ -164,6 +172,7 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
// to fully sum contribution in their 3d bricks
|
// to fully sum contribution in their 3d bricks
|
||||||
// remap from 3d decomposition to FFT decomposition
|
// remap from 3d decomposition to FFT decomposition
|
||||||
|
|
||||||
|
cg->reverse_comm(this,REVERSE_RHO);
|
||||||
brick2fft();
|
brick2fft();
|
||||||
|
|
||||||
// compute potential gradient on my FFT grid and
|
// compute potential gradient on my FFT grid and
|
||||||
@ -176,11 +185,17 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
// all procs communicate E-field values
|
// all procs communicate E-field values
|
||||||
// to fill ghost cells surrounding their 3d bricks
|
// to fill ghost cells surrounding their 3d bricks
|
||||||
|
|
||||||
fillbrick();
|
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
|
||||||
|
else cg->forward_comm(this,FORWARD_IK);
|
||||||
|
|
||||||
// extra per-atom energy/virial communication
|
// extra per-atom energy/virial communication
|
||||||
|
|
||||||
if (evflag_atom) fillbrick_peratom();
|
if (evflag_atom) {
|
||||||
|
if (differentiation_flag == 1 && vflag_atom)
|
||||||
|
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
|
||||||
|
else if (differentiation_flag == 0)
|
||||||
|
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
|
||||||
|
}
|
||||||
|
|
||||||
// calculate the force on my particles
|
// calculate the force on my particles
|
||||||
|
|
||||||
@ -210,19 +225,18 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
if (vflag_global) {
|
if (vflag_global) {
|
||||||
double virial_all[6];
|
double virial_all[6];
|
||||||
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
|
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
|
||||||
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
|
for (int i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// per-atom energy/virial
|
// per-atom energy/virial
|
||||||
// energy includes self-energy correction
|
// energy includes self-energy correction
|
||||||
|
|
||||||
if (evflag_atom) {
|
if (evflag_atom) {
|
||||||
double *q = atom->q;
|
const double * const q = atom->q;
|
||||||
int nlocal = atom->nlocal;
|
|
||||||
|
|
||||||
if (eflag_atom) {
|
if (eflag_atom) {
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (int j = 0; j < num_charged; j++) {
|
||||||
int i = is_charged[j];
|
const int i = is_charged[j];
|
||||||
eatom[i] *= 0.5;
|
eatom[i] *= 0.5;
|
||||||
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
|
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
|
||||||
(g_ewald*g_ewald*volume);
|
(g_ewald*g_ewald*volume);
|
||||||
@ -231,16 +245,16 @@ void PPPMCG::compute(int eflag, int vflag)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (vflag_atom) {
|
if (vflag_atom) {
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (int n = 0; n < num_charged; n++) {
|
||||||
int i = is_charged[j];
|
const int i = is_charged[n];
|
||||||
for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale;
|
for (int j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2d slab correction
|
// 2d slab correction
|
||||||
|
|
||||||
if (slabflag) slabcorr();
|
if (slabflag == 1) slabcorr();
|
||||||
|
|
||||||
// convert atoms back from lamda to box coords
|
// convert atoms back from lamda to box coords
|
||||||
|
|
||||||
@ -279,7 +293,8 @@ void PPPMCG::particle_map()
|
|||||||
|
|
||||||
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
|
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
|
||||||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
|
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
|
||||||
nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1;
|
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
|
||||||
|
flag = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
|
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
|
||||||
@ -299,8 +314,8 @@ void PPPMCG::make_rho()
|
|||||||
|
|
||||||
// clear 3d density array
|
// clear 3d density array
|
||||||
|
|
||||||
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
|
||||||
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
|
ngrid*sizeof(FFT_SCALAR));
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
// loop over my charges, add their contribution to nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -311,7 +326,7 @@ void PPPMCG::make_rho()
|
|||||||
double **x = atom->x;
|
double **x = atom->x;
|
||||||
|
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (int j = 0; j < num_charged; j++) {
|
||||||
int i = is_charged[j];
|
i = is_charged[j];
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
nx = part2grid[i][0];
|
||||||
ny = part2grid[i][1];
|
ny = part2grid[i][1];
|
||||||
@ -337,12 +352,11 @@ void PPPMCG::make_rho()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
interpolate from grid to get electric field & force on my particles
|
interpolate from grid to get electric field & force on my particles for ik
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMCG::fieldforce()
|
void PPPMCG::fieldforce_ik()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
@ -392,74 +406,162 @@ void PPPMCG::fieldforce()
|
|||||||
const double qfactor = force->qqrd2e * scale * q[i];
|
const double qfactor = force->qqrd2e * scale * q[i];
|
||||||
f[i][0] += qfactor*ekx;
|
f[i][0] += qfactor*ekx;
|
||||||
f[i][1] += qfactor*eky;
|
f[i][1] += qfactor*eky;
|
||||||
f[i][2] += qfactor*ekz;
|
if (slabflag != 2) f[i][2] += qfactor*ekz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
interpolate from grid to get per-atom energy/virial
|
interpolate from grid to get electric field & force on my particles for ad
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCG::fieldforce_ad()
|
||||||
|
{
|
||||||
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
|
FFT_SCALAR dx,dy,dz;
|
||||||
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
|
double s1,s2,s3;
|
||||||
|
double sf = 0.0;
|
||||||
|
double *prd;
|
||||||
|
|
||||||
|
if (triclinic == 0) prd = domain->prd;
|
||||||
|
else prd = domain->prd_lamda;
|
||||||
|
|
||||||
|
double xprd = prd[0];
|
||||||
|
double yprd = prd[1];
|
||||||
|
double zprd = prd[2];
|
||||||
|
|
||||||
|
double hx_inv = nx_pppm/xprd;
|
||||||
|
double hy_inv = ny_pppm/yprd;
|
||||||
|
double hz_inv = nz_pppm/zprd;
|
||||||
|
|
||||||
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
|
double *q = atom->q;
|
||||||
|
double **x = atom->x;
|
||||||
|
double **f = atom->f;
|
||||||
|
|
||||||
|
for (int j = 0; j < num_charged; j++) {
|
||||||
|
i = is_charged[j];
|
||||||
|
|
||||||
|
nx = part2grid[i][0];
|
||||||
|
ny = part2grid[i][1];
|
||||||
|
nz = part2grid[i][2];
|
||||||
|
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
||||||
|
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
||||||
|
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
||||||
|
|
||||||
|
compute_rho1d(dx,dy,dz);
|
||||||
|
compute_drho1d(dx,dy,dz);
|
||||||
|
|
||||||
|
ekx = eky = ekz = ZEROF;
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
|
mz = n+nz;
|
||||||
|
for (m = nlower; m <= nupper; m++) {
|
||||||
|
my = m+ny;
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ekx *= hx_inv;
|
||||||
|
eky *= hy_inv;
|
||||||
|
ekz *= hz_inv;
|
||||||
|
|
||||||
|
// convert E-field to force and substract self forces
|
||||||
|
|
||||||
|
const double qfactor = force->qqrd2e * scale;
|
||||||
|
|
||||||
|
s1 = x[i][0]*hx_inv;
|
||||||
|
s2 = x[i][1]*hy_inv;
|
||||||
|
s3 = x[i][2]*hz_inv;
|
||||||
|
sf = sf_coeff[0]*sin(2*MY_PI*s1);
|
||||||
|
sf += sf_coeff[1]*sin(4*MY_PI*s1);
|
||||||
|
sf *= 2*q[i]*q[i];
|
||||||
|
f[i][0] += qfactor*(ekx*q[i] - sf);
|
||||||
|
|
||||||
|
sf = sf_coeff[2]*sin(2*MY_PI*s2);
|
||||||
|
sf += sf_coeff[3]*sin(4*MY_PI*s2);
|
||||||
|
sf *= 2*q[i]*q[i];
|
||||||
|
f[i][1] += qfactor*(eky*q[i] - sf);
|
||||||
|
|
||||||
|
|
||||||
|
sf = sf_coeff[4]*sin(2*MY_PI*s3);
|
||||||
|
sf += sf_coeff[5]*sin(4*MY_PI*s3);
|
||||||
|
sf *= 2*q[i]*q[i];
|
||||||
|
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
interpolate from grid to get per-atom energy/virial
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMCG::fieldforce_peratom()
|
void PPPMCG::fieldforce_peratom()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
||||||
|
|
||||||
// loop over my charges, interpolate from nearby grid points
|
// loop over my charges, interpolate from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
|
||||||
double *q = atom->q;
|
double *q = atom->q;
|
||||||
double **x = atom->x;
|
double **x = atom->x;
|
||||||
double **f = atom->f;
|
|
||||||
|
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (int j = 0; j < num_charged; j++) {
|
||||||
i = is_charged[j];
|
i = is_charged[j];
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
nx = part2grid[i][0];
|
||||||
ny = part2grid[i][1];
|
ny = part2grid[i][1];
|
||||||
nz = part2grid[i][2];
|
nz = part2grid[i][2];
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
||||||
|
|
||||||
compute_rho1d(dx,dy,dz);
|
compute_rho1d(dx,dy,dz);
|
||||||
|
|
||||||
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
||||||
for (n = nlower; n <= nupper; n++) {
|
for (n = nlower; n <= nupper; n++) {
|
||||||
mz = n+nz;
|
mz = n+nz;
|
||||||
z0 = rho1d[2][n];
|
z0 = rho1d[2][n];
|
||||||
for (m = nlower; m <= nupper; m++) {
|
for (m = nlower; m <= nupper; m++) {
|
||||||
my = m+ny;
|
my = m+ny;
|
||||||
y0 = z0*rho1d[1][m];
|
y0 = z0*rho1d[1][m];
|
||||||
for (l = nlower; l <= nupper; l++) {
|
for (l = nlower; l <= nupper; l++) {
|
||||||
mx = l+nx;
|
mx = l+nx;
|
||||||
x0 = y0*rho1d[0][l];
|
x0 = y0*rho1d[0][l];
|
||||||
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
||||||
if (vflag_atom) {
|
if (vflag_atom) {
|
||||||
v0 += x0*v0_brick[mz][my][mx];
|
v0 += x0*v0_brick[mz][my][mx];
|
||||||
v1 += x0*v1_brick[mz][my][mx];
|
v1 += x0*v1_brick[mz][my][mx];
|
||||||
v2 += x0*v2_brick[mz][my][mx];
|
v2 += x0*v2_brick[mz][my][mx];
|
||||||
v3 += x0*v3_brick[mz][my][mx];
|
v3 += x0*v3_brick[mz][my][mx];
|
||||||
v4 += x0*v4_brick[mz][my][mx];
|
v4 += x0*v4_brick[mz][my][mx];
|
||||||
v5 += x0*v5_brick[mz][my][mx];
|
v5 += x0*v5_brick[mz][my][mx];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eflag_atom) eatom[i] += q[i]*u;
|
|
||||||
if (vflag_atom) {
|
|
||||||
vatom[i][0] += v0;
|
|
||||||
vatom[i][1] += v1;
|
|
||||||
vatom[i][2] += v2;
|
|
||||||
vatom[i][3] += v3;
|
|
||||||
vatom[i][4] += v4;
|
|
||||||
vatom[i][5] += v5;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (eflag_atom) eatom[i] += q[i]*u;
|
||||||
|
if (vflag_atom) {
|
||||||
|
vatom[i][0] += q[i]*v0;
|
||||||
|
vatom[i][1] += q[i]*v1;
|
||||||
|
vatom[i][2] += q[i]*v2;
|
||||||
|
vatom[i][3] += q[i]*v3;
|
||||||
|
vatom[i][4] += q[i]*v4;
|
||||||
|
vatom[i][5] += q[i]*v5;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -471,14 +573,17 @@ void PPPMCG::fieldforce_peratom()
|
|||||||
|
|
||||||
void PPPMCG::slabcorr()
|
void PPPMCG::slabcorr()
|
||||||
{
|
{
|
||||||
|
int i,j;
|
||||||
|
|
||||||
// compute local contribution to global dipole moment
|
// compute local contribution to global dipole moment
|
||||||
|
|
||||||
double *q = atom->q;
|
const double * const q = atom->q;
|
||||||
double **x = atom->x;
|
const double * const * const x = atom->x;
|
||||||
|
|
||||||
double dipole = 0.0;
|
double dipole = 0.0;
|
||||||
for (int j = 0; j < num_charged; j++) {
|
|
||||||
int i = is_charged[j];
|
|
||||||
|
for (j = 0; j < num_charged; j++) {
|
||||||
|
i = is_charged[j];
|
||||||
dipole += q[i]*x[i][2];
|
dipole += q[i]*x[i][2];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -489,29 +594,101 @@ void PPPMCG::slabcorr()
|
|||||||
|
|
||||||
// compute corrections
|
// compute corrections
|
||||||
|
|
||||||
const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
|
const double e_slabcorr = MY_2PI*dipole_all*dipole_all/volume;
|
||||||
const double qscale = force->qqrd2e * scale;
|
const double qscale = force->qqrd2e * scale;
|
||||||
|
|
||||||
if (eflag_global) energy += qscale * e_slabcorr;
|
if (eflag_global) energy += qscale * e_slabcorr;
|
||||||
|
|
||||||
//per-atom energy
|
// per-atom energy
|
||||||
|
|
||||||
if (eflag_atom) {
|
if (eflag_atom) {
|
||||||
double efact = 2.0*MY_PI*dipole_all/volume;
|
const double efact = MY_2PI*dipole_all/volume;
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (j = 0; j < num_charged; j++) {
|
||||||
int i = is_charged[j];
|
i = is_charged[j];
|
||||||
eatom[i] += qscale * q[i]*x[i][2]*efact;
|
eatom[i] += qscale * q[i]*x[i][2]*efact;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// add on force corrections
|
// add on force corrections
|
||||||
|
|
||||||
const double ffact = -4.0*MY_PI*dipole_all/volume * qscale;
|
const double ffact = -MY_4PI*dipole_all/volume;
|
||||||
double **f = atom->f;
|
double * const * const f = atom->f;
|
||||||
|
|
||||||
|
for (j = 0; j < num_charged; j++) {
|
||||||
|
i = is_charged[j];
|
||||||
|
f[i][2] += qscale * q[i]*ffact;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
create discretized "density" on section of global grid due to my particles
|
||||||
|
density(x,y,z) = charge "density" at grid points of my 3d brick
|
||||||
|
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
|
||||||
|
in global grid for group-group interactions
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCG::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
|
||||||
|
{
|
||||||
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
|
|
||||||
|
// clear 3d density arrays
|
||||||
|
|
||||||
|
memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
|
||||||
|
ngrid*sizeof(FFT_SCALAR));
|
||||||
|
|
||||||
|
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
|
||||||
|
ngrid*sizeof(FFT_SCALAR));
|
||||||
|
|
||||||
|
// loop over my charges, add their contribution to nearby grid points
|
||||||
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
|
||||||
|
const double * const q = atom->q;
|
||||||
|
const double * const * const x = atom->x;
|
||||||
|
const int * const mask = atom->mask;
|
||||||
|
|
||||||
for (int j = 0; j < num_charged; j++) {
|
for (int j = 0; j < num_charged; j++) {
|
||||||
int i = is_charged[j];
|
i = is_charged[j];
|
||||||
f[i][2] += q[i]*ffact;
|
|
||||||
|
if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
|
||||||
|
if (BA_flag) continue;
|
||||||
|
|
||||||
|
if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
|
||||||
|
|
||||||
|
nx = part2grid[i][0];
|
||||||
|
ny = part2grid[i][1];
|
||||||
|
nz = part2grid[i][2];
|
||||||
|
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
||||||
|
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
||||||
|
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
||||||
|
|
||||||
|
compute_rho1d(dx,dy,dz);
|
||||||
|
|
||||||
|
z0 = delvolinv * q[i];
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
|
mz = n+nz;
|
||||||
|
y0 = z0*rho1d[2][n];
|
||||||
|
for (m = nlower; m <= nupper; m++) {
|
||||||
|
my = m+ny;
|
||||||
|
x0 = y0*rho1d[1][m];
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
|
||||||
|
// group A
|
||||||
|
|
||||||
|
if (mask[i] & groupbit_A)
|
||||||
|
density_A_brick[mz][my][mx] += x0*rho1d[0][l];
|
||||||
|
|
||||||
|
// group B
|
||||||
|
|
||||||
|
if (mask[i] & groupbit_B)
|
||||||
|
density_B_brick[mz][my][mx] += x0*rho1d[0][l];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -521,7 +698,7 @@ void PPPMCG::slabcorr()
|
|||||||
|
|
||||||
double PPPMCG::memory_usage()
|
double PPPMCG::memory_usage()
|
||||||
{
|
{
|
||||||
double bytes = PPPMOld::memory_usage();
|
double bytes = PPPM::memory_usage();
|
||||||
bytes += nmax * sizeof(int);
|
bytes += nmax * sizeof(int);
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,11 +20,11 @@ KSpaceStyle(pppm/cg,PPPMCG)
|
|||||||
#ifndef LMP_PPPM_CG_H
|
#ifndef LMP_PPPM_CG_H
|
||||||
#define LMP_PPPM_CG_H
|
#define LMP_PPPM_CG_H
|
||||||
|
|
||||||
#include "pppm_old.h"
|
#include "pppm.h"
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
class PPPMCG : public PPPMOld {
|
class PPPMCG : public PPPM {
|
||||||
public:
|
public:
|
||||||
PPPMCG(class LAMMPS *, int, char **);
|
PPPMCG(class LAMMPS *, int, char **);
|
||||||
virtual ~PPPMCG();
|
virtual ~PPPMCG();
|
||||||
@ -38,9 +38,11 @@ class PPPMCG : public PPPMOld {
|
|||||||
|
|
||||||
virtual void particle_map();
|
virtual void particle_map();
|
||||||
virtual void make_rho();
|
virtual void make_rho();
|
||||||
virtual void fieldforce();
|
virtual void fieldforce_ik();
|
||||||
|
virtual void fieldforce_ad();
|
||||||
virtual void fieldforce_peratom();
|
virtual void fieldforce_peratom();
|
||||||
virtual void slabcorr();
|
virtual void slabcorr();
|
||||||
|
virtual void make_rho_groups(int, int, int);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -226,7 +226,7 @@ void PPPMTIP4P::fieldforce_ik()
|
|||||||
if (type[i] != typeO) {
|
if (type[i] != typeO) {
|
||||||
f[i][0] += qfactor*ekx;
|
f[i][0] += qfactor*ekx;
|
||||||
f[i][1] += qfactor*eky;
|
f[i][1] += qfactor*eky;
|
||||||
f[i][2] += qfactor*ekz;
|
if (slabflag != 2) f[i][2] += qfactor*ekz;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fx = qfactor * ekx;
|
fx = qfactor * ekx;
|
||||||
@ -236,15 +236,15 @@ void PPPMTIP4P::fieldforce_ik()
|
|||||||
|
|
||||||
f[i][0] += fx*(1 - alpha);
|
f[i][0] += fx*(1 - alpha);
|
||||||
f[i][1] += fy*(1 - alpha);
|
f[i][1] += fy*(1 - alpha);
|
||||||
f[i][2] += fz*(1 - alpha);
|
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
|
||||||
|
|
||||||
f[iH1][0] += 0.5*alpha*fx;
|
f[iH1][0] += 0.5*alpha*fx;
|
||||||
f[iH1][1] += 0.5*alpha*fy;
|
f[iH1][1] += 0.5*alpha*fy;
|
||||||
f[iH1][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
|
||||||
|
|
||||||
f[iH2][0] += 0.5*alpha*fx;
|
f[iH2][0] += 0.5*alpha*fx;
|
||||||
f[iH2][1] += 0.5*alpha*fy;
|
f[iH2][1] += 0.5*alpha*fy;
|
||||||
f[iH2][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -256,7 +256,7 @@ void PPPMTIP4P::fieldforce_ik()
|
|||||||
void PPPMTIP4P::fieldforce_ad()
|
void PPPMTIP4P::fieldforce_ad()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0;
|
FFT_SCALAR dx,dy,dz;
|
||||||
FFT_SCALAR ekx,eky,ekz;
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
double *xi;
|
double *xi;
|
||||||
int iH1,iH2;
|
int iH1,iH2;
|
||||||
@ -272,14 +272,11 @@ void PPPMTIP4P::fieldforce_ad()
|
|||||||
double xprd = prd[0];
|
double xprd = prd[0];
|
||||||
double yprd = prd[1];
|
double yprd = prd[1];
|
||||||
double zprd = prd[2];
|
double zprd = prd[2];
|
||||||
double zprd_slab = zprd*slab_volfactor;
|
|
||||||
|
|
||||||
double hx_inv = nx_pppm/xprd;
|
double hx_inv = nx_pppm/xprd;
|
||||||
double hy_inv = ny_pppm/yprd;
|
double hy_inv = ny_pppm/yprd;
|
||||||
double hz_inv = nz_pppm/zprd;
|
double hz_inv = nz_pppm/zprd;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
@ -302,9 +299,9 @@ void PPPMTIP4P::fieldforce_ad()
|
|||||||
nx = part2grid[i][0];
|
nx = part2grid[i][0];
|
||||||
ny = part2grid[i][1];
|
ny = part2grid[i][1];
|
||||||
nz = part2grid[i][2];
|
nz = part2grid[i][2];
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
|
||||||
|
|
||||||
compute_rho1d(dx,dy,dz);
|
compute_rho1d(dx,dy,dz);
|
||||||
compute_drho1d(dx,dy,dz);
|
compute_drho1d(dx,dy,dz);
|
||||||
@ -335,38 +332,38 @@ void PPPMTIP4P::fieldforce_ad()
|
|||||||
s3 = x[i][2]*hz_inv;
|
s3 = x[i][2]*hz_inv;
|
||||||
sf = sf_coeff[0]*sin(2*MY_PI*s1);
|
sf = sf_coeff[0]*sin(2*MY_PI*s1);
|
||||||
sf += sf_coeff[1]*sin(4*MY_PI*s1);
|
sf += sf_coeff[1]*sin(4*MY_PI*s1);
|
||||||
sf *= 2*q[i]*q[i];
|
sf *= 2.0*q[i]*q[i];
|
||||||
fx = qfactor*(ekx*q[i] - sf);
|
fx = qfactor*(ekx*q[i] - sf);
|
||||||
|
|
||||||
sf = sf_coeff[2]*sin(2*MY_PI*s2);
|
sf = sf_coeff[2]*sin(2*MY_PI*s2);
|
||||||
sf += sf_coeff[3]*sin(4*MY_PI*s2);
|
sf += sf_coeff[3]*sin(4*MY_PI*s2);
|
||||||
sf *= 2*q[i]*q[i];
|
sf *= 2.0*q[i]*q[i];
|
||||||
fy = qfactor*(eky*q[i] - sf);
|
fy = qfactor*(eky*q[i] - sf);
|
||||||
|
|
||||||
sf = sf_coeff[4]*sin(2*MY_PI*s3);
|
sf = sf_coeff[4]*sin(2*MY_PI*s3);
|
||||||
sf += sf_coeff[5]*sin(4*MY_PI*s3);
|
sf += sf_coeff[5]*sin(4*MY_PI*s3);
|
||||||
sf *= 2*q[i]*q[i];
|
sf *= 2.0*q[i]*q[i];
|
||||||
fz = qfactor*(ekz*q[i] - sf);
|
fz = qfactor*(ekz*q[i] - sf);
|
||||||
|
|
||||||
if (type[i] != typeO) {
|
if (type[i] != typeO) {
|
||||||
f[i][0] += fx;
|
f[i][0] += fx;
|
||||||
f[i][1] += fy;
|
f[i][1] += fy;
|
||||||
f[i][2] += fz;
|
if (slabflag != 2) f[i][2] += fz;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
find_M(i,iH1,iH2,xM);
|
find_M(i,iH1,iH2,xM);
|
||||||
|
|
||||||
f[i][0] += fx*(1 - alpha);
|
f[i][0] += fx*(1 - alpha);
|
||||||
f[i][1] += fy*(1 - alpha);
|
f[i][1] += fy*(1 - alpha);
|
||||||
f[i][2] += fz*(1 - alpha);
|
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
|
||||||
|
|
||||||
f[iH1][0] += 0.5*alpha*fx;
|
f[iH1][0] += 0.5*alpha*fx;
|
||||||
f[iH1][1] += 0.5*alpha*fy;
|
f[iH1][1] += 0.5*alpha*fy;
|
||||||
f[iH1][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
|
||||||
|
|
||||||
f[iH2][0] += 0.5*alpha*fx;
|
f[iH2][0] += 0.5*alpha*fx;
|
||||||
f[iH2][1] += 0.5*alpha*fy;
|
f[iH2][1] += 0.5*alpha*fy;
|
||||||
f[iH2][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -392,7 +389,6 @@ void PPPMTIP4P::fieldforce_peratom()
|
|||||||
// ek = 3 components of E-field on particle
|
// ek = 3 components of E-field on particle
|
||||||
double *q = atom->q;
|
double *q = atom->q;
|
||||||
double **x = atom->x;
|
double **x = atom->x;
|
||||||
double **f = atom->f;
|
|
||||||
|
|
||||||
int *type = atom->type;
|
int *type = atom->type;
|
||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
|
|||||||
@ -19,9 +19,12 @@
|
|||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
#include "domain.h"
|
#include "domain.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "fix_omp.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "math_const.h"
|
#include "math_const.h"
|
||||||
|
#include "math_special.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
@ -29,12 +32,14 @@
|
|||||||
#include "suffix.h"
|
#include "suffix.h"
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
using namespace MathConst;
|
using namespace MathConst;
|
||||||
|
using namespace MathSpecial;
|
||||||
|
|
||||||
#define SMALLQ 0.00001
|
#ifdef FFT_SINGLE
|
||||||
#if defined(FFT_SINGLE)
|
|
||||||
#define ZEROF 0.0f
|
#define ZEROF 0.0f
|
||||||
|
#define ONEF 1.0f
|
||||||
#else
|
#else
|
||||||
#define ZEROF 0.0
|
#define ZEROF 0.0
|
||||||
|
#define ONEF 1.0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EPS_HOC 1.0e-7
|
#define EPS_HOC 1.0e-7
|
||||||
@ -55,7 +60,27 @@ void PPPMCGOMP::allocate()
|
|||||||
{
|
{
|
||||||
PPPMCG::allocate();
|
PPPMCG::allocate();
|
||||||
|
|
||||||
const int nthreads = comm->nthreads;
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
const int tid = 0;
|
||||||
|
#endif
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
thr->init_pppm(order,memory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
free memory that depends on # of K-vectors and order
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCGOMP::deallocate()
|
||||||
|
{
|
||||||
|
PPPMCG::deallocate();
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
@ -66,153 +91,26 @@ void PPPMCGOMP::allocate()
|
|||||||
#else
|
#else
|
||||||
const int tid = 0;
|
const int tid = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
FFT_SCALAR **rho1d_thr;
|
|
||||||
memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
ThrData *thr = fix->get_thr(tid);
|
||||||
thr->init_pppm(static_cast<void *>(rho1d_thr));
|
thr->init_pppm(-order,memory);
|
||||||
}
|
|
||||||
|
|
||||||
const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
|
|
||||||
|
|
||||||
// reallocate density brick, so it fits our needs
|
|
||||||
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
|
|
||||||
memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
|
|
||||||
nxlo_out,nxhi_out,"pppm:density_brick");
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: special version of reduce_data for FFT_SCALAR data type.
|
|
||||||
// reduce per thread data into the first part of the data
|
|
||||||
// array that is used for the non-threaded parts and reset
|
|
||||||
// the temporary storage to 0.0. this routine depends on
|
|
||||||
// multi-dimensional arrays like force stored in this order
|
|
||||||
// x1,y1,z1,x2,y2,z2,...
|
|
||||||
// we need to post a barrier to wait until all threads are done
|
|
||||||
// writing to the array.
|
|
||||||
static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
|
|
||||||
{
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
if (nthreads == 1) return;
|
|
||||||
#pragma omp barrier
|
|
||||||
{
|
|
||||||
const int nvals = ndim*nall;
|
|
||||||
const int idelta = nvals/nthreads + 1;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
|
|
||||||
|
|
||||||
// this if protects against having more threads than atoms
|
|
||||||
if (ifrom < nall) {
|
|
||||||
for (int m = ifrom; m < ito; ++m) {
|
|
||||||
for (int n = 1; n < nthreads; ++n) {
|
|
||||||
dall[m] += dall[n*nvals + m];
|
|
||||||
dall[n*nvals + m] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
free memory that depends on # of K-vectors and order
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void PPPMCGOMP::deallocate()
|
|
||||||
{
|
|
||||||
PPPMCG::deallocate();
|
|
||||||
for (int i=0; i < comm->nthreads; ++i) {
|
|
||||||
ThrData * thr = fix->get_thr(i);
|
|
||||||
FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
memory->destroy2d_offset(rho1d_thr,-order/2);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
adjust PPPMCG coeffs, called initially and whenever volume has changed
|
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMCGOMP::setup()
|
void PPPMCGOMP::compute_gf_ik()
|
||||||
{
|
{
|
||||||
int i,j,k,n;
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
double *prd;
|
|
||||||
|
|
||||||
// volume-dependent factors
|
|
||||||
// adjust z dimension for 2d slab PPPM
|
|
||||||
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
|
|
||||||
|
|
||||||
if (triclinic == 0) prd = domain->prd;
|
|
||||||
else prd = domain->prd_lamda;
|
|
||||||
|
|
||||||
const double xprd = prd[0];
|
const double xprd = prd[0];
|
||||||
const double yprd = prd[1];
|
const double yprd = prd[1];
|
||||||
const double zprd = prd[2];
|
const double zprd = prd[2];
|
||||||
const double zprd_slab = zprd*slab_volfactor;
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
volume = xprd * yprd * zprd_slab;
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
delxinv = nx_pppm/xprd;
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
delyinv = ny_pppm/yprd;
|
|
||||||
delzinv = nz_pppm/zprd_slab;
|
|
||||||
|
|
||||||
delvolinv = delxinv*delyinv*delzinv;
|
|
||||||
|
|
||||||
const double unitkx = (2.0*MY_PI/xprd);
|
|
||||||
const double unitky = (2.0*MY_PI/yprd);
|
|
||||||
const double unitkz = (2.0*MY_PI/zprd_slab);
|
|
||||||
|
|
||||||
// fkx,fky,fkz for my FFT grid pts
|
|
||||||
|
|
||||||
double per;
|
|
||||||
|
|
||||||
for (i = nxlo_fft; i <= nxhi_fft; i++) {
|
|
||||||
per = i - nx_pppm*(2*i/nx_pppm);
|
|
||||||
fkx[i] = unitkx*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = nylo_fft; i <= nyhi_fft; i++) {
|
|
||||||
per = i - ny_pppm*(2*i/ny_pppm);
|
|
||||||
fky[i] = unitky*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = nzlo_fft; i <= nzhi_fft; i++) {
|
|
||||||
per = i - nz_pppm*(2*i/nz_pppm);
|
|
||||||
fkz[i] = unitkz*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
// virial coefficients
|
|
||||||
|
|
||||||
double sqk,vterm;
|
|
||||||
|
|
||||||
n = 0;
|
|
||||||
for (k = nzlo_fft; k <= nzhi_fft; k++) {
|
|
||||||
for (j = nylo_fft; j <= nyhi_fft; j++) {
|
|
||||||
for (i = nxlo_fft; i <= nxhi_fft; i++) {
|
|
||||||
sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
|
|
||||||
if (sqk == 0.0) {
|
|
||||||
vg[n][0] = 0.0;
|
|
||||||
vg[n][1] = 0.0;
|
|
||||||
vg[n][2] = 0.0;
|
|
||||||
vg[n][3] = 0.0;
|
|
||||||
vg[n][4] = 0.0;
|
|
||||||
vg[n][5] = 0.0;
|
|
||||||
} else {
|
|
||||||
vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
|
|
||||||
vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
|
|
||||||
vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
|
|
||||||
vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
|
|
||||||
vg[n][3] = vterm*fkx[i]*fky[j];
|
|
||||||
vg[n][4] = vterm*fkx[i]*fkz[k];
|
|
||||||
vg[n][5] = vterm*fky[j]*fkz[k];
|
|
||||||
}
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// modified (Hockney-Eastwood) Coulomb Green's function
|
|
||||||
|
|
||||||
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
|
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
|
||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
@ -220,93 +118,188 @@ void PPPMCGOMP::setup()
|
|||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
|
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
|
||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
const double form = 1.0;
|
const int twoorder = 2*order;
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m;
|
double snx,sny,snz;
|
||||||
double snx,sny,snz,sqk;
|
|
||||||
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
double sum1,dot1,dot2;
|
double sum1,dot1,dot2;
|
||||||
double numerator,denominator;
|
double numerator,denominator;
|
||||||
const double gew2 = -4.0*g_ewald*g_ewald;
|
double sqk;
|
||||||
|
|
||||||
const int nnx = nxhi_fft-nxlo_fft+1;
|
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
const int nny = nyhi_fft-nylo_fft+1;
|
|
||||||
|
|
||||||
loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
for (m = nzlo_fft; m <= nzhi_fft; m++) {
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
const double fkzm = fkz[m];
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
snz = sin(0.5*fkzm*zprd_slab/nz_pppm);
|
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
|
||||||
snz *= snz;
|
|
||||||
|
|
||||||
for (l = nylo_fft; l <= nyhi_fft; l++) {
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
const double fkyl = fky[l];
|
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
|
||||||
sny = sin(0.5*fkyl*yprd/ny_pppm);
|
|
||||||
sny *= sny;
|
|
||||||
|
|
||||||
for (k = nxlo_fft; k <= nxhi_fft; k++) {
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
|
||||||
|
|
||||||
/* only compute the part designated to this thread */
|
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
|
||||||
nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
|
|
||||||
if ((nn < nnfrom) || (nn >=nnto)) continue;
|
|
||||||
|
|
||||||
const double fkxk = fkx[k];
|
if (sqk != 0.0) {
|
||||||
snx = sin(0.5*fkxk*xprd/nx_pppm);
|
numerator = 12.5663706/sqk;
|
||||||
snx *= snx;
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
sum1 = 0.0;
|
||||||
|
|
||||||
sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm;
|
for (nx = -nbx; nx <= nbx; nx++) {
|
||||||
|
qx = unitkx*(kper+nx_pppm*nx);
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
if (sqk != 0.0) {
|
for (ny = -nby; ny <= nby; ny++) {
|
||||||
numerator = form*MY_4PI/sqk;
|
qy = unitky*(lper+ny_pppm*ny);
|
||||||
denominator = gf_denom(snx,sny,snz);
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
const double dorder = static_cast<double>(order);
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
sum1 = 0.0;
|
wy = powsinxx(argy,twoorder);
|
||||||
for (nx = -nbx; nx <= nbx; nx++) {
|
|
||||||
qx = fkxk + unitkx*nx_pppm*nx;
|
|
||||||
sx = exp(qx*qx/gew2);
|
|
||||||
wx = 1.0;
|
|
||||||
argx = 0.5*qx*xprd/nx_pppm;
|
|
||||||
if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
|
|
||||||
wx *=wx;
|
|
||||||
|
|
||||||
for (ny = -nby; ny <= nby; ny++) {
|
for (nz = -nbz; nz <= nbz; nz++) {
|
||||||
qy = fkyl + unitky*ny_pppm*ny;
|
qz = unitkz*(mper+nz_pppm*nz);
|
||||||
sy = exp(qy*qy/gew2);
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
wy = 1.0;
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
argy = 0.5*qy*yprd/ny_pppm;
|
wz = powsinxx(argz,twoorder);
|
||||||
if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
|
|
||||||
wy *= wy;
|
|
||||||
|
|
||||||
for (nz = -nbz; nz <= nbz; nz++) {
|
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
|
||||||
qz = fkzm + unitkz*nz_pppm*nz;
|
dot2 = qx*qx+qy*qy+qz*qz;
|
||||||
sz = exp(qz*qz/gew2);
|
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
|
||||||
wz = 1.0;
|
}
|
||||||
argz = 0.5*qz*zprd_slab/nz_pppm;
|
}
|
||||||
if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
|
}
|
||||||
wz *= wz;
|
greensfn[n] = numerator*sum1/denominator;
|
||||||
|
} else greensfn[n] = 0.0;
|
||||||
dot1 = fkxk*qx + fkyl*qy + fkzm*qz;
|
|
||||||
dot2 = qx*qx+qy*qy+qz*qz;
|
|
||||||
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
greensfn[nn] = numerator*sum1/denominator;
|
|
||||||
} else greensfn[nn] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
run the regular toplevel compute method from plain PPPPMCG
|
compute optimized Green's function for energy calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCGOMP::compute_gf_ad()
|
||||||
|
{
|
||||||
|
|
||||||
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
|
|
||||||
|
const double xprd = prd[0];
|
||||||
|
const double yprd = prd[1];
|
||||||
|
const double zprd = prd[2];
|
||||||
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
|
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
|
const int twoorder = 2*order;
|
||||||
|
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double snx,sny,snz,sqk;
|
||||||
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
|
double numerator,denominator;
|
||||||
|
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
|
|
||||||
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
|
qz = unitkz*mper;
|
||||||
|
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
|
||||||
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
|
wz = powsinxx(argz,twoorder);
|
||||||
|
|
||||||
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
|
qy = unitky*lper;
|
||||||
|
sny = square(sin(0.5*qy*yprd/ny_pppm));
|
||||||
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
|
wy = powsinxx(argy,twoorder);
|
||||||
|
|
||||||
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
qx = unitkx*kper;
|
||||||
|
snx = square(sin(0.5*qx*xprd/nx_pppm));
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
|
sqk = qx*qx + qy*qy + qz*qz;
|
||||||
|
|
||||||
|
if (sqk != 0.0) {
|
||||||
|
numerator = MY_4PI/sqk;
|
||||||
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
} else {
|
||||||
|
greensfn[n] = 0.0;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end of paralle region
|
||||||
|
|
||||||
|
// compute the coefficients for the self-force correction
|
||||||
|
|
||||||
|
double prex, prey, prez, tmp[6];
|
||||||
|
prex = prey = prez = MY_PI/volume;
|
||||||
|
prex *= nx_pppm/xprd;
|
||||||
|
prey *= ny_pppm/yprd;
|
||||||
|
prez *= nz_pppm/zprd_slab;
|
||||||
|
tmp[0] = sf0 * prex;
|
||||||
|
tmp[1] = sf1 * prex*2;
|
||||||
|
tmp[2] = sf2 * prey;
|
||||||
|
tmp[3] = sf3 * prey*2;
|
||||||
|
tmp[4] = sf4 * prez;
|
||||||
|
tmp[5] = sf5 * prez*2;
|
||||||
|
|
||||||
|
// communicate values with other procs
|
||||||
|
|
||||||
|
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
run the regular toplevel compute method from plain PPPMCG
|
||||||
which will have individual methods replaced by our threaded
|
which will have individual methods replaced by our threaded
|
||||||
versions and then call the obligatory force reduction.
|
versions and then call the obligatory force reduction.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
@ -331,94 +324,10 @@ void PPPMCGOMP::compute(int eflag, int vflag)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create discretized "density" on section of global grid due to my particles
|
interpolate from grid to get electric field & force on my particles for ik
|
||||||
density(x,y,z) = charge "density" at grid points of my 3d brick
|
|
||||||
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
|
|
||||||
in global grid
|
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMCGOMP::make_rho()
|
void PPPMCGOMP::fieldforce_ik()
|
||||||
{
|
|
||||||
const double * const q = atom->q;
|
|
||||||
const double * const * const x = atom->x;
|
|
||||||
const int nthreads = comm->nthreads;
|
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
#pragma omp parallel default(none)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// each thread works on a fixed chunk of atoms.
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = num_charged;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int tid = 0;
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = num_charged;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
|
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
|
||||||
|
|
||||||
// set up clear 3d density array
|
|
||||||
const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
|
|
||||||
FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
|
|
||||||
memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
|
|
||||||
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
|
||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
|
||||||
|
|
||||||
// this if protects against having more threads than charged local atoms
|
|
||||||
if (ifrom < num_charged) {
|
|
||||||
for (j = ifrom; j < ito; j++) {
|
|
||||||
i = is_charged[j];
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
|
||||||
ny = part2grid[i][1];
|
|
||||||
nz = part2grid[i][2];
|
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
|
||||||
|
|
||||||
z0 = delvolinv * q[i];
|
|
||||||
for (n = nlower; n <= nupper; n++) {
|
|
||||||
mz = n+nz;
|
|
||||||
y0 = z0*r1d[2][n];
|
|
||||||
for (m = nlower; m <= nupper; m++) {
|
|
||||||
my = m+ny;
|
|
||||||
x0 = y0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
db[mz][my][mx] += x0*r1d[0][l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// reduce 3d density array
|
|
||||||
if (nthreads > 1) {
|
|
||||||
data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
interpolate from grid to get electric field & force on my particles
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void PPPMCGOMP::fieldforce()
|
|
||||||
{
|
{
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -426,79 +335,161 @@ void PPPMCGOMP::fieldforce()
|
|||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
// ek = 3 components of E-field on particle
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
const double * const q = atom->q;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const double * const * const x = atom->x;
|
const double * _noalias const q = atom->q;
|
||||||
const int nthreads = comm->nthreads;
|
|
||||||
const double qqrd2e = force->qqrd2e;
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
FFT_SCALAR dx,dy,dz,x0,y0,z0,ekx,eky,ekz;
|
||||||
// each thread works on a fixed chunk of atoms.
|
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = num_charged;
|
loop_setup_thr(ifrom,ito,tid,num_charged,nthreads);
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
// get per thread data
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = num_charged;
|
|
||||||
const int tid = 0;
|
|
||||||
#endif
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
ThrData *thr = fix->get_thr(tid);
|
||||||
double * const * const f = thr->get_f();
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
|
||||||
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
|
for (int j = ifrom; j < ito; ++j) {
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
i = is_charged[j];
|
||||||
FFT_SCALAR ekx,eky,ekz;
|
|
||||||
|
|
||||||
// this if protects against having more threads than charged local atoms
|
nx = part2grid[i][0];
|
||||||
if (ifrom < num_charged) {
|
ny = part2grid[i][1];
|
||||||
for (j = ifrom; j < ito; j++) {
|
nz = part2grid[i][2];
|
||||||
i = is_charged[j];
|
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
|
||||||
|
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
|
||||||
|
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
ny = part2grid[i][1];
|
|
||||||
nz = part2grid[i][2];
|
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
ekx = eky = ekz = ZEROF;
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
ekx = eky = ekz = ZEROF;
|
mz = n+nz;
|
||||||
for (n = nlower; n <= nupper; n++) {
|
z0 = r1d[2][n];
|
||||||
mz = n+nz;
|
for (m = nlower; m <= nupper; m++) {
|
||||||
z0 = r1d[2][n];
|
my = m+ny;
|
||||||
for (m = nlower; m <= nupper; m++) {
|
y0 = z0*r1d[1][m];
|
||||||
my = m+ny;
|
for (l = nlower; l <= nupper; l++) {
|
||||||
y0 = z0*r1d[1][m];
|
mx = l+nx;
|
||||||
for (l = nlower; l <= nupper; l++) {
|
x0 = y0*r1d[0][l];
|
||||||
mx = l+nx;
|
ekx -= x0*vdx_brick[mz][my][mx];
|
||||||
x0 = y0*r1d[0][l];
|
eky -= x0*vdy_brick[mz][my][mx];
|
||||||
ekx -= x0*vdx_brick[mz][my][mx];
|
ekz -= x0*vdz_brick[mz][my][mx];
|
||||||
eky -= x0*vdy_brick[mz][my][mx];
|
}
|
||||||
ekz -= x0*vdz_brick[mz][my][mx];
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// convert E-field to force
|
|
||||||
const double qfactor = qqrd2e*scale*q[i];
|
|
||||||
f[i][0] += qfactor*ekx;
|
|
||||||
f[i][1] += qfactor*eky;
|
|
||||||
f[i][2] += qfactor*ekz;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// convert E-field to force
|
||||||
|
|
||||||
|
const double qfactor = qqrd2e * scale * q[i];
|
||||||
|
f[i].x += qfactor*ekx;
|
||||||
|
f[i].y += qfactor*eky;
|
||||||
|
if (slabflag != 2) f[i].z += qfactor*ekz;
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
interpolate from grid to get per-atom energy/virial
|
interpolate from grid to get electric field & force on my particles for ad
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCGOMP::fieldforce_ad()
|
||||||
|
{
|
||||||
|
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
|
||||||
|
const double hx_inv = nx_pppm/prd[0];
|
||||||
|
const double hy_inv = ny_pppm/prd[1];
|
||||||
|
const double hz_inv = nz_pppm/prd[2];
|
||||||
|
|
||||||
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
|
const double * _noalias const q = atom->q;
|
||||||
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
|
FFT_SCALAR dx,dy,dz,ekx,eky,ekz;
|
||||||
|
double s1,s2,s3,sf;
|
||||||
|
|
||||||
|
loop_setup_thr(ifrom,ito,tid,num_charged,nthreads);
|
||||||
|
|
||||||
|
// get per thread data
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
|
||||||
|
|
||||||
|
for (int j = ifrom; j < ito; ++j) {
|
||||||
|
i = is_charged[j];
|
||||||
|
|
||||||
|
nx = part2grid[i][0];
|
||||||
|
ny = part2grid[i][1];
|
||||||
|
nz = part2grid[i][2];
|
||||||
|
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
|
||||||
|
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
|
||||||
|
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
|
||||||
|
|
||||||
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
|
compute_drho1d_thr(d1d,dx,dy,dz);
|
||||||
|
|
||||||
|
ekx = eky = ekz = ZEROF;
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
|
mz = n+nz;
|
||||||
|
for (m = nlower; m <= nupper; m++) {
|
||||||
|
my = m+ny;
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ekx *= hx_inv;
|
||||||
|
eky *= hy_inv;
|
||||||
|
ekz *= hz_inv;
|
||||||
|
|
||||||
|
// convert E-field to force and substract self forces
|
||||||
|
|
||||||
|
const double qi = q[i];
|
||||||
|
const double qfactor = qqrd2e * scale * qi;
|
||||||
|
|
||||||
|
s1 = x[i].x*hx_inv;
|
||||||
|
sf = sf_coeff[0]*sin(MY_2PI*s1);
|
||||||
|
sf += sf_coeff[1]*sin(MY_4PI*s1);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
f[i].x += qfactor*(ekx - sf);
|
||||||
|
|
||||||
|
s2 = x[i].y*hy_inv;
|
||||||
|
sf = sf_coeff[2]*sin(MY_2PI*s2);
|
||||||
|
sf += sf_coeff[3]*sin(MY_4PI*s2);
|
||||||
|
sf *= 2*qi;
|
||||||
|
f[i].y += qfactor*(eky - sf);
|
||||||
|
|
||||||
|
s3 = x[i].z*hz_inv;
|
||||||
|
sf = sf_coeff[4]*sin(MY_2PI*s3);
|
||||||
|
sf += sf_coeff[5]*sin(MY_4PI*s3);
|
||||||
|
sf *= 2*qi;
|
||||||
|
if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
|
||||||
|
}
|
||||||
|
} // end of parallel region
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
interpolate from grid to get per-atom energy/virial
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMCGOMP::fieldforce_peratom()
|
void PPPMCGOMP::fieldforce_peratom()
|
||||||
{
|
{
|
||||||
@ -507,82 +498,71 @@ void PPPMCGOMP::fieldforce_peratom()
|
|||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
|
||||||
const double * const q = atom->q;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const double * const * const x = atom->x;
|
const double * _noalias const q = atom->q;
|
||||||
const int nthreads = comm->nthreads;
|
const int nthreads = comm->nthreads;
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
|
||||||
// each thread works on a fixed chunk of atoms.
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = num_charged;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = num_charged;
|
|
||||||
const int tid = 0;
|
|
||||||
#endif
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
|
|
||||||
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
|
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
||||||
|
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
|
|
||||||
// this if protects against having more threads than charged local atoms
|
loop_setup_thr(ifrom,ito,tid,num_charged,nthreads);
|
||||||
if (ifrom < num_charged) {
|
|
||||||
for (j = ifrom; j < ito; j++) {
|
|
||||||
i = is_charged[j];
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
// get per thread data
|
||||||
ny = part2grid[i][1];
|
ThrData *thr = fix->get_thr(tid);
|
||||||
nz = part2grid[i][2];
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
for (int j=ifrom; j < ito; ++j) {
|
||||||
|
i = is_charged[j];
|
||||||
|
|
||||||
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
nx = part2grid[i][0];
|
||||||
for (n = nlower; n <= nupper; n++) {
|
ny = part2grid[i][1];
|
||||||
mz = n+nz;
|
nz = part2grid[i][2];
|
||||||
z0 = r1d[2][n];
|
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
|
||||||
for (m = nlower; m <= nupper; m++) {
|
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
|
||||||
my = m+ny;
|
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
|
||||||
y0 = z0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
x0 = y0*r1d[0][l];
|
|
||||||
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
|
||||||
if (vflag_atom) {
|
|
||||||
v0 += x0*v0_brick[mz][my][mx];
|
|
||||||
v1 += x0*v1_brick[mz][my][mx];
|
|
||||||
v2 += x0*v2_brick[mz][my][mx];
|
|
||||||
v3 += x0*v3_brick[mz][my][mx];
|
|
||||||
v4 += x0*v4_brick[mz][my][mx];
|
|
||||||
v5 += x0*v5_brick[mz][my][mx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eflag_atom) eatom[i] += q[i]*u;
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
if (vflag_atom) {
|
|
||||||
vatom[i][0] += v0;
|
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
||||||
vatom[i][1] += v1;
|
for (n = nlower; n <= nupper; n++) {
|
||||||
vatom[i][2] += v2;
|
mz = n+nz;
|
||||||
vatom[i][3] += v3;
|
z0 = r1d[2][n];
|
||||||
vatom[i][4] += v4;
|
for (m = nlower; m <= nupper; m++) {
|
||||||
vatom[i][5] += v5;
|
my = m+ny;
|
||||||
}
|
y0 = z0*r1d[1][m];
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
x0 = y0*r1d[0][l];
|
||||||
|
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
||||||
|
if (vflag_atom) {
|
||||||
|
v0 += x0*v0_brick[mz][my][mx];
|
||||||
|
v1 += x0*v1_brick[mz][my][mx];
|
||||||
|
v2 += x0*v2_brick[mz][my][mx];
|
||||||
|
v3 += x0*v3_brick[mz][my][mx];
|
||||||
|
v4 += x0*v4_brick[mz][my][mx];
|
||||||
|
v5 += x0*v5_brick[mz][my][mx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const double qi = q[i];
|
||||||
|
if (eflag_atom) eatom[i] += qi*u;
|
||||||
|
if (vflag_atom) {
|
||||||
|
vatom[i][0] += qi*v0;
|
||||||
|
vatom[i][1] += qi*v1;
|
||||||
|
vatom[i][2] += qi*v2;
|
||||||
|
vatom[i][3] += qi*v3;
|
||||||
|
vatom[i][4] += qi*v4;
|
||||||
|
vatom[i][5] += qi*v5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -590,7 +570,7 @@ void PPPMCGOMP::fieldforce_peratom()
|
|||||||
dx,dy,dz = distance of particle from "lower left" grid point
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
|
void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
|
||||||
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
||||||
{
|
{
|
||||||
int k,l;
|
int k,l;
|
||||||
FFT_SCALAR r1,r2,r3;
|
FFT_SCALAR r1,r2,r3;
|
||||||
@ -608,3 +588,28 @@ void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCAL
|
|||||||
r1d[2][k] = r3;
|
r1d[2][k] = r3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
charge assignment into drho1d
|
||||||
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMCGOMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
|
||||||
|
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
||||||
|
{
|
||||||
|
int k,l;
|
||||||
|
FFT_SCALAR r1,r2,r3;
|
||||||
|
|
||||||
|
for (k = (1-order)/2; k <= order/2; k++) {
|
||||||
|
r1 = r2 = r3 = ZEROF;
|
||||||
|
|
||||||
|
for (l = order-2; l >= 0; l--) {
|
||||||
|
r1 = drho_coeff[l][k] + r1*dx;
|
||||||
|
r2 = drho_coeff[l][k] + r2*dy;
|
||||||
|
r3 = drho_coeff[l][k] + r3*dz;
|
||||||
|
}
|
||||||
|
d1d[0][k] = r1;
|
||||||
|
d1d[1][k] = r2;
|
||||||
|
d1d[2][k] = r3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -25,23 +25,29 @@ KSpaceStyle(pppm/cg/omp,PPPMCGOMP)
|
|||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
class PPPMCGOMP : public PPPMCG, public ThrOMP {
|
class PPPMCGOMP : public PPPMCG, public ThrOMP {
|
||||||
public:
|
public:
|
||||||
PPPMCGOMP(class LAMMPS *, int, char **);
|
PPPMCGOMP(class LAMMPS *, int, char **);
|
||||||
virtual void compute(int, int);
|
|
||||||
virtual void setup();
|
|
||||||
virtual ~PPPMCGOMP () {};
|
virtual ~PPPMCGOMP () {};
|
||||||
|
virtual void compute(int, int);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void allocate();
|
virtual void allocate();
|
||||||
virtual void deallocate();
|
virtual void deallocate();
|
||||||
virtual void fieldforce();
|
|
||||||
virtual void fieldforce_peratom();
|
|
||||||
virtual void make_rho();
|
|
||||||
|
|
||||||
|
virtual void compute_gf_ik();
|
||||||
|
virtual void compute_gf_ad();
|
||||||
|
|
||||||
|
virtual void fieldforce_ik();
|
||||||
|
virtual void fieldforce_ad();
|
||||||
|
virtual void fieldforce_peratom();
|
||||||
|
// virtual void make_rho();
|
||||||
|
|
||||||
|
private:
|
||||||
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
const FFT_SCALAR &, const FFT_SCALAR &);
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
// void compute_rho_coeff();
|
void compute_drho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
// void slabcorr(int);
|
// void slabcorr(int);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -19,9 +19,12 @@
|
|||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
#include "domain.h"
|
#include "domain.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "fix_omp.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "math_const.h"
|
#include "math_const.h"
|
||||||
|
#include "math_special.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
@ -29,13 +32,12 @@
|
|||||||
#include "suffix.h"
|
#include "suffix.h"
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
using namespace MathConst;
|
using namespace MathConst;
|
||||||
|
using namespace MathSpecial;
|
||||||
|
|
||||||
#ifdef FFT_SINGLE
|
#ifdef FFT_SINGLE
|
||||||
#define ZEROF 0.0f
|
#define ZEROF 0.0f
|
||||||
#define ONEF 1.0f
|
|
||||||
#else
|
#else
|
||||||
#define ZEROF 0.0
|
#define ZEROF 0.0
|
||||||
#define ONEF 1.0
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EPS_HOC 1.0e-7
|
#define EPS_HOC 1.0e-7
|
||||||
@ -56,7 +58,27 @@ void PPPMOMP::allocate()
|
|||||||
{
|
{
|
||||||
PPPM::allocate();
|
PPPM::allocate();
|
||||||
|
|
||||||
const int nthreads = comm->nthreads;
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
const int tid = 0;
|
||||||
|
#endif
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
thr->init_pppm(order,memory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
free memory that depends on # of K-vectors and order
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMOMP::deallocate()
|
||||||
|
{
|
||||||
|
PPPM::deallocate();
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
@ -67,153 +89,26 @@ void PPPMOMP::allocate()
|
|||||||
#else
|
#else
|
||||||
const int tid = 0;
|
const int tid = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
FFT_SCALAR **rho1d_thr;
|
|
||||||
memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
ThrData *thr = fix->get_thr(tid);
|
||||||
thr->init_pppm(static_cast<void *>(rho1d_thr));
|
thr->init_pppm(-order,memory);
|
||||||
}
|
|
||||||
|
|
||||||
const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
|
|
||||||
|
|
||||||
// reallocate density brick, so it fits our needs
|
|
||||||
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
|
|
||||||
memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
|
|
||||||
nxlo_out,nxhi_out,"pppm:density_brick");
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: special version of reduce_data for FFT_SCALAR data type.
|
|
||||||
// reduce per thread data into the first part of the data
|
|
||||||
// array that is used for the non-threaded parts and reset
|
|
||||||
// the temporary storage to 0.0. this routine depends on
|
|
||||||
// multi-dimensional arrays like force stored in this order
|
|
||||||
// x1,y1,z1,x2,y2,z2,...
|
|
||||||
// we need to post a barrier to wait until all threads are done
|
|
||||||
// writing to the array.
|
|
||||||
static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
|
|
||||||
{
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
if (nthreads == 1) return;
|
|
||||||
#pragma omp barrier
|
|
||||||
{
|
|
||||||
const int nvals = ndim*nall;
|
|
||||||
const int idelta = nvals/nthreads + 1;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
|
|
||||||
|
|
||||||
// this if protects against having more threads than atoms
|
|
||||||
if (ifrom < nall) {
|
|
||||||
for (int m = ifrom; m < ito; ++m) {
|
|
||||||
for (int n = 1; n < nthreads; ++n) {
|
|
||||||
dall[m] += dall[n*nvals + m];
|
|
||||||
dall[n*nvals + m] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
free memory that depends on # of K-vectors and order
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void PPPMOMP::deallocate()
|
|
||||||
{
|
|
||||||
PPPM::deallocate();
|
|
||||||
for (int i=0; i < comm->nthreads; ++i) {
|
|
||||||
ThrData * thr = fix->get_thr(i);
|
|
||||||
FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
memory->destroy2d_offset(rho1d_thr,-order/2);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
adjust PPPM coeffs, called initially and whenever volume has changed
|
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMOMP::setup()
|
void PPPMOMP::compute_gf_ik()
|
||||||
{
|
{
|
||||||
int i,j,k,n;
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
double *prd;
|
|
||||||
|
|
||||||
// volume-dependent factors
|
|
||||||
// adjust z dimension for 2d slab PPPM
|
|
||||||
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
|
|
||||||
|
|
||||||
if (triclinic == 0) prd = domain->prd;
|
|
||||||
else prd = domain->prd_lamda;
|
|
||||||
|
|
||||||
const double xprd = prd[0];
|
const double xprd = prd[0];
|
||||||
const double yprd = prd[1];
|
const double yprd = prd[1];
|
||||||
const double zprd = prd[2];
|
const double zprd = prd[2];
|
||||||
const double zprd_slab = zprd*slab_volfactor;
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
volume = xprd * yprd * zprd_slab;
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
delxinv = nx_pppm/xprd;
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
delyinv = ny_pppm/yprd;
|
|
||||||
delzinv = nz_pppm/zprd_slab;
|
|
||||||
|
|
||||||
delvolinv = delxinv*delyinv*delzinv;
|
|
||||||
|
|
||||||
const double unitkx = (2.0*MY_PI/xprd);
|
|
||||||
const double unitky = (2.0*MY_PI/yprd);
|
|
||||||
const double unitkz = (2.0*MY_PI/zprd_slab);
|
|
||||||
|
|
||||||
// fkx,fky,fkz for my FFT grid pts
|
|
||||||
|
|
||||||
double per;
|
|
||||||
|
|
||||||
for (i = nxlo_fft; i <= nxhi_fft; i++) {
|
|
||||||
per = i - nx_pppm*(2*i/nx_pppm);
|
|
||||||
fkx[i] = unitkx*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = nylo_fft; i <= nyhi_fft; i++) {
|
|
||||||
per = i - ny_pppm*(2*i/ny_pppm);
|
|
||||||
fky[i] = unitky*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = nzlo_fft; i <= nzhi_fft; i++) {
|
|
||||||
per = i - nz_pppm*(2*i/nz_pppm);
|
|
||||||
fkz[i] = unitkz*per;
|
|
||||||
}
|
|
||||||
|
|
||||||
// virial coefficients
|
|
||||||
|
|
||||||
double sqk,vterm;
|
|
||||||
|
|
||||||
n = 0;
|
|
||||||
for (k = nzlo_fft; k <= nzhi_fft; k++) {
|
|
||||||
for (j = nylo_fft; j <= nyhi_fft; j++) {
|
|
||||||
for (i = nxlo_fft; i <= nxhi_fft; i++) {
|
|
||||||
sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
|
|
||||||
if (sqk == 0.0) {
|
|
||||||
vg[n][0] = 0.0;
|
|
||||||
vg[n][1] = 0.0;
|
|
||||||
vg[n][2] = 0.0;
|
|
||||||
vg[n][3] = 0.0;
|
|
||||||
vg[n][4] = 0.0;
|
|
||||||
vg[n][5] = 0.0;
|
|
||||||
} else {
|
|
||||||
vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
|
|
||||||
vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
|
|
||||||
vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
|
|
||||||
vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
|
|
||||||
vg[n][3] = vterm*fkx[i]*fky[j];
|
|
||||||
vg[n][4] = vterm*fkx[i]*fkz[k];
|
|
||||||
vg[n][5] = vterm*fky[j]*fkz[k];
|
|
||||||
}
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// modified (Hockney-Eastwood) Coulomb Green's function
|
|
||||||
|
|
||||||
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
|
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
|
||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
@ -221,93 +116,188 @@ void PPPMOMP::setup()
|
|||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
|
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
|
||||||
pow(-log(EPS_HOC),0.25));
|
pow(-log(EPS_HOC),0.25));
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
const double form = 1.0;
|
const int twoorder = 2*order;
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m;
|
double snx,sny,snz;
|
||||||
double snx,sny,snz,sqk;
|
|
||||||
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
double sum1,dot1,dot2;
|
double sum1,dot1,dot2;
|
||||||
double numerator,denominator;
|
double numerator,denominator;
|
||||||
const double gew2 = -4.0*g_ewald*g_ewald;
|
double sqk;
|
||||||
|
|
||||||
const int nnx = nxhi_fft-nxlo_fft+1;
|
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
const int nny = nyhi_fft-nylo_fft+1;
|
|
||||||
|
|
||||||
loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
for (m = nzlo_fft; m <= nzhi_fft; m++) {
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
const double fkzm = fkz[m];
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
snz = sin(0.5*fkzm*zprd_slab/nz_pppm);
|
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
|
||||||
snz *= snz;
|
|
||||||
|
|
||||||
for (l = nylo_fft; l <= nyhi_fft; l++) {
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
const double fkyl = fky[l];
|
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
|
||||||
sny = sin(0.5*fkyl*yprd/ny_pppm);
|
|
||||||
sny *= sny;
|
|
||||||
|
|
||||||
for (k = nxlo_fft; k <= nxhi_fft; k++) {
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
|
||||||
|
|
||||||
/* only compute the part designated to this thread */
|
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
|
||||||
nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
|
|
||||||
if ((nn < nnfrom) || (nn >=nnto)) continue;
|
|
||||||
|
|
||||||
const double fkxk = fkx[k];
|
if (sqk != 0.0) {
|
||||||
snx = sin(0.5*fkxk*xprd/nx_pppm);
|
numerator = 12.5663706/sqk;
|
||||||
snx *= snx;
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
sum1 = 0.0;
|
||||||
|
|
||||||
sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm;
|
for (nx = -nbx; nx <= nbx; nx++) {
|
||||||
|
qx = unitkx*(kper+nx_pppm*nx);
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
if (sqk != 0.0) {
|
for (ny = -nby; ny <= nby; ny++) {
|
||||||
numerator = form*MY_4PI/sqk;
|
qy = unitky*(lper+ny_pppm*ny);
|
||||||
denominator = gf_denom(snx,sny,snz);
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
sum1 = 0.0;
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
const double dorder = static_cast<double>(order);
|
wy = powsinxx(argy,twoorder);
|
||||||
for (nx = -nbx; nx <= nbx; nx++) {
|
|
||||||
qx = fkxk + unitkx*nx_pppm*nx;
|
|
||||||
sx = exp(qx*qx/gew2);
|
|
||||||
wx = 1.0;
|
|
||||||
argx = 0.5*qx*xprd/nx_pppm;
|
|
||||||
if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
|
|
||||||
wx *=wx;
|
|
||||||
|
|
||||||
for (ny = -nby; ny <= nby; ny++) {
|
for (nz = -nbz; nz <= nbz; nz++) {
|
||||||
qy = fkyl + unitky*ny_pppm*ny;
|
qz = unitkz*(mper+nz_pppm*nz);
|
||||||
sy = exp(qy*qy/gew2);
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
wy = 1.0;
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
argy = 0.5*qy*yprd/ny_pppm;
|
wz = powsinxx(argz,twoorder);
|
||||||
if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
|
|
||||||
wy *= wy;
|
|
||||||
|
|
||||||
for (nz = -nbz; nz <= nbz; nz++) {
|
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
|
||||||
qz = fkzm + unitkz*nz_pppm*nz;
|
dot2 = qx*qx+qy*qy+qz*qz;
|
||||||
sz = exp(qz*qz/gew2);
|
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
|
||||||
wz = 1.0;
|
|
||||||
argz = 0.5*qz*zprd_slab/nz_pppm;
|
|
||||||
if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
|
|
||||||
wz *= wz;
|
|
||||||
|
|
||||||
dot1 = fkxk*qx + fkyl*qy + fkzm*qz;
|
|
||||||
dot2 = qx*qx+qy*qy+qz*qz;
|
|
||||||
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
greensfn[nn] = numerator*sum1/denominator;
|
}
|
||||||
} else greensfn[nn] = 0.0;
|
|
||||||
}
|
}
|
||||||
}
|
greensfn[n] = numerator*sum1/denominator;
|
||||||
|
} else greensfn[n] = 0.0;
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
run the regular toplevel compute method from plain PPPPM
|
compute optimized Green's function for energy calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMOMP::compute_gf_ad()
|
||||||
|
{
|
||||||
|
|
||||||
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
|
|
||||||
|
const double xprd = prd[0];
|
||||||
|
const double yprd = prd[1];
|
||||||
|
const double zprd = prd[2];
|
||||||
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
|
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
|
const int twoorder = 2*order;
|
||||||
|
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double snx,sny,snz,sqk;
|
||||||
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
|
double numerator,denominator;
|
||||||
|
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
|
|
||||||
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
|
qz = unitkz*mper;
|
||||||
|
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
|
||||||
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
|
wz = powsinxx(argz,twoorder);
|
||||||
|
|
||||||
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
|
qy = unitky*lper;
|
||||||
|
sny = square(sin(0.5*qy*yprd/ny_pppm));
|
||||||
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
|
wy = powsinxx(argy,twoorder);
|
||||||
|
|
||||||
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
qx = unitkx*kper;
|
||||||
|
snx = square(sin(0.5*qx*xprd/nx_pppm));
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
|
sqk = qx*qx + qy*qy + qz*qz;
|
||||||
|
|
||||||
|
if (sqk != 0.0) {
|
||||||
|
numerator = MY_4PI/sqk;
|
||||||
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
} else {
|
||||||
|
greensfn[n] = 0.0;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end of paralle region
|
||||||
|
|
||||||
|
// compute the coefficients for the self-force correction
|
||||||
|
|
||||||
|
double prex, prey, prez, tmp[6];
|
||||||
|
prex = prey = prez = MY_PI/volume;
|
||||||
|
prex *= nx_pppm/xprd;
|
||||||
|
prey *= ny_pppm/yprd;
|
||||||
|
prez *= nz_pppm/zprd_slab;
|
||||||
|
tmp[0] = sf0 * prex;
|
||||||
|
tmp[1] = sf1 * prex*2;
|
||||||
|
tmp[2] = sf2 * prey;
|
||||||
|
tmp[3] = sf3 * prey*2;
|
||||||
|
tmp[4] = sf4 * prez;
|
||||||
|
tmp[5] = sf5 * prez*2;
|
||||||
|
|
||||||
|
// communicate values with other procs
|
||||||
|
|
||||||
|
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
run the regular toplevel compute method from plain PPPM
|
||||||
which will have individual methods replaced by our threaded
|
which will have individual methods replaced by our threaded
|
||||||
versions and then call the obligatory force reduction.
|
versions and then call the obligatory force reduction.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
@ -332,94 +322,10 @@ void PPPMOMP::compute(int eflag, int vflag)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create discretized "density" on section of global grid due to my particles
|
interpolate from grid to get electric field & force on my particles for ik
|
||||||
density(x,y,z) = charge "density" at grid points of my 3d brick
|
|
||||||
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
|
|
||||||
in global grid
|
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMOMP::make_rho()
|
void PPPMOMP::fieldforce_ik()
|
||||||
{
|
|
||||||
const double * const q = atom->q;
|
|
||||||
const double * const * const x = atom->x;
|
|
||||||
const int nthreads = comm->nthreads;
|
|
||||||
const int nlocal = atom->nlocal;
|
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
#pragma omp parallel default(none)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// each thread works on a fixed chunk of atoms.
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = nlocal;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int tid = 0;
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = nlocal;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int l,m,n,nx,ny,nz,mx,my,mz;
|
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
|
||||||
|
|
||||||
// set up clear 3d density array
|
|
||||||
const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
|
|
||||||
FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
|
|
||||||
memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
|
|
||||||
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
|
||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
|
||||||
|
|
||||||
// this if protects against having more threads than local atoms
|
|
||||||
if (ifrom < nlocal) {
|
|
||||||
for (int i = ifrom; i < ito; i++) {
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
|
||||||
ny = part2grid[i][1];
|
|
||||||
nz = part2grid[i][2];
|
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
|
||||||
|
|
||||||
z0 = delvolinv * q[i];
|
|
||||||
for (n = nlower; n <= nupper; n++) {
|
|
||||||
mz = n+nz;
|
|
||||||
y0 = z0*r1d[2][n];
|
|
||||||
for (m = nlower; m <= nupper; m++) {
|
|
||||||
my = m+ny;
|
|
||||||
x0 = y0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
db[mz][my][mx] += x0*r1d[0][l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// reduce 3d density array
|
|
||||||
if (nthreads > 1) {
|
|
||||||
data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
interpolate from grid to get electric field & force on my particles
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void PPPMOMP::fieldforce()
|
|
||||||
{
|
{
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -427,79 +333,170 @@ void PPPMOMP::fieldforce()
|
|||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
// ek = 3 components of E-field on particle
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
const double * const q = atom->q;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const double * const * const x = atom->x;
|
const double * _noalias const q = atom->q;
|
||||||
|
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
|
|
||||||
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
|
|
||||||
const int nthreads = comm->nthreads;
|
const int nthreads = comm->nthreads;
|
||||||
const int nlocal = atom->nlocal;
|
const int nlocal = atom->nlocal;
|
||||||
const double qqrd2e = force->qqrd2e;
|
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
|
||||||
// each thread works on a fixed chunk of atoms.
|
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = nlocal;
|
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
// get per thread data
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = nlocal;
|
|
||||||
const int tid = 0;
|
|
||||||
#endif
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
ThrData *thr = fix->get_thr(tid);
|
||||||
double * const * const f = thr->get_f();
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
|
||||||
int l,m,n,nx,ny,nz,mx,my,mz;
|
for (i = ifrom; i < ito; ++i) {
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
const int nx = p2g[i].a;
|
||||||
FFT_SCALAR ekx,eky,ekz;
|
const int ny = p2g[i].b;
|
||||||
|
const int nz = p2g[i].t;
|
||||||
|
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
|
||||||
|
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
|
||||||
|
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
|
||||||
|
|
||||||
// this if protects against having more threads than local atoms
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
if (ifrom < nlocal) {
|
|
||||||
for (int i = ifrom; i < ito; i++) {
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
ekx = eky = ekz = ZEROF;
|
||||||
ny = part2grid[i][1];
|
for (n = nlower; n <= nupper; n++) {
|
||||||
nz = part2grid[i][2];
|
mz = n+nz;
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
z0 = r1d[2][n];
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
for (m = nlower; m <= nupper; m++) {
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
my = m+ny;
|
||||||
|
y0 = z0*r1d[1][m];
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
ekx = eky = ekz = ZEROF;
|
x0 = y0*r1d[0][l];
|
||||||
for (n = nlower; n <= nupper; n++) {
|
ekx -= x0*vdx_brick[mz][my][mx];
|
||||||
mz = n+nz;
|
eky -= x0*vdy_brick[mz][my][mx];
|
||||||
z0 = r1d[2][n];
|
ekz -= x0*vdz_brick[mz][my][mx];
|
||||||
for (m = nlower; m <= nupper; m++) {
|
|
||||||
my = m+ny;
|
|
||||||
y0 = z0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
x0 = y0*r1d[0][l];
|
|
||||||
ekx -= x0*vdx_brick[mz][my][mx];
|
|
||||||
eky -= x0*vdy_brick[mz][my][mx];
|
|
||||||
ekz -= x0*vdz_brick[mz][my][mx];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert E-field to force
|
|
||||||
const double qfactor = qqrd2e*scale*q[i];
|
|
||||||
f[i][0] += qfactor*ekx;
|
|
||||||
f[i][1] += qfactor*eky;
|
|
||||||
f[i][2] += qfactor*ekz;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// convert E-field to force
|
||||||
|
|
||||||
|
const double qfactor = qqrd2e * scale * q[i];
|
||||||
|
f[i].x += qfactor*ekx;
|
||||||
|
f[i].y += qfactor*eky;
|
||||||
|
if (slabflag != 2) f[i].z += qfactor*ekz;
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
interpolate from grid to get per-atom energy/virial
|
interpolate from grid to get electric field & force on my particles for ad
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMOMP::fieldforce_ad()
|
||||||
|
{
|
||||||
|
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
|
||||||
|
const double hx_inv = nx_pppm/prd[0];
|
||||||
|
const double hy_inv = ny_pppm/prd[1];
|
||||||
|
const double hz_inv = nz_pppm/prd[2];
|
||||||
|
|
||||||
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
|
const double * _noalias const q = atom->q;
|
||||||
|
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
|
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
const int nlocal = atom->nlocal;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double s1,s2,s3,sf;
|
||||||
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
|
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
|
||||||
|
|
||||||
|
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
|
||||||
|
|
||||||
|
// get per thread data
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
|
||||||
|
|
||||||
|
for (i = ifrom; i < ito; ++i) {
|
||||||
|
const int nx = p2g[i].a;
|
||||||
|
const int ny = p2g[i].b;
|
||||||
|
const int nz = p2g[i].t;
|
||||||
|
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
|
||||||
|
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
|
||||||
|
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
|
||||||
|
|
||||||
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
|
compute_drho1d_thr(d1d,dx,dy,dz);
|
||||||
|
|
||||||
|
ekx = eky = ekz = ZEROF;
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
|
mz = n+nz;
|
||||||
|
for (m = nlower; m <= nupper; m++) {
|
||||||
|
my = m+ny;
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ekx *= hx_inv;
|
||||||
|
eky *= hy_inv;
|
||||||
|
ekz *= hz_inv;
|
||||||
|
|
||||||
|
// convert E-field to force and substract self forces
|
||||||
|
|
||||||
|
const double qi = q[i];
|
||||||
|
const double qfactor = qqrd2e * scale * qi;
|
||||||
|
|
||||||
|
s1 = x[i].x*hx_inv;
|
||||||
|
sf = sf_coeff[0]*sin(MY_2PI*s1);
|
||||||
|
sf += sf_coeff[1]*sin(MY_4PI*s1);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
f[i].x += qfactor*(ekx - sf);
|
||||||
|
|
||||||
|
s2 = x[i].y*hy_inv;
|
||||||
|
sf = sf_coeff[2]*sin(MY_2PI*s2);
|
||||||
|
sf += sf_coeff[3]*sin(MY_4PI*s2);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
f[i].y += qfactor*(eky - sf);
|
||||||
|
|
||||||
|
s3 = x[i].z*hz_inv;
|
||||||
|
sf = sf_coeff[4]*sin(MY_2PI*s3);
|
||||||
|
sf += sf_coeff[5]*sin(MY_4PI*s3);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
|
||||||
|
}
|
||||||
|
} // end of parallel region
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
interpolate from grid to get per-atom energy/virial
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMOMP::fieldforce_peratom()
|
void PPPMOMP::fieldforce_peratom()
|
||||||
{
|
{
|
||||||
@ -508,8 +505,8 @@ void PPPMOMP::fieldforce_peratom()
|
|||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
|
||||||
const double * const q = atom->q;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const double * const * const x = atom->x;
|
const double * _noalias const q = atom->q;
|
||||||
const int nthreads = comm->nthreads;
|
const int nthreads = comm->nthreads;
|
||||||
const int nlocal = atom->nlocal;
|
const int nlocal = atom->nlocal;
|
||||||
|
|
||||||
@ -517,73 +514,61 @@ void PPPMOMP::fieldforce_peratom()
|
|||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
|
||||||
// each thread works on a fixed chunk of atoms.
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = nlocal;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = nlocal;
|
|
||||||
const int tid = 0;
|
|
||||||
#endif
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
|
||||||
|
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
|
||||||
|
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
|
|
||||||
// this if protects against having more threads than local atoms
|
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
|
||||||
if (ifrom < nlocal) {
|
|
||||||
for (int i = ifrom; i < ito; i++) {
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
// get per thread data
|
||||||
ny = part2grid[i][1];
|
ThrData *thr = fix->get_thr(tid);
|
||||||
nz = part2grid[i][2];
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
for (i = ifrom; i < ito; ++i) {
|
||||||
|
nx = part2grid[i][0];
|
||||||
|
ny = part2grid[i][1];
|
||||||
|
nz = part2grid[i][2];
|
||||||
|
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
|
||||||
|
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
|
||||||
|
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
|
||||||
|
|
||||||
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
for (n = nlower; n <= nupper; n++) {
|
|
||||||
mz = n+nz;
|
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
|
||||||
z0 = r1d[2][n];
|
for (n = nlower; n <= nupper; n++) {
|
||||||
for (m = nlower; m <= nupper; m++) {
|
mz = n+nz;
|
||||||
my = m+ny;
|
z0 = r1d[2][n];
|
||||||
y0 = z0*r1d[1][m];
|
for (m = nlower; m <= nupper; m++) {
|
||||||
for (l = nlower; l <= nupper; l++) {
|
my = m+ny;
|
||||||
mx = l+nx;
|
y0 = z0*r1d[1][m];
|
||||||
x0 = y0*r1d[0][l];
|
for (l = nlower; l <= nupper; l++) {
|
||||||
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
mx = l+nx;
|
||||||
if (vflag_atom) {
|
x0 = y0*r1d[0][l];
|
||||||
v0 += x0*v0_brick[mz][my][mx];
|
if (eflag_atom) u += x0*u_brick[mz][my][mx];
|
||||||
v1 += x0*v1_brick[mz][my][mx];
|
if (vflag_atom) {
|
||||||
v2 += x0*v2_brick[mz][my][mx];
|
v0 += x0*v0_brick[mz][my][mx];
|
||||||
v3 += x0*v3_brick[mz][my][mx];
|
v1 += x0*v1_brick[mz][my][mx];
|
||||||
v4 += x0*v4_brick[mz][my][mx];
|
v2 += x0*v2_brick[mz][my][mx];
|
||||||
v5 += x0*v5_brick[mz][my][mx];
|
v3 += x0*v3_brick[mz][my][mx];
|
||||||
}
|
v4 += x0*v4_brick[mz][my][mx];
|
||||||
|
v5 += x0*v5_brick[mz][my][mx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (eflag_atom) eatom[i] += q[i]*u;
|
const double qi = q[i];
|
||||||
if (vflag_atom) {
|
if (eflag_atom) eatom[i] += qi*u;
|
||||||
vatom[i][0] += v0;
|
if (vflag_atom) {
|
||||||
vatom[i][1] += v1;
|
vatom[i][0] += qi*v0;
|
||||||
vatom[i][2] += v2;
|
vatom[i][1] += qi*v1;
|
||||||
vatom[i][3] += v3;
|
vatom[i][2] += qi*v2;
|
||||||
vatom[i][4] += v4;
|
vatom[i][3] += qi*v3;
|
||||||
vatom[i][5] += v5;
|
vatom[i][4] += qi*v4;
|
||||||
}
|
vatom[i][5] += qi*v5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -609,3 +594,28 @@ void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR
|
|||||||
r1d[2][k] = r3;
|
r1d[2][k] = r3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
charge assignment into drho1d
|
||||||
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMOMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
|
||||||
|
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
||||||
|
{
|
||||||
|
int k,l;
|
||||||
|
FFT_SCALAR r1,r2,r3;
|
||||||
|
|
||||||
|
for (k = (1-order)/2; k <= order/2; k++) {
|
||||||
|
r1 = r2 = r3 = ZEROF;
|
||||||
|
|
||||||
|
for (l = order-2; l >= 0; l--) {
|
||||||
|
r1 = drho_coeff[l][k] + r1*dx;
|
||||||
|
r2 = drho_coeff[l][k] + r2*dy;
|
||||||
|
r3 = drho_coeff[l][k] + r3*dz;
|
||||||
|
}
|
||||||
|
d1d[0][k] = r1;
|
||||||
|
d1d[1][k] = r2;
|
||||||
|
d1d[2][k] = r3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -25,23 +25,29 @@ KSpaceStyle(pppm/omp,PPPMOMP)
|
|||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
class PPPMOMP : public PPPM, public ThrOMP {
|
class PPPMOMP : public PPPM, public ThrOMP {
|
||||||
public:
|
public:
|
||||||
PPPMOMP(class LAMMPS *, int, char **);
|
PPPMOMP(class LAMMPS *, int, char **);
|
||||||
virtual ~PPPMOMP () {};
|
virtual ~PPPMOMP () {};
|
||||||
virtual void setup();
|
|
||||||
virtual void compute(int, int);
|
virtual void compute(int, int);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void allocate();
|
virtual void allocate();
|
||||||
virtual void deallocate();
|
virtual void deallocate();
|
||||||
virtual void fieldforce();
|
|
||||||
virtual void fieldforce_peratom();
|
|
||||||
virtual void make_rho();
|
|
||||||
|
|
||||||
|
virtual void compute_gf_ik();
|
||||||
|
virtual void compute_gf_ad();
|
||||||
|
|
||||||
|
virtual void fieldforce_ik();
|
||||||
|
virtual void fieldforce_ad();
|
||||||
|
virtual void fieldforce_peratom();
|
||||||
|
// virtual void make_rho();
|
||||||
|
|
||||||
|
private:
|
||||||
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
const FFT_SCALAR &, const FFT_SCALAR &);
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
// void compute_rho_coeff();
|
void compute_drho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
// void slabcorr(int);
|
// void slabcorr(int);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -19,9 +19,12 @@
|
|||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
#include "domain.h"
|
#include "domain.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "fix_omp.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "math_const.h"
|
#include "math_const.h"
|
||||||
|
#include "math_special.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
@ -29,74 +32,294 @@
|
|||||||
#include "suffix.h"
|
#include "suffix.h"
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
using namespace MathConst;
|
using namespace MathConst;
|
||||||
|
using namespace MathSpecial;
|
||||||
#define OFFSET 16384
|
|
||||||
|
|
||||||
#ifdef FFT_SINGLE
|
#ifdef FFT_SINGLE
|
||||||
#define ZEROF 0.0f
|
#define ZEROF 0.0f
|
||||||
#define ONEF 1.0f
|
|
||||||
#else
|
#else
|
||||||
#define ZEROF 0.0
|
#define ZEROF 0.0
|
||||||
#define ONEF 1.0
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define EPS_HOC 1.0e-7
|
||||||
|
#define OFFSET 16384
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
|
PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
|
||||||
PPPMOMP(lmp, narg, arg)
|
PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
|
||||||
{
|
{
|
||||||
tip4pflag = 1;
|
|
||||||
suffix_flag |= Suffix::OMP;
|
suffix_flag |= Suffix::OMP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------
|
||||||
|
allocate memory that depends on # of K-vectors and order
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
// NOTE: special version of reduce_data for FFT_SCALAR data type.
|
void PPPMTIP4POMP::allocate()
|
||||||
// reduce per thread data into the first part of the data
|
|
||||||
// array that is used for the non-threaded parts and reset
|
|
||||||
// the temporary storage to 0.0. this routine depends on
|
|
||||||
// multi-dimensional arrays like force stored in this order
|
|
||||||
// x1,y1,z1,x2,y2,z2,...
|
|
||||||
// we need to post a barrier to wait until all threads are done
|
|
||||||
// writing to the array.
|
|
||||||
static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
|
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
PPPMTIP4P::allocate();
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
if (nthreads == 1) return;
|
|
||||||
#pragma omp barrier
|
|
||||||
{
|
|
||||||
const int nvals = ndim*nall;
|
|
||||||
const int idelta = nvals/nthreads + 1;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
|
|
||||||
|
|
||||||
// this if protects against having more threads than atoms
|
#if defined(_OPENMP)
|
||||||
if (ifrom < nall) {
|
#pragma omp parallel default(none)
|
||||||
for (int m = ifrom; m < ito; ++m) {
|
|
||||||
for (int n = 1; n < nthreads; ++n) {
|
|
||||||
dall[m] += dall[n*nvals + m];
|
|
||||||
dall[n*nvals + m] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// NOOP in non-threaded execution.
|
|
||||||
return;
|
|
||||||
#endif
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
const int tid = 0;
|
||||||
|
#endif
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
thr->init_pppm(order,memory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------
|
||||||
|
free memory that depends on # of K-vectors and order
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMTIP4POMP::init()
|
void PPPMTIP4POMP::deallocate()
|
||||||
{
|
{
|
||||||
// TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms
|
PPPMTIP4P::deallocate();
|
||||||
|
|
||||||
if (force->newton == 0)
|
#if defined(_OPENMP)
|
||||||
error->all(FLERR,"Kspace style pppm/tip4p/omp requires newton on");
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
const int tid = 0;
|
||||||
|
#endif
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
thr->init_pppm(-order,memory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
PPPMOMP::init();
|
/* ----------------------------------------------------------------------
|
||||||
|
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMTIP4POMP::compute_gf_ik()
|
||||||
|
{
|
||||||
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
|
|
||||||
|
const double xprd = prd[0];
|
||||||
|
const double yprd = prd[1];
|
||||||
|
const double zprd = prd[2];
|
||||||
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
|
|
||||||
|
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
|
||||||
|
pow(-log(EPS_HOC),0.25));
|
||||||
|
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
|
||||||
|
pow(-log(EPS_HOC),0.25));
|
||||||
|
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
|
||||||
|
pow(-log(EPS_HOC),0.25));
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
|
const int twoorder = 2*order;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double snx,sny,snz;
|
||||||
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
|
double sum1,dot1,dot2;
|
||||||
|
double numerator,denominator;
|
||||||
|
double sqk;
|
||||||
|
|
||||||
|
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
|
|
||||||
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
|
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
|
||||||
|
|
||||||
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
|
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
|
||||||
|
|
||||||
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
|
||||||
|
|
||||||
|
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
|
||||||
|
|
||||||
|
if (sqk != 0.0) {
|
||||||
|
numerator = 12.5663706/sqk;
|
||||||
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
sum1 = 0.0;
|
||||||
|
|
||||||
|
for (nx = -nbx; nx <= nbx; nx++) {
|
||||||
|
qx = unitkx*(kper+nx_pppm*nx);
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
|
for (ny = -nby; ny <= nby; ny++) {
|
||||||
|
qy = unitky*(lper+ny_pppm*ny);
|
||||||
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
|
wy = powsinxx(argy,twoorder);
|
||||||
|
|
||||||
|
for (nz = -nbz; nz <= nbz; nz++) {
|
||||||
|
qz = unitkz*(mper+nz_pppm*nz);
|
||||||
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
|
wz = powsinxx(argz,twoorder);
|
||||||
|
|
||||||
|
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
|
||||||
|
dot2 = qx*qx+qy*qy+qz*qz;
|
||||||
|
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
greensfn[n] = numerator*sum1/denominator;
|
||||||
|
} else greensfn[n] = 0.0;
|
||||||
|
}
|
||||||
|
} // end of parallel region
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
compute optimized Green's function for energy calculation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMTIP4POMP::compute_gf_ad()
|
||||||
|
{
|
||||||
|
|
||||||
|
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
|
||||||
|
|
||||||
|
const double xprd = prd[0];
|
||||||
|
const double yprd = prd[1];
|
||||||
|
const double zprd = prd[2];
|
||||||
|
const double zprd_slab = zprd*slab_volfactor;
|
||||||
|
const double unitkx = (MY_2PI/xprd);
|
||||||
|
const double unitky = (MY_2PI/yprd);
|
||||||
|
const double unitkz = (MY_2PI/zprd_slab);
|
||||||
|
|
||||||
|
const int numk = nxhi_fft - nxlo_fft + 1;
|
||||||
|
const int numl = nyhi_fft - nylo_fft + 1;
|
||||||
|
|
||||||
|
const int twoorder = 2*order;
|
||||||
|
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double snx,sny,snz,sqk;
|
||||||
|
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
|
||||||
|
double numerator,denominator;
|
||||||
|
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
|
||||||
|
|
||||||
|
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
|
||||||
|
|
||||||
|
for (n = nfrom; n < nto; ++n) {
|
||||||
|
|
||||||
|
m = n / (numl*numk);
|
||||||
|
l = (n - m*numl*numk) / numk;
|
||||||
|
k = n - m*numl*numk - l*numk;
|
||||||
|
m += nzlo_fft;
|
||||||
|
l += nylo_fft;
|
||||||
|
k += nxlo_fft;
|
||||||
|
|
||||||
|
mper = m - nz_pppm*(2*m/nz_pppm);
|
||||||
|
qz = unitkz*mper;
|
||||||
|
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
|
||||||
|
sz = exp(-0.25*square(qz/g_ewald));
|
||||||
|
argz = 0.5*qz*zprd_slab/nz_pppm;
|
||||||
|
wz = powsinxx(argz,twoorder);
|
||||||
|
|
||||||
|
lper = l - ny_pppm*(2*l/ny_pppm);
|
||||||
|
qy = unitky*lper;
|
||||||
|
sny = square(sin(0.5*qy*yprd/ny_pppm));
|
||||||
|
sy = exp(-0.25*square(qy/g_ewald));
|
||||||
|
argy = 0.5*qy*yprd/ny_pppm;
|
||||||
|
wy = powsinxx(argy,twoorder);
|
||||||
|
|
||||||
|
kper = k - nx_pppm*(2*k/nx_pppm);
|
||||||
|
qx = unitkx*kper;
|
||||||
|
snx = square(sin(0.5*qx*xprd/nx_pppm));
|
||||||
|
sx = exp(-0.25*square(qx/g_ewald));
|
||||||
|
argx = 0.5*qx*xprd/nx_pppm;
|
||||||
|
wx = powsinxx(argx,twoorder);
|
||||||
|
|
||||||
|
sqk = qx*qx + qy*qy + qz*qz;
|
||||||
|
|
||||||
|
if (sqk != 0.0) {
|
||||||
|
numerator = MY_4PI/sqk;
|
||||||
|
denominator = gf_denom(snx,sny,snz);
|
||||||
|
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
} else {
|
||||||
|
greensfn[n] = 0.0;
|
||||||
|
sf0 += sf_precoeff1[n]*greensfn[n];
|
||||||
|
sf1 += sf_precoeff2[n]*greensfn[n];
|
||||||
|
sf2 += sf_precoeff3[n]*greensfn[n];
|
||||||
|
sf3 += sf_precoeff4[n]*greensfn[n];
|
||||||
|
sf4 += sf_precoeff5[n]*greensfn[n];
|
||||||
|
sf5 += sf_precoeff6[n]*greensfn[n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end of paralle region
|
||||||
|
|
||||||
|
// compute the coefficients for the self-force correction
|
||||||
|
|
||||||
|
double prex, prey, prez, tmp[6];
|
||||||
|
prex = prey = prez = MY_PI/volume;
|
||||||
|
prex *= nx_pppm/xprd;
|
||||||
|
prey *= ny_pppm/yprd;
|
||||||
|
prez *= nz_pppm/zprd_slab;
|
||||||
|
tmp[0] = sf0 * prex;
|
||||||
|
tmp[1] = sf1 * prex*2;
|
||||||
|
tmp[2] = sf2 * prey;
|
||||||
|
tmp[3] = sf3 * prey*2;
|
||||||
|
tmp[4] = sf4 * prez;
|
||||||
|
tmp[5] = sf5 * prez*2;
|
||||||
|
|
||||||
|
// communicate values with other procs
|
||||||
|
|
||||||
|
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
run the regular toplevel compute method from plain PPPM
|
||||||
|
which will have individual methods replaced by our threaded
|
||||||
|
versions and then call the obligatory force reduction.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMTIP4POMP::compute(int eflag, int vflag)
|
||||||
|
{
|
||||||
|
|
||||||
|
PPPMTIP4P::compute(eflag,vflag);
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none) shared(eflag,vflag)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
const int tid = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
const int tid = 0;
|
||||||
|
#endif
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
reduce_thr(this, eflag, vflag, thr);
|
||||||
|
} // end of omp parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -107,8 +330,12 @@ void PPPMTIP4POMP::init()
|
|||||||
|
|
||||||
void PPPMTIP4POMP::particle_map()
|
void PPPMTIP4POMP::particle_map()
|
||||||
{
|
{
|
||||||
const int * const type = atom->type;
|
const int * _noalias const type = atom->type;
|
||||||
const double * const * const x = atom->x;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
|
int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
const int nlocal = atom->nlocal;
|
const int nlocal = atom->nlocal;
|
||||||
|
|
||||||
int i, flag = 0;
|
int i, flag = 0;
|
||||||
@ -116,34 +343,33 @@ void PPPMTIP4POMP::particle_map()
|
|||||||
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
|
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
int nx,ny,nz,iH1,iH2;
|
dbl3_t xM;
|
||||||
double xM[3];
|
int iH1,iH2;
|
||||||
|
|
||||||
if (type[i] == typeO) {
|
if (type[i] == typeO) {
|
||||||
find_M(i,iH1,iH2,xM);
|
find_M_thr(i,iH1,iH2,xM);
|
||||||
} else {
|
} else {
|
||||||
xM[0] = x[i][0];
|
xM = x[i];
|
||||||
xM[1] = x[i][1];
|
|
||||||
xM[2] = x[i][2];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
// current particle coord can be outside global and local box
|
// current particle coord can be outside global and local box
|
||||||
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
|
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
|
||||||
|
|
||||||
nx = static_cast<int> ((xM[0]-boxlo[0])*delxinv+shift) - OFFSET;
|
const int nx = static_cast<int> ((xM.x-boxlox)*delxinv+shift) - OFFSET;
|
||||||
ny = static_cast<int> ((xM[1]-boxlo[1])*delyinv+shift) - OFFSET;
|
const int ny = static_cast<int> ((xM.y-boxloy)*delyinv+shift) - OFFSET;
|
||||||
nz = static_cast<int> ((xM[2]-boxlo[2])*delzinv+shift) - OFFSET;
|
const int nz = static_cast<int> ((xM.z-boxloz)*delzinv+shift) - OFFSET;
|
||||||
|
|
||||||
part2grid[i][0] = nx;
|
p2g[i].a = nx;
|
||||||
part2grid[i][1] = ny;
|
p2g[i].b = ny;
|
||||||
part2grid[i][2] = nz;
|
p2g[i].t = nz;
|
||||||
|
|
||||||
// check that entire stencil around nx,ny,nz will fit in my 3d brick
|
// check that entire stencil around nx,ny,nz will fit in my 3d brick
|
||||||
|
|
||||||
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
|
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
|
||||||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
|
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
|
||||||
nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++;
|
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
|
||||||
|
flag++;
|
||||||
}
|
}
|
||||||
|
|
||||||
int flag_all;
|
int flag_all;
|
||||||
@ -160,96 +386,68 @@ void PPPMTIP4POMP::particle_map()
|
|||||||
|
|
||||||
void PPPMTIP4POMP::make_rho()
|
void PPPMTIP4POMP::make_rho()
|
||||||
{
|
{
|
||||||
const double * const q = atom->q;
|
const double * _noalias const q = atom->q;
|
||||||
const double * const * const x = atom->x;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const int * const type = atom->type;
|
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
const int nthreads = comm->nthreads;
|
const int * _noalias const type = atom->type;
|
||||||
|
|
||||||
|
dbl3_t xM;
|
||||||
|
FFT_SCALAR x0,y0,z0;
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
|
|
||||||
|
int l,m,n,mx,my,mz,iH1,iH2;
|
||||||
|
|
||||||
const int nlocal = atom->nlocal;
|
const int nlocal = atom->nlocal;
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
// clear 3d density array
|
||||||
#pragma omp parallel default(none)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(_OPENMP)
|
|
||||||
// each thread works on a fixed chunk of atoms.
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
const int inum = nlocal;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
|
||||||
#else
|
|
||||||
const int tid = 0;
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = nlocal;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
|
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
memset(vec,0,ngrid*sizeof(FFT_SCALAR));
|
||||||
double xM[3];
|
|
||||||
|
|
||||||
// set up clear 3d density array
|
// loop over my charges, add their contribution to nearby grid points
|
||||||
const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
for (int i = 0; i < nlocal; i++) {
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
if (type[i] == typeO) {
|
||||||
|
find_M_thr(i,iH1,iH2,xM);
|
||||||
|
} else {
|
||||||
|
xM = x[i];
|
||||||
|
}
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
const int nx = p2g[i].a;
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
const int ny = p2g[i].b;
|
||||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
const int nz = p2g[i].t;
|
||||||
// (mx,my,mz) = global coords of moving stencil pt
|
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
|
||||||
|
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
|
||||||
|
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
|
||||||
|
|
||||||
// this if protects against having more threads than local atoms
|
compute_rho1d(dx,dy,dz);
|
||||||
if (ifrom < nlocal) {
|
|
||||||
for (int i = ifrom; i < ito; i++) {
|
|
||||||
|
|
||||||
if (type[i] == typeO) {
|
z0 = delvolinv * q[i];
|
||||||
find_M(i,iH1,iH2,xM);
|
for (n = nlower; n <= nupper; n++) {
|
||||||
} else {
|
mz = n+nz;
|
||||||
xM[0] = x[i][0];
|
y0 = z0*rho1d[2][n];
|
||||||
xM[1] = x[i][1];
|
for (m = nlower; m <= nupper; m++) {
|
||||||
xM[2] = x[i][2];
|
my = m+ny;
|
||||||
}
|
x0 = y0*rho1d[1][m];
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
nx = part2grid[i][0];
|
mx = l+nx;
|
||||||
ny = part2grid[i][1];
|
density_brick[mz][my][mx] += x0*rho1d[0][l];
|
||||||
nz = part2grid[i][2];
|
|
||||||
dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
|
|
||||||
dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
|
|
||||||
dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
|
|
||||||
|
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
|
||||||
|
|
||||||
z0 = delvolinv * q[i];
|
|
||||||
for (n = nlower; n <= nupper; n++) {
|
|
||||||
mz = n+nz;
|
|
||||||
y0 = z0*r1d[2][n];
|
|
||||||
for (m = nlower; m <= nupper; m++) {
|
|
||||||
my = m+ny;
|
|
||||||
x0 = y0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
db[mz][my][mx] += x0*r1d[0][l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if defined(_OPENMP)
|
|
||||||
// reduce 3d density array
|
|
||||||
if (nthreads > 1) {
|
|
||||||
data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
interpolate from grid to get electric field & force on my particles
|
interpolate from grid to get electric field & force on my particles for ik
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMTIP4POMP::fieldforce()
|
void PPPMTIP4POMP::fieldforce_ik()
|
||||||
{
|
{
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -257,106 +455,216 @@ void PPPMTIP4POMP::fieldforce()
|
|||||||
// (mx,my,mz) = global coords of moving stencil pt
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
// ek = 3 components of E-field on particle
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
const double * const q = atom->q;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
const double * const * const x = atom->x;
|
const double * _noalias const q = atom->q;
|
||||||
const int * const type = atom->type;
|
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
|
const int * _noalias const type = atom->type;
|
||||||
|
|
||||||
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
|
|
||||||
const int nthreads = comm->nthreads;
|
const int nthreads = comm->nthreads;
|
||||||
const int nlocal = atom->nlocal;
|
const int nlocal = atom->nlocal;
|
||||||
const double qqrd2e = force->qqrd2e;
|
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(none)
|
#pragma omp parallel default(none)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
dbl3_t xM;
|
||||||
// each thread works on a fixed chunk of atoms.
|
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
|
||||||
const int tid = omp_get_thread_num();
|
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
|
||||||
const int inum = nlocal;
|
|
||||||
const int idelta = 1 + inum/nthreads;
|
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
|
||||||
const int ifrom = tid*idelta;
|
|
||||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
// get per thread data
|
||||||
#else
|
|
||||||
const int ifrom = 0;
|
|
||||||
const int ito = nlocal;
|
|
||||||
const int tid = 0;
|
|
||||||
#endif
|
|
||||||
ThrData *thr = fix->get_thr(tid);
|
ThrData *thr = fix->get_thr(tid);
|
||||||
double * const * const f = thr->get_f();
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
|
||||||
int l,m,n,nx,ny,nz,mx,my,mz;
|
for (i = ifrom; i < ito; ++i) {
|
||||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
if (type[i] == typeO) {
|
||||||
FFT_SCALAR ekx,eky,ekz;
|
find_M_thr(i,iH1,iH2,xM);
|
||||||
int iH1,iH2;
|
} else xM = x[i];
|
||||||
double xM[3], fx,fy,fz;
|
|
||||||
|
|
||||||
// this if protects against having more threads than local atoms
|
const int nx = p2g[i].a;
|
||||||
if (ifrom < nlocal) {
|
const int ny = p2g[i].b;
|
||||||
for (int i = ifrom; i < ito; i++) {
|
const int nz = p2g[i].t;
|
||||||
|
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
|
||||||
|
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
|
||||||
|
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
|
||||||
|
|
||||||
if (type[i] == typeO) {
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
find_M(i,iH1,iH2,xM);
|
|
||||||
} else {
|
|
||||||
xM[0] = x[i][0];
|
|
||||||
xM[1] = x[i][1];
|
|
||||||
xM[2] = x[i][2];
|
|
||||||
}
|
|
||||||
|
|
||||||
nx = part2grid[i][0];
|
ekx = eky = ekz = ZEROF;
|
||||||
ny = part2grid[i][1];
|
for (n = nlower; n <= nupper; n++) {
|
||||||
nz = part2grid[i][2];
|
mz = n+nz;
|
||||||
dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
|
z0 = r1d[2][n];
|
||||||
dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
|
for (m = nlower; m <= nupper; m++) {
|
||||||
dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
|
my = m+ny;
|
||||||
|
y0 = z0*r1d[1][m];
|
||||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
ekx = eky = ekz = ZEROF;
|
x0 = y0*r1d[0][l];
|
||||||
for (n = nlower; n <= nupper; n++) {
|
ekx -= x0*vdx_brick[mz][my][mx];
|
||||||
mz = n+nz;
|
eky -= x0*vdy_brick[mz][my][mx];
|
||||||
z0 = r1d[2][n];
|
ekz -= x0*vdz_brick[mz][my][mx];
|
||||||
for (m = nlower; m <= nupper; m++) {
|
|
||||||
my = m+ny;
|
|
||||||
y0 = z0*r1d[1][m];
|
|
||||||
for (l = nlower; l <= nupper; l++) {
|
|
||||||
mx = l+nx;
|
|
||||||
x0 = y0*r1d[0][l];
|
|
||||||
ekx -= x0*vdx_brick[mz][my][mx];
|
|
||||||
eky -= x0*vdy_brick[mz][my][mx];
|
|
||||||
ekz -= x0*vdz_brick[mz][my][mx];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// convert E-field to force
|
// convert E-field to force
|
||||||
const double qfactor = qqrd2e*scale*q[i];
|
|
||||||
|
|
||||||
if (type[i] != typeO) {
|
const double qfactor = qqrd2e * scale * q[i];
|
||||||
f[i][0] += qfactor*ekx;
|
if (type[i] != typeO) {
|
||||||
f[i][1] += qfactor*eky;
|
f[i].x += qfactor*ekx;
|
||||||
f[i][2] += qfactor*ekz;
|
f[i].y += qfactor*eky;
|
||||||
|
if (slabflag != 2) f[i].z += qfactor*ekz;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fx = qfactor * ekx;
|
const double fx = qfactor * ekx;
|
||||||
fy = qfactor * eky;
|
const double fy = qfactor * eky;
|
||||||
fz = qfactor * ekz;
|
const double fz = qfactor * ekz;
|
||||||
find_M(i,iH1,iH2,xM);
|
|
||||||
|
|
||||||
f[i][0] += fx*(1 - alpha);
|
f[i].x += fx*(1 - alpha);
|
||||||
f[i][1] += fy*(1 - alpha);
|
f[i].y += fy*(1 - alpha);
|
||||||
f[i][2] += fz*(1 - alpha);
|
if (slabflag != 2) f[i].z += fz*(1 - alpha);
|
||||||
|
|
||||||
f[iH1][0] += 0.5*alpha*fx;
|
f[iH1].x += 0.5*alpha*fx;
|
||||||
f[iH1][1] += 0.5*alpha*fy;
|
f[iH1].y += 0.5*alpha*fy;
|
||||||
f[iH1][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
|
||||||
|
|
||||||
f[iH2][0] += 0.5*alpha*fx;
|
f[iH2].x += 0.5*alpha*fx;
|
||||||
f[iH2][1] += 0.5*alpha*fy;
|
f[iH2].y += 0.5*alpha*fy;
|
||||||
f[iH2][2] += 0.5*alpha*fz;
|
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // end of parallel region
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
interpolate from grid to get electric field & force on my particles for ad
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMTIP4POMP::fieldforce_ad()
|
||||||
|
{
|
||||||
|
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
|
||||||
|
const double hx_inv = nx_pppm/prd[0];
|
||||||
|
const double hy_inv = ny_pppm/prd[1];
|
||||||
|
const double hz_inv = nz_pppm/prd[2];
|
||||||
|
|
||||||
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
|
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||||
|
// (mx,my,mz) = global coords of moving stencil pt
|
||||||
|
// ek = 3 components of E-field on particle
|
||||||
|
|
||||||
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
|
const double * _noalias const q = atom->q;
|
||||||
|
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
|
||||||
|
const int * _noalias const type = atom->type;
|
||||||
|
|
||||||
|
const double qqrd2e = force->qqrd2e;
|
||||||
|
const double boxlox = boxlo[0];
|
||||||
|
const double boxloy = boxlo[1];
|
||||||
|
const double boxloz = boxlo[2];
|
||||||
|
|
||||||
|
const int nthreads = comm->nthreads;
|
||||||
|
const int nlocal = atom->nlocal;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
double s1,s2,s3,sf;
|
||||||
|
dbl3_t xM;
|
||||||
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
|
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
|
||||||
|
|
||||||
|
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
|
||||||
|
|
||||||
|
// get per thread data
|
||||||
|
ThrData *thr = fix->get_thr(tid);
|
||||||
|
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
|
||||||
|
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||||
|
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
|
||||||
|
|
||||||
|
for (i = ifrom; i < ito; ++i) {
|
||||||
|
if (type[i] == typeO) {
|
||||||
|
find_M_thr(i,iH1,iH2,xM);
|
||||||
|
} else xM = x[i];
|
||||||
|
|
||||||
|
const int nx = p2g[i].a;
|
||||||
|
const int ny = p2g[i].b;
|
||||||
|
const int nz = p2g[i].t;
|
||||||
|
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
|
||||||
|
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
|
||||||
|
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
|
||||||
|
|
||||||
|
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||||
|
compute_drho1d_thr(d1d,dx,dy,dz);
|
||||||
|
|
||||||
|
ekx = eky = ekz = ZEROF;
|
||||||
|
for (n = nlower; n <= nupper; n++) {
|
||||||
|
mz = n+nz;
|
||||||
|
for (m = nlower; m <= nupper; m++) {
|
||||||
|
my = m+ny;
|
||||||
|
for (l = nlower; l <= nupper; l++) {
|
||||||
|
mx = l+nx;
|
||||||
|
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ekx *= hx_inv;
|
||||||
|
eky *= hy_inv;
|
||||||
|
ekz *= hz_inv;
|
||||||
|
|
||||||
|
// convert E-field to force and substract self forces
|
||||||
|
|
||||||
|
const double qi = q[i];
|
||||||
|
const double qfactor = qqrd2e * scale * qi;
|
||||||
|
|
||||||
|
s1 = x[i].x*hx_inv;
|
||||||
|
sf = sf_coeff[0]*sin(MY_2PI*s1);
|
||||||
|
sf += sf_coeff[1]*sin(MY_4PI*s1);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
const double fx = qfactor*(ekx - sf);
|
||||||
|
|
||||||
|
s2 = x[i].y*hy_inv;
|
||||||
|
sf = sf_coeff[2]*sin(MY_2PI*s2);
|
||||||
|
sf += sf_coeff[3]*sin(MY_4PI*s2);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
const double fy = qfactor*(eky - sf);
|
||||||
|
|
||||||
|
s3 = x[i].z*hz_inv;
|
||||||
|
sf = sf_coeff[4]*sin(MY_2PI*s3);
|
||||||
|
sf += sf_coeff[5]*sin(MY_4PI*s3);
|
||||||
|
sf *= 2.0*qi;
|
||||||
|
const double fz = qfactor*(ekz - sf);
|
||||||
|
|
||||||
|
if (type[i] != typeO) {
|
||||||
|
f[i].x += fx;
|
||||||
|
f[i].y += fy;
|
||||||
|
if (slabflag != 2) f[i].z += fz;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
f[i].x += fx*(1 - alpha);
|
||||||
|
f[i].y += fy*(1 - alpha);
|
||||||
|
if (slabflag != 2) f[i].z += fz*(1 - alpha);
|
||||||
|
|
||||||
|
f[iH1].x += 0.5*alpha*fx;
|
||||||
|
f[iH1].y += 0.5*alpha*fy;
|
||||||
|
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
|
||||||
|
|
||||||
|
f[iH2].x += 0.5*alpha*fx;
|
||||||
|
f[iH2].y += 0.5*alpha*fy;
|
||||||
|
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end of parallel region
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -365,7 +673,7 @@ void PPPMTIP4POMP::fieldforce()
|
|||||||
also return local indices iH1,iH2 of H atoms
|
also return local indices iH1,iH2 of H atoms
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM)
|
void PPPMTIP4POMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM)
|
||||||
{
|
{
|
||||||
iH1 = atom->map(atom->tag[i] + 1);
|
iH1 = atom->map(atom->tag[i] + 1);
|
||||||
iH2 = atom->map(atom->tag[i] + 2);
|
iH2 = atom->map(atom->tag[i] + 2);
|
||||||
@ -374,19 +682,69 @@ void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM)
|
|||||||
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
|
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
|
||||||
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
|
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
|
||||||
|
|
||||||
const double * const * const x = atom->x;
|
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
|
||||||
|
|
||||||
double delx1 = x[iH1][0] - x[i][0];
|
double delx1 = x[iH1].x - x[i].x;
|
||||||
double dely1 = x[iH1][1] - x[i][1];
|
double dely1 = x[iH1].y - x[i].y;
|
||||||
double delz1 = x[iH1][2] - x[i][2];
|
double delz1 = x[iH1].z - x[i].z;
|
||||||
domain->minimum_image(delx1,dely1,delz1);
|
domain->minimum_image(delx1,dely1,delz1);
|
||||||
|
|
||||||
double delx2 = x[iH2][0] - x[i][0];
|
double delx2 = x[iH2].x - x[i].x;
|
||||||
double dely2 = x[iH2][1] - x[i][1];
|
double dely2 = x[iH2].y - x[i].y;
|
||||||
double delz2 = x[iH2][2] - x[i][2];
|
double delz2 = x[iH2].z - x[i].z;
|
||||||
domain->minimum_image(delx2,dely2,delz2);
|
domain->minimum_image(delx2,dely2,delz2);
|
||||||
|
|
||||||
xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
|
xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2);
|
||||||
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
|
xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2);
|
||||||
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
|
xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
charge assignment into rho1d
|
||||||
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
void PPPMTIP4POMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
|
||||||
|
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
||||||
|
{
|
||||||
|
int k,l;
|
||||||
|
FFT_SCALAR r1,r2,r3;
|
||||||
|
|
||||||
|
for (k = (1-order)/2; k <= order/2; k++) {
|
||||||
|
r1 = r2 = r3 = ZEROF;
|
||||||
|
|
||||||
|
for (l = order-1; l >= 0; l--) {
|
||||||
|
r1 = rho_coeff[l][k] + r1*dx;
|
||||||
|
r2 = rho_coeff[l][k] + r2*dy;
|
||||||
|
r3 = rho_coeff[l][k] + r3*dz;
|
||||||
|
}
|
||||||
|
r1d[0][k] = r1;
|
||||||
|
r1d[1][k] = r2;
|
||||||
|
r1d[2][k] = r3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
charge assignment into drho1d
|
||||||
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PPPMTIP4POMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
|
||||||
|
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
|
||||||
|
{
|
||||||
|
int k,l;
|
||||||
|
FFT_SCALAR r1,r2,r3;
|
||||||
|
|
||||||
|
for (k = (1-order)/2; k <= order/2; k++) {
|
||||||
|
r1 = r2 = r3 = ZEROF;
|
||||||
|
|
||||||
|
for (l = order-2; l >= 0; l--) {
|
||||||
|
r1 = drho_coeff[l][k] + r1*dx;
|
||||||
|
r2 = drho_coeff[l][k] + r2*dy;
|
||||||
|
r3 = drho_coeff[l][k] + r3*dz;
|
||||||
|
}
|
||||||
|
d1d[0][k] = r1;
|
||||||
|
d1d[1][k] = r2;
|
||||||
|
d1d[2][k] = r3;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,25 +20,40 @@ KSpaceStyle(pppm/tip4p/omp,PPPMTIP4POMP)
|
|||||||
#ifndef LMP_PPPM_TIP4P_OMP_H
|
#ifndef LMP_PPPM_TIP4P_OMP_H
|
||||||
#define LMP_PPPM_TIP4P_OMP_H
|
#define LMP_PPPM_TIP4P_OMP_H
|
||||||
|
|
||||||
#include "pppm_omp.h"
|
#include "pppm_tip4p.h"
|
||||||
|
#include "thr_omp.h"
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
class PPPMTIP4POMP : public PPPMOMP {
|
class PPPMTIP4POMP : public PPPMTIP4P, public ThrOMP {
|
||||||
public:
|
public:
|
||||||
PPPMTIP4POMP(class LAMMPS *, int, char **);
|
PPPMTIP4POMP(class LAMMPS *, int, char **);
|
||||||
virtual ~PPPMTIP4POMP () {};
|
virtual ~PPPMTIP4POMP () {};
|
||||||
virtual void init();
|
virtual void compute(int, int);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
virtual void allocate();
|
||||||
|
virtual void deallocate();
|
||||||
|
|
||||||
|
virtual void compute_gf_ik();
|
||||||
|
virtual void compute_gf_ad();
|
||||||
|
|
||||||
virtual void particle_map();
|
virtual void particle_map();
|
||||||
virtual void fieldforce();
|
virtual void make_rho(); // XXX: not (yet) multi-threaded
|
||||||
virtual void make_rho();
|
|
||||||
|
virtual void fieldforce_ik();
|
||||||
|
virtual void fieldforce_ad();
|
||||||
|
// virtual void fieldforce_peratom(); XXX: need to benchmark first.
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void find_M(int, int &, int &, double *);
|
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
|
void compute_drho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
|
||||||
|
const FFT_SCALAR &, const FFT_SCALAR &);
|
||||||
|
|
||||||
// void slabcorr(int);
|
void find_M_thr(const int, int &, int &, dbl3_t &);
|
||||||
|
|
||||||
|
// void slabcorr(int); // XXX: not (yet) multi-threaded
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -21,13 +21,14 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "memory.h"
|
||||||
|
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
|
||||||
ThrData::ThrData(int tid)
|
ThrData::ThrData(int tid)
|
||||||
: _f(NULL), _torque(NULL), _erforce(NULL), _de(NULL), _drho(NULL), _mu(NULL),
|
: _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0),
|
||||||
_lambda(NULL), _rhoB(NULL), _D_values(NULL), _rho(NULL), _fp(NULL),
|
_D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid)
|
||||||
_rho1d(NULL), _tid(tid)
|
|
||||||
{
|
{
|
||||||
// nothing else to do here.
|
// nothing else to do here.
|
||||||
}
|
}
|
||||||
@ -136,6 +137,33 @@ void ThrData::init_eim(int nall, double *rho, double *fp)
|
|||||||
memset(_fp,0,nall*sizeof(double));
|
memset(_fp,0,nall*sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
if order > 0 : set up per thread storage for PPPM
|
||||||
|
if order < 0 : free per thread storage for PPPM
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
#if defined(FFT_SINGLE)
|
||||||
|
typedef float FFT_SCALAR;
|
||||||
|
#else
|
||||||
|
typedef double FFT_SCALAR;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void ThrData::init_pppm(int order, Memory *memory)
|
||||||
|
{
|
||||||
|
FFT_SCALAR **rho1d, **drho1d;
|
||||||
|
if (order > 0) {
|
||||||
|
memory->create2d_offset(rho1d,3,-order/2,order/2,"thr_data:rho1d");
|
||||||
|
memory->create2d_offset(drho1d,3,-order/2,order/2,"thr_data:drho1d");
|
||||||
|
_rho1d = static_cast<void *>(rho1d);
|
||||||
|
_drho1d = static_cast<void *>(drho1d);
|
||||||
|
} else {
|
||||||
|
order = -order;
|
||||||
|
rho1d = static_cast<FFT_SCALAR **>(_rho1d);
|
||||||
|
drho1d = static_cast<FFT_SCALAR **>(_drho1d);
|
||||||
|
memory->destroy2d_offset(rho1d,-order/2);
|
||||||
|
memory->destroy2d_offset(drho1d,-order/2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
compute global pair virial via summing F dot r over own & ghost atoms
|
compute global pair virial via summing F dot r over own & ghost atoms
|
||||||
at this point, only pairwise forces have been accumulated in atom->f
|
at this point, only pairwise forces have been accumulated in atom->f
|
||||||
|
|||||||
@ -53,7 +53,7 @@ class ThrData {
|
|||||||
void init_eam(int, double *); // EAM
|
void init_eam(int, double *); // EAM
|
||||||
void init_eim(int, double *, double *); // EIM (+ EAM)
|
void init_eim(int, double *, double *); // EIM (+ EAM)
|
||||||
|
|
||||||
void init_pppm(void *r1d) { _rho1d = r1d; };
|
void init_pppm(int, class Memory *);
|
||||||
|
|
||||||
// access methods for arrays that we handle in this class
|
// access methods for arrays that we handle in this class
|
||||||
double **get_lambda() const { return _lambda; };
|
double **get_lambda() const { return _lambda; };
|
||||||
@ -63,6 +63,7 @@ class ThrData {
|
|||||||
double *get_rho() const { return _rho; };
|
double *get_rho() const { return _rho; };
|
||||||
double *get_rhoB() const { return _rhoB; };
|
double *get_rhoB() const { return _rhoB; };
|
||||||
void *get_rho1d() const { return _rho1d; };
|
void *get_rho1d() const { return _rho1d; };
|
||||||
|
void *get_drho1d() const { return _drho1d; };
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double eng_vdwl; // non-bonded non-coulomb energy
|
double eng_vdwl; // non-bonded non-coulomb energy
|
||||||
@ -108,6 +109,7 @@ class ThrData {
|
|||||||
|
|
||||||
// this is for pppm/omp
|
// this is for pppm/omp
|
||||||
void *_rho1d;
|
void *_rho1d;
|
||||||
|
void *_drho1d;
|
||||||
|
|
||||||
// my thread id
|
// my thread id
|
||||||
const int _tid;
|
const int _tid;
|
||||||
|
|||||||
@ -186,4 +186,19 @@ static inline void loop_setup_thr(int &ifrom, int &ito, int &tid,
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// helpful definitions to help compilers optimizing code better
|
||||||
|
|
||||||
|
typedef struct { double x,y,z; } dbl3_t;
|
||||||
|
typedef struct { double x,y,z,w; } dbl4_t;
|
||||||
|
typedef struct { int a,b,t; } int3_t;
|
||||||
|
typedef struct { int a,b,c,t; } int4_t;
|
||||||
|
typedef struct { int a,b,c,d,t; } int5_t;
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
#define _noalias __restrict
|
||||||
|
#else
|
||||||
|
#define _noalias
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user