git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7782 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2012-02-14 20:14:03 +00:00
parent 860fbe49c1
commit 7a22c79244
18 changed files with 905 additions and 93 deletions

View File

@ -132,8 +132,8 @@ void Ewald::init()
gsqmx = -4.0*g_ewald*g_ewald*log(precision); gsqmx = -4.0*g_ewald*g_ewald*log(precision);
if (comm->me == 0) { if (comm->me == 0) {
if (screen) fprintf(screen," G vector = %g\n",g_ewald); if (screen) fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
if (logfile) fprintf(logfile," G vector = %g\n",g_ewald); if (logfile) fprintf(logfile," G vector (1/distnace) = %g\n",g_ewald);
} }
// setup Ewald coefficients so can print stats // setup Ewald coefficients so can print stats
@ -212,8 +212,16 @@ void Ewald::compute(int eflag, int vflag)
{ {
int i,k,n; int i,k,n;
energy = 0.0; // set energy/virial flags
if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0; // invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
allocate_peratom();
peratom_allocate_flag = 1;
}
// extend size of per-atom arrays if necessary // extend size of per-atom arrays if necessary
@ -278,9 +286,9 @@ void Ewald::compute(int eflag, int vflag)
f[i][2] += qscale * q[i]*ek[i][2]; f[i][2] += qscale * q[i]*ek[i][2];
} }
// energy if requested // global energy if requested
if (eflag) { if (eflag_global) {
for (k = 0; k < kcount; k++) for (k = 0; k < kcount; k++)
energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
sfacim_all[k]*sfacim_all[k]); sfacim_all[k]*sfacim_all[k]);
@ -289,9 +297,9 @@ void Ewald::compute(int eflag, int vflag)
energy *= qscale; energy *= qscale;
} }
// virial if requested // global virial if requested
if (vflag) { if (vflag_global) {
double uk; double uk;
for (k = 0; k < kcount; k++) { for (k = 0; k < kcount; k++) {
uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
@ -300,7 +308,9 @@ void Ewald::compute(int eflag, int vflag)
for (n = 0; n < 6; n++) virial[n] *= qscale; for (n = 0; n < 6; n++) virial[n] *= qscale;
} }
if (slabflag) slabcorr(eflag); // 2d slab correction
if (slabflag) slabcorr();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -801,7 +811,7 @@ void Ewald::deallocate()
111, 3155). Slabs defined here to be parallel to the xy plane. 111, 3155). Slabs defined here to be parallel to the xy plane.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Ewald::slabcorr(int eflag) void Ewald::slabcorr()
{ {
// compute local contribution to global dipole moment // compute local contribution to global dipole moment
@ -822,7 +832,7 @@ void Ewald::slabcorr(int eflag)
const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
const double qscale = force->qqrd2e * scale; const double qscale = force->qqrd2e * scale;
if (eflag) energy += qscale * e_slabcorr; if (eflag_global) energy += qscale * e_slabcorr;
// add on force corrections // add on force corrections

View File

@ -51,7 +51,7 @@ class Ewald : public KSpace {
void coeffs(); void coeffs();
virtual void allocate(); virtual void allocate();
void deallocate(); void deallocate();
void slabcorr(int); void slabcorr();
}; };
} }

View File

@ -72,11 +72,13 @@ PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
density_fft = NULL; density_fft = NULL;
u_brick = NULL;
v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
greensfn = NULL; greensfn = NULL;
work1 = work2 = NULL; work1 = work2 = NULL;
vg = NULL; vg = NULL;
fkx = fky = fkz = NULL; fkx = fky = fkz = NULL;
buf1 = buf2 = NULL; buf1 = buf2 = buf3 = buf4 = NULL;
gf_b = NULL; gf_b = NULL;
rho1d = rho_coeff = NULL; rho1d = rho_coeff = NULL;
@ -96,6 +98,7 @@ PPPM::~PPPM()
{ {
delete [] factors; delete [] factors;
deallocate(); deallocate();
deallocate_peratom();
memory->destroy(part2grid); memory->destroy(part2grid);
} }
@ -136,6 +139,8 @@ void PPPM::init()
// free all arrays previously allocated // free all arrays previously allocated
deallocate(); deallocate();
deallocate_peratom();
peratom_allocate_flag = 0;
// extract short-range Coulombic cutoff from pair style // extract short-range Coulombic cutoff from pair style
@ -479,6 +484,8 @@ void PPPM::init()
nbuf = MAX(nxx,nyy); nbuf = MAX(nxx,nyy);
nbuf = MAX(nbuf,nzz); nbuf = MAX(nbuf,nzz);
nbuf_peratom = 7*nbuf;
nbuf *= 3; nbuf *= 3;
// print stats // print stats
@ -496,6 +503,7 @@ void PPPM::init()
} }
// allocate K-space dependent memory // allocate K-space dependent memory
// don't invoke allocate_peratom() here, wait to see if needed
allocate(); allocate();
@ -665,7 +673,18 @@ void PPPM::setup()
void PPPM::compute(int eflag, int vflag) void PPPM::compute(int eflag, int vflag)
{ {
int i; int i,j;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
allocate_peratom();
peratom_allocate_flag = 1;
}
// convert atoms from box to lamda coords // convert atoms from box to lamda coords
@ -683,9 +702,6 @@ void PPPM::compute(int eflag, int vflag)
memory->create(part2grid,nmax,3,"pppm:part2grid"); memory->create(part2grid,nmax,3,"pppm:part2grid");
} }
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
// find grid points for all my particles // find grid points for all my particles
// map my particle charge onto my local 3d density grid // map my particle charge onto my local 3d density grid
@ -701,23 +717,32 @@ void PPPM::compute(int eflag, int vflag)
// compute potential gradient on my FFT grid and // compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid // portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition // return gradients (electric fields) in 3d brick decomposition
// also performs per-atom calculations via poisson_peratom()
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells poisson();
// surrounding their 3d bricks
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
fillbrick(); fillbrick();
// extra per-atom energy/virial communication
if (eflag_atom || vflag_atom) fillbrick_peratom();
// calculate the force on my particles // calculate the force on my particles
fieldforce(); fieldforce();
// sum energy across procs and add in volume-dependent term // extra per-atom energy/virial communication
if (eflag_atom || vflag_atom) fieldforce_peratom();
// sum global energy across procs and add in volume-dependent term
const double qscale = force->qqrd2e * scale; const double qscale = force->qqrd2e * scale;
if (eflag) { if (eflag_global) {
double energy_all; double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all; energy = energy_all;
@ -728,17 +753,39 @@ void PPPM::compute(int eflag, int vflag)
energy *= qscale; energy *= qscale;
} }
// sum virial across procs // sum global virial across procs
if (vflag) { if (vflag_global) {
double virial_all[6]; double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
} }
// per-atom energy/virial
// energy includes self-energy correction
if (eflag_atom || vflag_atom) {
double *q = atom->q;
int nlocal = atom->nlocal;
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
}
if (vflag_atom) {
for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
}
}
// 2d slab correction // 2d slab correction
if (slabflag) slabcorr(eflag); if (slabflag) slabcorr(eflag_global);
// convert atoms back from lamda to box coords // convert atoms back from lamda to box coords
@ -802,6 +849,32 @@ void PPPM::allocate()
1,0,0,FFT_PRECISION); 1,0,0,FFT_PRECISION);
} }
/* ----------------------------------------------------------------------
allocate per-atom memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::allocate_peratom()
{
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:u_brick");
memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v0_brick");
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v1_brick");
memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v2_brick");
memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v3_brick");
memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v4_brick");
memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v5_brick");
memory->create(buf3,nbuf_peratom,"pppm:buf3");
memory->create(buf4,nbuf_peratom,"pppm:buf4");
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
deallocate memory that depends on # of K-vectors and order deallocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
@ -835,6 +908,25 @@ void PPPM::deallocate()
delete remap; delete remap;
} }
/* ----------------------------------------------------------------------
deallocate per-atom memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::deallocate_peratom()
{
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy(buf3);
memory->destroy(buf4);
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
set size of FFT grid (nx,ny,nz_pppm) and g_ewald set size of FFT grid (nx,ny,nz_pppm) and g_ewald
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
@ -995,14 +1087,14 @@ void PPPM::set_grid()
const char fft_prec[] = "double"; const char fft_prec[] = "double";
#endif #endif
if (screen) { if (screen) {
fprintf(screen," G vector = %g\n",g_ewald); fprintf(screen," G vector (1/distance)= %g\n",g_ewald);
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(screen," stencil order = %d\n",order); fprintf(screen," stencil order = %d\n",order);
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr)); fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
fprintf(screen," using %s precision FFTs\n",fft_prec); fprintf(screen," using %s precision FFTs\n",fft_prec);
} }
if (logfile) { if (logfile) {
fprintf(logfile," G vector = %g\n",g_ewald); fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(logfile," stencil order = %d\n",order); fprintf(logfile," stencil order = %d\n",order);
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr)); fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
@ -1454,6 +1546,275 @@ void PPPM::fillbrick()
} }
} }
/* ----------------------------------------------------------------------
ghost-swap to fill ghost cells of my brick with per-atom field values
------------------------------------------------------------------------- */
void PPPM::fillbrick_peratom()
{
int i,n,ix,iy,iz;
MPI_Request request;
MPI_Status status;
// pack my real cells for +z processor
// pass data to self or +z processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
for (iy = nylo_in; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[2][1] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[2][0],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzlo_out; iz < nzlo_in; iz++)
for (iy = nylo_in; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
// pack my real cells for -z processor
// pass data to self or -z processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
for (iy = nylo_in; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[2][0] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[2][1],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
for (iy = nylo_in; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
// pack my real cells for +y processor
// pass data to self or +y processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[1][1] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[1][0],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_out; iy < nylo_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
// pack my real cells for -y processor
// pass data to self or -y processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[1][0] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[1][1],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
// pack my real cells for +x processor
// pass data to self or +x processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_out; iy <= nyhi_out; iy++)
for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[0][1] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[0][0],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_out; iy <= nyhi_out; iy++)
for (ix = nxlo_out; ix < nxlo_in; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
// pack my real cells for -x processor
// pass data to self or -x processor
// unpack and sum recv data into my ghost cells
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_out; iy <= nyhi_out; iy++)
for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
if (vflag_atom) {
buf3[n++] = v0_brick[iz][iy][ix];
buf3[n++] = v1_brick[iz][iy][ix];
buf3[n++] = v2_brick[iz][iy][ix];
buf3[n++] = v3_brick[iz][iy][ix];
buf3[n++] = v4_brick[iz][iy][ix];
buf3[n++] = v5_brick[iz][iy][ix];
}
}
if (comm->procneigh[0][0] == me)
for (i = 0; i < n; i++) buf4[i] = buf3[i];
else {
MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
comm->procneigh[0][1],0,world,&request);
MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
MPI_Wait(&request,&status);
}
n = 0;
for (iz = nzlo_out; iz <= nzhi_out; iz++)
for (iy = nylo_out; iy <= nyhi_out; iy++)
for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
if (vflag_atom) {
v0_brick[iz][iy][ix] = buf4[n++];
v1_brick[iz][iy][ix] = buf4[n++];
v2_brick[iz][iy][ix] = buf4[n++];
v3_brick[iz][iy][ix] = buf4[n++];
v4_brick[iz][iy][ix] = buf4[n++];
v5_brick[iz][iy][ix] = buf4[n++];
}
}
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
find center grid pt for each of my particles find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick check that full stencil for the particle will fit in my 3d brick
@ -1550,7 +1911,7 @@ void PPPM::make_rho()
FFT-based Poisson solver FFT-based Poisson solver
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PPPM::poisson(int eflag, int vflag) void PPPM::poisson()
{ {
int i,j,k,n; int i,j,k,n;
double eng; double eng;
@ -1565,18 +1926,18 @@ void PPPM::poisson(int eflag, int vflag)
fft1->compute(work1,work1,1); fft1->compute(work1,work1,1);
// if requested, compute energy and virial contribution // global energy and virial contribution
double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
double s2 = scaleinv*scaleinv; double s2 = scaleinv*scaleinv;
if (eflag || vflag) { if (eflag_global || vflag_global) {
if (vflag) { if (vflag_global) {
n = 0; n = 0;
for (i = 0; i < nfft; i++) { for (i = 0; i < nfft; i++) {
eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
energy += eng; if (eflag_global) energy += eng;
n += 2; n += 2;
} }
} else { } else {
@ -1598,6 +1959,10 @@ void PPPM::poisson(int eflag, int vflag)
work1[n++] *= scaleinv * greensfn[i]; work1[n++] *= scaleinv * greensfn[i];
} }
// extra FFTs for per-atom energy/virial
if (eflag_atom || vflag_atom) poisson_peratom();
// compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
// FFT leaves data in 3d brick decomposition // FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays // copy it into inner portion of vdx,vdy,vdz arrays
@ -1666,6 +2031,142 @@ void PPPM::poisson(int eflag, int vflag)
} }
} }
/* ----------------------------------------------------------------------
FFT-based Poisson solver for per-atom energy/virial
------------------------------------------------------------------------- */
void PPPM::poisson_peratom()
{
int i,j,k,n;
// energy
if (eflag_atom) {
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n];
work2[n+1] = work1[n+1];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
u_brick[k][j][i] = work2[n];
n += 2;
}
}
// 6 components of virial in v0 thru v5
if (!vflag_atom) return;
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][0];
work2[n+1] = work1[n+1]*vg[i][0];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v0_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][1];
work2[n+1] = work1[n+1]*vg[i][1];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v1_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][2];
work2[n+1] = work1[n+1]*vg[i][2];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v2_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][3];
work2[n+1] = work1[n+1]*vg[i][3];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v3_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][4];
work2[n+1] = work1[n+1]*vg[i][4];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v4_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][5];
work2[n+1] = work1[n+1]*vg[i][5];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v5_brick[k][j][i] = work2[n];
n += 2;
}
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
@ -1724,6 +2225,72 @@ void PPPM::fieldforce()
} }
} }
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPM::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += v0;
vatom[i][1] += v1;
vatom[i][2] += v2;
vatom[i][3] += v3;
vatom[i][4] += v4;
vatom[i][5] += v5;
}
}
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
map nprocs to NX by NY grid as PX by PY procs - return optimal px,py map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
@ -1853,7 +2420,7 @@ void PPPM::compute_rho_coeff()
111, 3155). Slabs defined here to be parallel to the xy plane. 111, 3155). Slabs defined here to be parallel to the xy plane.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PPPM::slabcorr(int eflag) void PPPM::slabcorr()
{ {
// compute local contribution to global dipole moment // compute local contribution to global dipole moment
@ -1874,7 +2441,7 @@ void PPPM::slabcorr(int eflag)
const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
const double qscale = force->qqrd2e * scale; const double qscale = force->qqrd2e * scale;
if (eflag) energy += qscale * e_slabcorr; if (eflag_global) energy += qscale * e_slabcorr;
// add on force corrections // add on force corrections
@ -1937,5 +2504,11 @@ double PPPM::memory_usage()
bytes += nfft_both * sizeof(double); bytes += nfft_both * sizeof(double);
bytes += nfft_both*5 * sizeof(FFT_SCALAR); bytes += nfft_both*5 * sizeof(FFT_SCALAR);
bytes += 2 * nbuf * sizeof(FFT_SCALAR); bytes += 2 * nbuf * sizeof(FFT_SCALAR);
if (peratom_allocate_flag) {
bytes += 7 * nbrick * sizeof(FFT_SCALAR);
bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR);
}
return bytes; return bytes;
} }

View File

@ -55,22 +55,27 @@ class PPPM : public KSpace {
double volume; double volume;
double delxinv,delyinv,delzinv,delvolinv; double delxinv,delyinv,delzinv,delvolinv;
double shift,shiftone; double shift,shiftone;
int peratom_allocate_flag;
int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in; int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out; int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost; int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft; int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
int nlower,nupper; int nlower,nupper;
int ngrid,nfft,nbuf,nfft_both; int ngrid,nfft,nfft_both;
int nbuf,nbuf_peratom;
FFT_SCALAR ***density_brick; FFT_SCALAR ***density_brick;
FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick; FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
FFT_SCALAR ***u_brick;
FFT_SCALAR ***v0_brick,***v1_brick,***v2_brick;
FFT_SCALAR ***v3_brick,***v4_brick,***v5_brick;
double *greensfn; double *greensfn;
double **vg; double **vg;
double *fkx,*fky,*fkz; double *fkx,*fky,*fkz;
FFT_SCALAR *density_fft; FFT_SCALAR *density_fft;
FFT_SCALAR *work1,*work2; FFT_SCALAR *work1,*work2;
FFT_SCALAR *buf1,*buf2; FFT_SCALAR *buf1,*buf2,*buf3,*buf4;
double *gf_b; double *gf_b;
FFT_SCALAR **rho1d,**rho_coeff; FFT_SCALAR **rho1d,**rho_coeff;
@ -90,7 +95,9 @@ class PPPM : public KSpace {
void set_grid(); void set_grid();
virtual void allocate(); virtual void allocate();
virtual void allocate_peratom();
virtual void deallocate(); virtual void deallocate();
virtual void deallocate_peratom();
int factorable(int); int factorable(int);
double rms(double, double, bigint, double, double **); double rms(double, double, bigint, double, double **);
double diffpr(double, double, double, double, double **); double diffpr(double, double, double, double, double **);
@ -100,13 +107,16 @@ class PPPM : public KSpace {
virtual void make_rho(); virtual void make_rho();
virtual void brick2fft(); virtual void brick2fft();
virtual void fillbrick(); virtual void fillbrick();
virtual void poisson(int, int); virtual void fillbrick_peratom();
virtual void poisson();
virtual void poisson_peratom();
virtual void fieldforce(); virtual void fieldforce();
virtual void fieldforce_peratom();
void procs2grid2d(int,int,int,int *, int*); void procs2grid2d(int,int,int,int *, int*);
void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
const FFT_SCALAR &); const FFT_SCALAR &);
void compute_rho_coeff(); void compute_rho_coeff();
void slabcorr(int); void slabcorr();
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
denominator for Hockney-Eastwood Green's function denominator for Hockney-Eastwood Green's function
@ -120,7 +130,8 @@ class PPPM : public KSpace {
gf_b = denominator expansion coeffs gf_b = denominator expansion coeffs
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
inline double gf_denom(const double &x, const double &y, const double &z) const { inline double gf_denom(const double &x, const double &y,
const double &z) const {
double sx,sy,sz; double sx,sy,sz;
sz = sy = sx = 0.0; sz = sy = sx = 0.0;
for (int l = order-1; l >= 0; l--) { for (int l = order-1; l >= 0; l--) {

View File

@ -44,7 +44,8 @@ using namespace MathConst;
PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{ {
if ((narg < 1) || (narg > 2)) error->all(FLERR,"Illegal kspace_style pppm/cg command"); if ((narg < 1) || (narg > 2))
error->all(FLERR,"Illegal kspace_style pppm/cg command");
if (narg == 2) if (narg == 2)
smallq = atof(arg[1]); smallq = atof(arg[1]);
@ -70,7 +71,18 @@ PPPMCG::~PPPMCG()
void PPPMCG::compute(int eflag, int vflag) void PPPMCG::compute(int eflag, int vflag)
{ {
int i; int i,j;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
allocate_peratom();
peratom_allocate_flag = 1;
}
// convert atoms from box to lamda coords // convert atoms from box to lamda coords
@ -90,7 +102,7 @@ void PPPMCG::compute(int eflag, int vflag)
memory->create(is_charged,nmax,"pppm/cg:is_charged"); memory->create(is_charged,nmax,"pppm/cg:is_charged");
} }
// one time setup message. // one time setup message
if (num_charged < 0) { if (num_charged < 0) {
bigint charged_all, charged_num; bigint charged_all, charged_num;
@ -134,16 +146,13 @@ void PPPMCG::compute(int eflag, int vflag)
} }
} }
num_charged=0; num_charged = 0;
for (i=0; i < atom->nlocal; ++i) for (i = 0; i < atom->nlocal; ++i)
if (fabs(atom->q[i]) > smallq) { if (fabs(atom->q[i]) > smallq) {
is_charged[num_charged] = i; is_charged[num_charged] = i;
++num_charged; ++num_charged;
} }
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
// find grid points for all my particles // find grid points for all my particles
// map my particle charge onto my local 3d density grid // map my particle charge onto my local 3d density grid
@ -159,23 +168,32 @@ void PPPMCG::compute(int eflag, int vflag)
// compute potential gradient on my FFT grid and // compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid // portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition // return gradients (electric fields) in 3d brick decomposition
// also performs per-atom calculations via poisson_peratom()
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells poisson();
// surrounding their 3d bricks
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
fillbrick(); fillbrick();
// extra per-atom energy/virial communication
if (eflag_atom || vflag_atom) fillbrick_peratom();
// calculate the force on my particles // calculate the force on my particles
fieldforce(); fieldforce();
// sum energy across procs and add in volume-dependent term // extra per-atom energy/virial communication
if (eflag_atom || vflag_atom) fieldforce_peratom();
// sum global energy across procs and add in volume-dependent term
const double qscale = force->qqrd2e * scale; const double qscale = force->qqrd2e * scale;
if (eflag) { if (eflag_global) {
double energy_all; double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all; energy = energy_all;
@ -186,14 +204,36 @@ void PPPMCG::compute(int eflag, int vflag)
energy *= qscale; energy *= qscale;
} }
// sum virial across procs // sum global virial across procs
if (vflag) { if (vflag_global) {
double virial_all[6]; double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
} }
// per-atom energy/virial
// energy includes self-energy correction
if (eflag_atom || vflag_atom) {
double *q = atom->q;
int nlocal = atom->nlocal;
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
}
if (vflag_atom) {
for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
}
}
// 2d slab correction // 2d slab correction
if (slabflag) slabcorr(eflag); if (slabflag) slabcorr(eflag);
@ -344,6 +384,7 @@ void PPPMCG::fieldforce()
} }
// convert E-field to force // convert E-field to force
const double qfactor = force->qqrd2e * scale * q[i]; const double qfactor = force->qqrd2e * scale * q[i];
f[i][0] += qfactor*ekx; f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky; f[i][1] += qfactor*eky;

View File

@ -28,7 +28,7 @@ class PPPMCG : public PPPM {
public: public:
PPPMCG(class LAMMPS *, int, char **); PPPMCG(class LAMMPS *, int, char **);
virtual ~PPPMCG(); virtual ~PPPMCG();
virtual void compute(int eflag, int vflag); virtual void compute(int, int);
virtual double memory_usage(); virtual double memory_usage();
protected: protected:

View File

@ -66,7 +66,8 @@ ComputeHeatFlux::ComputeHeatFlux(LAMMPS *lmp, int narg, char **arg) :
if (modify->compute[ipe]->peatomflag == 0) if (modify->compute[ipe]->peatomflag == 0)
error->all(FLERR,"Compute heat/flux compute ID does not compute pe/atom"); error->all(FLERR,"Compute heat/flux compute ID does not compute pe/atom");
if (modify->compute[istress]->pressatomflag == 0) if (modify->compute[istress]->pressatomflag == 0)
error->all(FLERR,"Compute heat/flux compute ID does not compute stress/atom"); error->all(FLERR,
"Compute heat/flux compute ID does not compute stress/atom");
vector = new double[6]; vector = new double[6];
} }

View File

@ -88,6 +88,7 @@ double ComputePE::compute_scalar()
MPI_Allreduce(&one,&scalar,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&one,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
if (kspaceflag && force->kspace) scalar += force->kspace->energy; if (kspaceflag && force->kspace) scalar += force->kspace->energy;
if (pairflag && force->pair && force->pair->tail_flag) { if (pairflag && force->pair && force->pair->tail_flag) {
double volume = domain->xprd * domain->yprd * domain->zprd; double volume = domain->xprd * domain->yprd * domain->zprd;
scalar += force->pair->etail / volume; scalar += force->pair->etail / volume;

View File

@ -22,6 +22,7 @@
#include "angle.h" #include "angle.h"
#include "dihedral.h" #include "dihedral.h"
#include "improper.h" #include "improper.h"
#include "kspace.h"
#include "memory.h" #include "memory.h"
#include "error.h" #include "error.h"
@ -43,9 +44,11 @@ ComputePEAtom::ComputePEAtom(LAMMPS *lmp, int narg, char **arg) :
if (narg == 3) { if (narg == 3) {
pairflag = 1; pairflag = 1;
bondflag = angleflag = dihedralflag = improperflag = 1; bondflag = angleflag = dihedralflag = improperflag = 1;
kspaceflag = 1;
} else { } else {
pairflag = 0; pairflag = 0;
bondflag = angleflag = dihedralflag = improperflag = 0; bondflag = angleflag = dihedralflag = improperflag = 0;
kspaceflag = 0;
int iarg = 3; int iarg = 3;
while (iarg < narg) { while (iarg < narg) {
if (strcmp(arg[iarg],"pair") == 0) pairflag = 1; if (strcmp(arg[iarg],"pair") == 0) pairflag = 1;
@ -53,6 +56,7 @@ ComputePEAtom::ComputePEAtom(LAMMPS *lmp, int narg, char **arg) :
else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1; else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1;
else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1; else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1;
else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1; else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1;
else if (strcmp(arg[iarg],"kspace") == 0) kspaceflag = 1;
else error->all(FLERR,"Illegal compute pe/atom command"); else error->all(FLERR,"Illegal compute pe/atom command");
iarg++; iarg++;
} }
@ -137,6 +141,13 @@ void ComputePEAtom::compute_peratom()
if (force->newton) comm->reverse_comm_compute(this); if (force->newton) comm->reverse_comm_compute(this);
// KSpace contribution is already per local atom
if (kspaceflag && force->kspace) {
double *eatom = force->kspace->eatom;
for (i = 0; i < nlocal; i++) energy[i] += eatom[i];
}
// zero energy of atoms not in group // zero energy of atoms not in group
// only do this after comm since ghost contributions must be included // only do this after comm since ghost contributions must be included

View File

@ -35,7 +35,7 @@ class ComputePEAtom : public Compute {
double memory_usage(); double memory_usage();
private: private:
int pairflag,bondflag,angleflag,dihedralflag,improperflag; int pairflag,bondflag,angleflag,dihedralflag,improperflag,kspaceflag;
int nmax; int nmax;
double *energy; double *energy;
}; };

View File

@ -57,7 +57,8 @@ ComputePressure::ComputePressure(LAMMPS *lmp, int narg, char **arg) :
if (icompute < 0) if (icompute < 0)
error->all(FLERR,"Could not find compute pressure temperature ID"); error->all(FLERR,"Could not find compute pressure temperature ID");
if (modify->compute[icompute]->tempflag == 0) if (modify->compute[icompute]->tempflag == 0)
error->all(FLERR,"Compute pressure temperature ID does not compute temperature"); error->all(FLERR,
"Compute pressure temperature ID does not compute temperature");
// process optional args // process optional args

View File

@ -23,6 +23,7 @@
#include "angle.h" #include "angle.h"
#include "dihedral.h" #include "dihedral.h"
#include "improper.h" #include "improper.h"
#include "kspace.h"
#include "modify.h" #include "modify.h"
#include "fix.h" #include "fix.h"
#include "memory.h" #include "memory.h"
@ -47,11 +48,13 @@ ComputeStressAtom::ComputeStressAtom(LAMMPS *lmp, int narg, char **arg) :
keflag = 1; keflag = 1;
pairflag = 1; pairflag = 1;
bondflag = angleflag = dihedralflag = improperflag = 1; bondflag = angleflag = dihedralflag = improperflag = 1;
kspaceflag = 1;
fixflag = 1; fixflag = 1;
} else { } else {
keflag = 0; keflag = 0;
pairflag = 0; pairflag = 0;
bondflag = angleflag = dihedralflag = improperflag = 0; bondflag = angleflag = dihedralflag = improperflag = 0;
kspaceflag = 0;
fixflag = 0; fixflag = 0;
int iarg = 3; int iarg = 3;
while (iarg < narg) { while (iarg < narg) {
@ -61,6 +64,7 @@ ComputeStressAtom::ComputeStressAtom(LAMMPS *lmp, int narg, char **arg) :
else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1; else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1;
else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1; else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1;
else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1; else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1;
else if (strcmp(arg[iarg],"kspace") == 0) kspaceflag = 1;
else if (strcmp(arg[iarg],"fix") == 0) fixflag = 1; else if (strcmp(arg[iarg],"fix") == 0) fixflag = 1;
else if (strcmp(arg[iarg],"virial") == 0) { else if (strcmp(arg[iarg],"virial") == 0) {
pairflag = 1; pairflag = 1;
@ -175,6 +179,15 @@ void ComputeStressAtom::compute_peratom()
if (force->newton) comm->reverse_comm_compute(this); if (force->newton) comm->reverse_comm_compute(this);
// KSpace contribution is already per local atom
if (kspaceflag && force->kspace) {
double **vatom = force->kspace->vatom;
for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++)
stress[i][j] += vatom[i][j];
}
// zero virial of atoms not in group // zero virial of atoms not in group
// only do this after comm since ghost contributions must be included // only do this after comm since ghost contributions must be included

View File

@ -35,7 +35,8 @@ class ComputeStressAtom : public Compute {
double memory_usage(); double memory_usage();
private: private:
int keflag,pairflag,bondflag,angleflag,dihedralflag,improperflag,fixflag; int keflag,pairflag,bondflag,angleflag,dihedralflag,improperflag;
int kspaceflag,fixflag;
int nmax; int nmax;
double **stress; double **stress;
}; };

View File

@ -14,8 +14,10 @@
#include "stdlib.h" #include "stdlib.h"
#include "string.h" #include "string.h"
#include "kspace.h" #include "kspace.h"
#include "error.h" #include "atom.h"
#include "comm.h" #include "comm.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
@ -29,6 +31,71 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
gewaldflag = 0; gewaldflag = 0;
slabflag = 0; slabflag = 0;
slab_volfactor = 1; slab_volfactor = 1;
maxeatom = maxvatom = 0;
eatom = NULL;
vatom = NULL;
}
/* ---------------------------------------------------------------------- */
KSpace::~KSpace()
{
memory->destroy(eatom);
memory->destroy(vatom);
}
/* ----------------------------------------------------------------------
setup for energy, virial computation
see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
------------------------------------------------------------------------- */
void KSpace::ev_setup(int eflag, int vflag)
{
int i,n;
evflag = 1;
eflag_either = eflag;
eflag_global = eflag % 2;
eflag_atom = eflag / 2;
vflag_either = vflag;
vflag_global = vflag % 4;
vflag_atom = vflag / 4;
// reallocate per-atom arrays if necessary
if (eflag_atom && atom->nlocal > maxeatom) {
maxeatom = atom->nmax;
memory->destroy(eatom);
memory->create(eatom,maxeatom,"kspace:eatom");
}
if (vflag_atom && atom->nlocal > maxvatom) {
maxvatom = atom->nmax;
memory->destroy(vatom);
memory->create(vatom,maxvatom,6,"kspace:vatom");
}
// zero accumulators
if (eflag_global) energy = 0.0;
if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
if (eflag_atom) {
n = atom->nlocal;
for (i = 0; i < n; i++) eatom[i] = 0.0;
}
if (vflag_atom) {
n = atom->nlocal;
for (i = 0; i < n; i++) {
vatom[i][0] = 0.0;
vatom[i][1] = 0.0;
vatom[i][2] = 0.0;
vatom[i][3] = 0.0;
vatom[i][4] = 0.0;
vatom[i][5] = 0.0;
}
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------

View File

@ -21,13 +21,15 @@ namespace LAMMPS_NS {
class KSpace : protected Pointers { class KSpace : protected Pointers {
friend class ThrOMP; friend class ThrOMP;
public: public:
double energy; double energy; // accumulated energy
double virial[6]; double virial[6]; // accumlated virial
double *eatom,**vatom; // accumulated per-atom energy/virial
double g_ewald; double g_ewald;
int nx_pppm,ny_pppm,nz_pppm; int nx_pppm,ny_pppm,nz_pppm;
KSpace(class LAMMPS *, int, char **); KSpace(class LAMMPS *, int, char **);
virtual ~KSpace() {} virtual ~KSpace();
void modify_params(int, char **); void modify_params(int, char **);
void *extract(const char *); void *extract(const char *);
@ -38,11 +40,18 @@ class KSpace : protected Pointers {
virtual double memory_usage() {return 0.0;} virtual double memory_usage() {return 0.0;}
protected: protected:
double slab_volfactor;
int gridflag,gewaldflag; int gridflag,gewaldflag;
int order; int order;
int slabflag; int slabflag;
double scale; double scale;
double slab_volfactor;
int evflag;
int eflag_either,eflag_global,eflag_atom;
int vflag_either,vflag_global,vflag_atom;
int maxeatom,maxvatom;
void ev_setup(int, int);
}; };
} }

View File

@ -97,6 +97,7 @@ void ReadRestart::command(int narg, char **arg)
else multiproc = 0; else multiproc = 0;
// open single restart file or base file for multiproc case // open single restart file or base file for multiproc case
// auto-detect whether byte swapping needs to be done as file is read
if (me == 0) { if (me == 0) {
if (screen) fprintf(screen,"Reading restart file ...\n"); if (screen) fprintf(screen,"Reading restart file ...\n");
@ -114,9 +115,12 @@ void ReadRestart::command(int narg, char **arg)
sprintf(str,"Cannot open restart file %s",hfile); sprintf(str,"Cannot open restart file %s",hfile);
error->one(FLERR,str); error->one(FLERR,str);
} }
swapflag = autodetect(&fp,hfile);
if (multiproc) delete [] hfile; if (multiproc) delete [] hfile;
} }
MPI_Bcast(&swapflag,1,MPI_INT,0,world);
// read header info and create atom style and simulation box // read header info and create atom style and simulation box
header(); header();
@ -175,8 +179,7 @@ void ReadRestart::command(int narg, char **arg)
} }
for (int iproc = 0; iproc < nprocs_file; iproc++) { for (int iproc = 0; iproc < nprocs_file; iproc++) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
if (n > maxbuf) { if (n > maxbuf) {
maxbuf = n; maxbuf = n;
memory->destroy(buf); memory->destroy(buf);
@ -184,7 +187,7 @@ void ReadRestart::command(int narg, char **arg)
} }
if (n > 0) { if (n > 0) {
if (me == 0) fread(buf,sizeof(double),n,fp); if (me == 0) nread_double(buf,n,fp);
MPI_Bcast(buf,n,MPI_DOUBLE,0,world); MPI_Bcast(buf,n,MPI_DOUBLE,0,world);
} }
@ -229,13 +232,13 @@ void ReadRestart::command(int narg, char **arg)
error->one(FLERR,str); error->one(FLERR,str);
} }
fread(&n,sizeof(int),1,fp); nread_int(&n,1,fp);
if (n > maxbuf) { if (n > maxbuf) {
maxbuf = n; maxbuf = n;
memory->destroy(buf); memory->destroy(buf);
memory->create(buf,maxbuf,"read_restart:buf"); memory->create(buf,maxbuf,"read_restart:buf");
} }
if (n > 0) fread(buf,sizeof(double),n,fp); if (n > 0) nread_double(buf,n,fp);
m = 0; m = 0;
while (m < n) m += avec->unpack_restart(&buf[m]); while (m < n) m += avec->unpack_restart(&buf[m]);
@ -708,7 +711,7 @@ void ReadRestart::type_arrays()
if (flag == MASS) { if (flag == MASS) {
double *mass = new double[atom->ntypes+1]; double *mass = new double[atom->ntypes+1];
if (me == 0) fread(&mass[1],sizeof(double),atom->ntypes,fp); if (me == 0) nread_double(&mass[1],atom->ntypes,fp);
MPI_Bcast(&mass[1],atom->ntypes,MPI_DOUBLE,0,world); MPI_Bcast(&mass[1],atom->ntypes,MPI_DOUBLE,0,world);
atom->set_mass(mass); atom->set_mass(mass);
delete [] mass; delete [] mass;
@ -731,10 +734,9 @@ void ReadRestart::force_fields()
while (flag >= 0) { while (flag >= 0) {
if (flag == PAIR) { if (flag == PAIR) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
style = new char[n]; style = new char[n];
if (me == 0) fread(style,sizeof(char),n,fp); if (me == 0) nread_char(style,n,fp);
MPI_Bcast(style,n,MPI_CHAR,0,world); MPI_Bcast(style,n,MPI_CHAR,0,world);
force->create_pair(style); force->create_pair(style);
@ -746,10 +748,9 @@ void ReadRestart::force_fields()
} }
} else if (flag == BOND) { } else if (flag == BOND) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
style = new char[n]; style = new char[n];
if (me == 0) fread(style,sizeof(char),n,fp); if (me == 0) nread_char(style,n,fp);
MPI_Bcast(style,n,MPI_CHAR,0,world); MPI_Bcast(style,n,MPI_CHAR,0,world);
force->create_bond(style); force->create_bond(style);
@ -757,10 +758,9 @@ void ReadRestart::force_fields()
force->bond->read_restart(fp); force->bond->read_restart(fp);
} else if (flag == ANGLE) { } else if (flag == ANGLE) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
style = new char[n]; style = new char[n];
if (me == 0) fread(style,sizeof(char),n,fp); if (me == 0) nread_char(style,n,fp);
MPI_Bcast(style,n,MPI_CHAR,0,world); MPI_Bcast(style,n,MPI_CHAR,0,world);
force->create_angle(style); force->create_angle(style);
@ -768,10 +768,9 @@ void ReadRestart::force_fields()
force->angle->read_restart(fp); force->angle->read_restart(fp);
} else if (flag == DIHEDRAL) { } else if (flag == DIHEDRAL) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
style = new char[n]; style = new char[n];
if (me == 0) fread(style,sizeof(char),n,fp); if (me == 0) nread_char(style,n,fp);
MPI_Bcast(style,n,MPI_CHAR,0,world); MPI_Bcast(style,n,MPI_CHAR,0,world);
force->create_dihedral(style); force->create_dihedral(style);
@ -779,10 +778,9 @@ void ReadRestart::force_fields()
force->dihedral->read_restart(fp); force->dihedral->read_restart(fp);
} else if (flag == IMPROPER) { } else if (flag == IMPROPER) {
if (me == 0) fread(&n,sizeof(int),1,fp); n = read_int();
MPI_Bcast(&n,1,MPI_INT,0,world);
style = new char[n]; style = new char[n];
if (me == 0) fread(style,sizeof(char),n,fp); if (me == 0) nread_char(style,n,fp);
MPI_Bcast(style,n,MPI_CHAR,0,world); MPI_Bcast(style,n,MPI_CHAR,0,world);
force->create_improper(style); force->create_improper(style);
@ -796,6 +794,38 @@ void ReadRestart::force_fields()
} }
} }
/* ----------------------------------------------------------------------
read N ints from restart file
do not bcast them, caller does that if required
------------------------------------------------------------------------- */
void ReadRestart::nread_int(int *buf, int n, FILE *fp)
{
fread(buf,sizeof(int),n,fp);
if (swapflag) {}
}
/* ----------------------------------------------------------------------
read N doubles from restart file
do not bcast them, caller does that if required
------------------------------------------------------------------------- */
void ReadRestart::nread_double(double *buf, int n, FILE *fp)
{
fread(buf,sizeof(double),n,fp);
if (swapflag) {}
}
/* ----------------------------------------------------------------------
read N chars from restart file
do not bcast them, caller does that if required
------------------------------------------------------------------------- */
void ReadRestart::nread_char(char *buf, int n, FILE *fp)
{
fread(buf,sizeof(char),n,fp);
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
read an int from restart file and bcast it read an int from restart file and bcast it
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
@ -803,7 +833,10 @@ void ReadRestart::force_fields()
int ReadRestart::read_int() int ReadRestart::read_int()
{ {
int value; int value;
if (me == 0) fread(&value,sizeof(int),1,fp); if (me == 0) {
fread(&value,sizeof(int),1,fp);
if (swapflag) {}
}
MPI_Bcast(&value,1,MPI_INT,0,world); MPI_Bcast(&value,1,MPI_INT,0,world);
return value; return value;
} }
@ -815,7 +848,10 @@ int ReadRestart::read_int()
double ReadRestart::read_double() double ReadRestart::read_double()
{ {
double value; double value;
if (me == 0) fread(&value,sizeof(double),1,fp); if (me == 0) {
fread(&value,sizeof(double),1,fp);
if (swapflag) {}
}
MPI_Bcast(&value,1,MPI_DOUBLE,0,world); MPI_Bcast(&value,1,MPI_DOUBLE,0,world);
return value; return value;
} }
@ -828,10 +864,16 @@ double ReadRestart::read_double()
char *ReadRestart::read_char() char *ReadRestart::read_char()
{ {
int n; int n;
if (me == 0) fread(&n,sizeof(int),1,fp); if (me == 0) {
fread(&n,sizeof(int),1,fp);
if (swapflag) {}
}
MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(&n,1,MPI_INT,0,world);
char *value = new char[n]; char *value = new char[n];
if (me == 0) fread(value,sizeof(char),n,fp); if (me == 0) {
fread(value,sizeof(char),n,fp);
if (swapflag) {}
}
MPI_Bcast(value,n,MPI_CHAR,0,world); MPI_Bcast(value,n,MPI_CHAR,0,world);
return value; return value;
} }
@ -843,7 +885,34 @@ char *ReadRestart::read_char()
bigint ReadRestart::read_bigint() bigint ReadRestart::read_bigint()
{ {
bigint value; bigint value;
if (me == 0) fread(&value,sizeof(bigint),1,fp); if (me == 0) {
fread(&value,sizeof(bigint),1,fp);
if (swapflag) {}
}
MPI_Bcast(&value,1,MPI_LMP_BIGINT,0,world); MPI_Bcast(&value,1,MPI_LMP_BIGINT,0,world);
return value; return value;
} }
/* ----------------------------------------------------------------------
// auto-detect if restart file needs to be byte-swapped on this platform
// return 0 if not, 1 if it does
// re-open file with fp after checking first few bytes
read a bigint from restart file and bcast it
------------------------------------------------------------------------- */
int ReadRestart::autodetect(FILE **pfp, char *file)
{
FILE *fp = *pfp;
// read, check, set return flag
int flag = 0;
// reset file pointer
fclose(fp);
fp = fopen(file,"rb");
*pfp = fp;
return flag;
}

View File

@ -34,16 +34,21 @@ class ReadRestart : protected Pointers {
int me,nprocs,nprocs_file; int me,nprocs,nprocs_file;
FILE *fp; FILE *fp;
int nfix_restart_global,nfix_restart_peratom; int nfix_restart_global,nfix_restart_peratom;
int swapflag;
void file_search(char *, char *); void file_search(char *, char *);
void header(); void header();
void type_arrays(); void type_arrays();
void force_fields(); void force_fields();
void nread_int(int *, int, FILE *);
void nread_double(double *, int, FILE *);
void nread_char(char *, int, FILE *);
int read_int(); int read_int();
double read_double(); double read_double();
char *read_char(); char *read_char();
bigint read_bigint(); bigint read_bigint();
int autodetect(FILE **, char *);
}; };
} }

View File

@ -492,4 +492,3 @@ void WriteRestart::write_bigint(int flag, bigint value)
fwrite(&flag,sizeof(int),1,fp); fwrite(&flag,sizeof(int),1,fp);
fwrite(&value,sizeof(bigint),1,fp); fwrite(&value,sizeof(bigint),1,fp);
} }