diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index 7989e0e28d..066582188d 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -132,8 +132,8 @@ void Ewald::init() gsqmx = -4.0*g_ewald*g_ewald*log(precision); if (comm->me == 0) { - if (screen) fprintf(screen," G vector = %g\n",g_ewald); - if (logfile) fprintf(logfile," G vector = %g\n",g_ewald); + if (screen) fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + if (logfile) fprintf(logfile," G vector (1/distnace) = %g\n",g_ewald); } // setup Ewald coefficients so can print stats @@ -212,8 +212,16 @@ void Ewald::compute(int eflag, int vflag) { int i,k,n; - energy = 0.0; - if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0; + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + } // extend size of per-atom arrays if necessary @@ -278,9 +286,9 @@ void Ewald::compute(int eflag, int vflag) f[i][2] += qscale * q[i]*ek[i][2]; } - // energy if requested + // global energy if requested - if (eflag) { + if (eflag_global) { for (k = 0; k < kcount; k++) energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); @@ -289,9 +297,9 @@ void Ewald::compute(int eflag, int vflag) energy *= qscale; } - // virial if requested + // global virial if requested - if (vflag) { + if (vflag_global) { double uk; for (k = 0; k < kcount; k++) { uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); @@ -300,7 +308,9 @@ void Ewald::compute(int eflag, int vflag) for (n = 0; n < 6; n++) virial[n] *= qscale; } - if (slabflag) slabcorr(eflag); + // 2d slab correction + + if (slabflag) slabcorr(); } /* ---------------------------------------------------------------------- */ @@ -801,7 +811,7 @@ void Ewald::deallocate() 111, 3155). Slabs defined here to be parallel to the xy plane. ------------------------------------------------------------------------- */ -void Ewald::slabcorr(int eflag) +void Ewald::slabcorr() { // compute local contribution to global dipole moment @@ -822,7 +832,7 @@ void Ewald::slabcorr(int eflag) const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; const double qscale = force->qqrd2e * scale; - if (eflag) energy += qscale * e_slabcorr; + if (eflag_global) energy += qscale * e_slabcorr; // add on force corrections diff --git a/src/KSPACE/ewald.h b/src/KSPACE/ewald.h index 243dc71666..51075b265a 100644 --- a/src/KSPACE/ewald.h +++ b/src/KSPACE/ewald.h @@ -51,7 +51,7 @@ class Ewald : public KSpace { void coeffs(); virtual void allocate(); void deallocate(); - void slabcorr(int); + void slabcorr(); }; } diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index a3331daf72..21f9916c12 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -72,11 +72,13 @@ PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; greensfn = NULL; work1 = work2 = NULL; vg = NULL; fkx = fky = fkz = NULL; - buf1 = buf2 = NULL; + buf1 = buf2 = buf3 = buf4 = NULL; gf_b = NULL; rho1d = rho_coeff = NULL; @@ -96,6 +98,7 @@ PPPM::~PPPM() { delete [] factors; deallocate(); + deallocate_peratom(); memory->destroy(part2grid); } @@ -136,6 +139,8 @@ void PPPM::init() // free all arrays previously allocated deallocate(); + deallocate_peratom(); + peratom_allocate_flag = 0; // extract short-range Coulombic cutoff from pair style @@ -479,6 +484,8 @@ void PPPM::init() nbuf = MAX(nxx,nyy); nbuf = MAX(nbuf,nzz); + + nbuf_peratom = 7*nbuf; nbuf *= 3; // print stats @@ -496,6 +503,7 @@ void PPPM::init() } // allocate K-space dependent memory + // don't invoke allocate_peratom() here, wait to see if needed allocate(); @@ -665,7 +673,18 @@ void PPPM::setup() void PPPM::compute(int eflag, int vflag) { - int i; + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + } // convert atoms from box to lamda coords @@ -683,9 +702,6 @@ void PPPM::compute(int eflag, int vflag) memory->create(part2grid,nmax,3,"pppm:part2grid"); } - energy = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0; - // find grid points for all my particles // map my particle charge onto my local 3d density grid @@ -701,23 +717,32 @@ void PPPM::compute(int eflag, int vflag) // compute potential gradient on my FFT grid and // portion of e_long on this proc's FFT grid // return gradients (electric fields) in 3d brick decomposition - - poisson(eflag,vflag); + // also performs per-atom calculations via poisson_peratom() - // all procs communicate E-field values to fill ghost cells - // surrounding their 3d bricks + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks fillbrick(); + // extra per-atom energy/virial communication + + if (eflag_atom || vflag_atom) fillbrick_peratom(); + // calculate the force on my particles fieldforce(); - // sum energy across procs and add in volume-dependent term + // extra per-atom energy/virial communication + + if (eflag_atom || vflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term const double qscale = force->qqrd2e * scale; - if (eflag) { + if (eflag_global) { double energy_all; MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); energy = energy_all; @@ -728,17 +753,39 @@ void PPPM::compute(int eflag, int vflag) energy *= qscale; } - // sum virial across procs + // sum global virial across procs - if (vflag) { + if (vflag_global) { double virial_all[6]; MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; } + // per-atom energy/virial + // energy includes self-energy correction + + if (eflag_atom || vflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + } + } + // 2d slab correction - if (slabflag) slabcorr(eflag); + if (slabflag) slabcorr(eflag_global); // convert atoms back from lamda to box coords @@ -802,6 +849,32 @@ void PPPM::allocate() 1,0,0,FFT_PRECISION); } +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate_peratom() +{ + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + memory->create(buf3,nbuf_peratom,"pppm:buf3"); + memory->create(buf4,nbuf_peratom,"pppm:buf4"); +} + /* ---------------------------------------------------------------------- deallocate memory that depends on # of K-vectors and order ------------------------------------------------------------------------- */ @@ -835,6 +908,25 @@ void PPPM::deallocate() delete remap; } +/* ---------------------------------------------------------------------- + deallocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate_peratom() +{ + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(buf3); + memory->destroy(buf4); +} + /* ---------------------------------------------------------------------- set size of FFT grid (nx,ny,nz_pppm) and g_ewald ------------------------------------------------------------------------- */ @@ -995,14 +1087,14 @@ void PPPM::set_grid() const char fft_prec[] = "double"; #endif if (screen) { - fprintf(screen," G vector = %g\n",g_ewald); + fprintf(screen," G vector (1/distance)= %g\n",g_ewald); fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(screen," stencil order = %d\n",order); fprintf(screen," RMS precision = %g\n",MAX(lpr,spr)); fprintf(screen," using %s precision FFTs\n",fft_prec); } if (logfile) { - fprintf(logfile," G vector = %g\n",g_ewald); + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(logfile," stencil order = %d\n",order); fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr)); @@ -1454,6 +1546,275 @@ void PPPM::fillbrick() } } +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with per-atom field values +------------------------------------------------------------------------- */ + +void PPPM::fillbrick_peratom() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[2][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[1][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[0][0],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } + + // pack my real cells for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { + if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; + if (vflag_atom) { + buf3[n++] = v0_brick[iz][iy][ix]; + buf3[n++] = v1_brick[iz][iy][ix]; + buf3[n++] = v2_brick[iz][iy][ix]; + buf3[n++] = v3_brick[iz][iy][ix]; + buf3[n++] = v4_brick[iz][iy][ix]; + buf3[n++] = v5_brick[iz][iy][ix]; + } + } + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf4[i] = buf3[i]; + else { + MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, + comm->procneigh[0][1],0,world,&request); + MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { + if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; + if (vflag_atom) { + v0_brick[iz][iy][ix] = buf4[n++]; + v1_brick[iz][iy][ix] = buf4[n++]; + v2_brick[iz][iy][ix] = buf4[n++]; + v3_brick[iz][iy][ix] = buf4[n++]; + v4_brick[iz][iy][ix] = buf4[n++]; + v5_brick[iz][iy][ix] = buf4[n++]; + } + } +} + /* ---------------------------------------------------------------------- find center grid pt for each of my particles check that full stencil for the particle will fit in my 3d brick @@ -1550,7 +1911,7 @@ void PPPM::make_rho() FFT-based Poisson solver ------------------------------------------------------------------------- */ -void PPPM::poisson(int eflag, int vflag) +void PPPM::poisson() { int i,j,k,n; double eng; @@ -1565,18 +1926,18 @@ void PPPM::poisson(int eflag, int vflag) fft1->compute(work1,work1,1); - // if requested, compute energy and virial contribution + // global energy and virial contribution double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); double s2 = scaleinv*scaleinv; - if (eflag || vflag) { - if (vflag) { + if (eflag_global || vflag_global) { + if (vflag_global) { n = 0; for (i = 0; i < nfft; i++) { eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - energy += eng; + if (eflag_global) energy += eng; n += 2; } } else { @@ -1598,6 +1959,10 @@ void PPPM::poisson(int eflag, int vflag) work1[n++] *= scaleinv * greensfn[i]; } + // extra FFTs for per-atom energy/virial + + if (eflag_atom || vflag_atom) poisson_peratom(); + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) // FFT leaves data in 3d brick decomposition // copy it into inner portion of vdx,vdy,vdz arrays @@ -1666,6 +2031,142 @@ void PPPM::poisson(int eflag, int vflag) } } +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; + work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles ------------------------------------------------------------------------- */ @@ -1724,6 +2225,72 @@ void PPPM::fieldforce() } } +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + /* ---------------------------------------------------------------------- map nprocs to NX by NY grid as PX by PY procs - return optimal px,py ------------------------------------------------------------------------- */ @@ -1853,7 +2420,7 @@ void PPPM::compute_rho_coeff() 111, 3155). Slabs defined here to be parallel to the xy plane. ------------------------------------------------------------------------- */ -void PPPM::slabcorr(int eflag) +void PPPM::slabcorr() { // compute local contribution to global dipole moment @@ -1874,7 +2441,7 @@ void PPPM::slabcorr(int eflag) const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; const double qscale = force->qqrd2e * scale; - if (eflag) energy += qscale * e_slabcorr; + if (eflag_global) energy += qscale * e_slabcorr; // add on force corrections @@ -1937,5 +2504,11 @@ double PPPM::memory_usage() bytes += nfft_both * sizeof(double); bytes += nfft_both*5 * sizeof(FFT_SCALAR); bytes += 2 * nbuf * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) { + bytes += 7 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); + } + return bytes; } diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index 8845dda246..dd327fb170 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -55,22 +55,27 @@ class PPPM : public KSpace { double volume; double delxinv,delyinv,delzinv,delvolinv; double shift,shiftone; + int peratom_allocate_flag; int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in; int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out; int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost; int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft; int nlower,nupper; - int ngrid,nfft,nbuf,nfft_both; + int ngrid,nfft,nfft_both; + int nbuf,nbuf_peratom; FFT_SCALAR ***density_brick; FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick; + FFT_SCALAR ***u_brick; + FFT_SCALAR ***v0_brick,***v1_brick,***v2_brick; + FFT_SCALAR ***v3_brick,***v4_brick,***v5_brick; double *greensfn; double **vg; double *fkx,*fky,*fkz; FFT_SCALAR *density_fft; FFT_SCALAR *work1,*work2; - FFT_SCALAR *buf1,*buf2; + FFT_SCALAR *buf1,*buf2,*buf3,*buf4; double *gf_b; FFT_SCALAR **rho1d,**rho_coeff; @@ -90,7 +95,9 @@ class PPPM : public KSpace { void set_grid(); virtual void allocate(); + virtual void allocate_peratom(); virtual void deallocate(); + virtual void deallocate_peratom(); int factorable(int); double rms(double, double, bigint, double, double **); double diffpr(double, double, double, double, double **); @@ -100,13 +107,16 @@ class PPPM : public KSpace { virtual void make_rho(); virtual void brick2fft(); virtual void fillbrick(); - virtual void poisson(int, int); + virtual void fillbrick_peratom(); + virtual void poisson(); + virtual void poisson_peratom(); virtual void fieldforce(); + virtual void fieldforce_peratom(); void procs2grid2d(int,int,int,int *, int*); void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &); void compute_rho_coeff(); - void slabcorr(int); + void slabcorr(); /* ---------------------------------------------------------------------- denominator for Hockney-Eastwood Green's function @@ -120,7 +130,8 @@ class PPPM : public KSpace { gf_b = denominator expansion coeffs ------------------------------------------------------------------------- */ - inline double gf_denom(const double &x, const double &y, const double &z) const { + inline double gf_denom(const double &x, const double &y, + const double &z) const { double sx,sy,sz; sz = sy = sx = 0.0; for (int l = order-1; l >= 0; l--) { diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp index 663d263a20..02200d78a2 100644 --- a/src/KSPACE/pppm_cg.cpp +++ b/src/KSPACE/pppm_cg.cpp @@ -44,7 +44,8 @@ using namespace MathConst; PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) { - if ((narg < 1) || (narg > 2)) error->all(FLERR,"Illegal kspace_style pppm/cg command"); + if ((narg < 1) || (narg > 2)) + error->all(FLERR,"Illegal kspace_style pppm/cg command"); if (narg == 2) smallq = atof(arg[1]); @@ -70,7 +71,18 @@ PPPMCG::~PPPMCG() void PPPMCG::compute(int eflag, int vflag) { - int i; + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + } // convert atoms from box to lamda coords @@ -90,7 +102,7 @@ void PPPMCG::compute(int eflag, int vflag) memory->create(is_charged,nmax,"pppm/cg:is_charged"); } - // one time setup message. + // one time setup message if (num_charged < 0) { bigint charged_all, charged_num; @@ -134,16 +146,13 @@ void PPPMCG::compute(int eflag, int vflag) } } - num_charged=0; - for (i=0; i < atom->nlocal; ++i) + num_charged = 0; + for (i = 0; i < atom->nlocal; ++i) if (fabs(atom->q[i]) > smallq) { is_charged[num_charged] = i; ++num_charged; } - energy = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0; - // find grid points for all my particles // map my particle charge onto my local 3d density grid @@ -159,23 +168,32 @@ void PPPMCG::compute(int eflag, int vflag) // compute potential gradient on my FFT grid and // portion of e_long on this proc's FFT grid // return gradients (electric fields) in 3d brick decomposition - - poisson(eflag,vflag); + // also performs per-atom calculations via poisson_peratom() - // all procs communicate E-field values to fill ghost cells - // surrounding their 3d bricks + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks fillbrick(); + // extra per-atom energy/virial communication + + if (eflag_atom || vflag_atom) fillbrick_peratom(); + // calculate the force on my particles fieldforce(); - // sum energy across procs and add in volume-dependent term + // extra per-atom energy/virial communication + + if (eflag_atom || vflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term const double qscale = force->qqrd2e * scale; - if (eflag) { + if (eflag_global) { double energy_all; MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); energy = energy_all; @@ -186,14 +204,36 @@ void PPPMCG::compute(int eflag, int vflag) energy *= qscale; } - // sum virial across procs + // sum global virial across procs - if (vflag) { + if (vflag_global) { double virial_all[6]; MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; } + // per-atom energy/virial + // energy includes self-energy correction + + if (eflag_atom || vflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + } + } + // 2d slab correction if (slabflag) slabcorr(eflag); @@ -344,6 +384,7 @@ void PPPMCG::fieldforce() } // convert E-field to force + const double qfactor = force->qqrd2e * scale * q[i]; f[i][0] += qfactor*ekx; f[i][1] += qfactor*eky; diff --git a/src/KSPACE/pppm_cg.h b/src/KSPACE/pppm_cg.h index 9e2fe88320..72e65e3026 100644 --- a/src/KSPACE/pppm_cg.h +++ b/src/KSPACE/pppm_cg.h @@ -28,7 +28,7 @@ class PPPMCG : public PPPM { public: PPPMCG(class LAMMPS *, int, char **); virtual ~PPPMCG(); - virtual void compute(int eflag, int vflag); + virtual void compute(int, int); virtual double memory_usage(); protected: diff --git a/src/compute_heat_flux.cpp b/src/compute_heat_flux.cpp index f5a38a5656..a575eaf5e7 100644 --- a/src/compute_heat_flux.cpp +++ b/src/compute_heat_flux.cpp @@ -66,7 +66,8 @@ ComputeHeatFlux::ComputeHeatFlux(LAMMPS *lmp, int narg, char **arg) : if (modify->compute[ipe]->peatomflag == 0) error->all(FLERR,"Compute heat/flux compute ID does not compute pe/atom"); if (modify->compute[istress]->pressatomflag == 0) - error->all(FLERR,"Compute heat/flux compute ID does not compute stress/atom"); + error->all(FLERR, + "Compute heat/flux compute ID does not compute stress/atom"); vector = new double[6]; } diff --git a/src/compute_pe.cpp b/src/compute_pe.cpp index c153e15d8a..75ec66dfd6 100644 --- a/src/compute_pe.cpp +++ b/src/compute_pe.cpp @@ -88,6 +88,7 @@ double ComputePE::compute_scalar() MPI_Allreduce(&one,&scalar,1,MPI_DOUBLE,MPI_SUM,world); if (kspaceflag && force->kspace) scalar += force->kspace->energy; + if (pairflag && force->pair && force->pair->tail_flag) { double volume = domain->xprd * domain->yprd * domain->zprd; scalar += force->pair->etail / volume; diff --git a/src/compute_pe_atom.cpp b/src/compute_pe_atom.cpp index 20a16ae5b0..c16349402c 100755 --- a/src/compute_pe_atom.cpp +++ b/src/compute_pe_atom.cpp @@ -22,6 +22,7 @@ #include "angle.h" #include "dihedral.h" #include "improper.h" +#include "kspace.h" #include "memory.h" #include "error.h" @@ -43,9 +44,11 @@ ComputePEAtom::ComputePEAtom(LAMMPS *lmp, int narg, char **arg) : if (narg == 3) { pairflag = 1; bondflag = angleflag = dihedralflag = improperflag = 1; + kspaceflag = 1; } else { pairflag = 0; bondflag = angleflag = dihedralflag = improperflag = 0; + kspaceflag = 0; int iarg = 3; while (iarg < narg) { if (strcmp(arg[iarg],"pair") == 0) pairflag = 1; @@ -53,6 +56,7 @@ ComputePEAtom::ComputePEAtom(LAMMPS *lmp, int narg, char **arg) : else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1; else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1; else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1; + else if (strcmp(arg[iarg],"kspace") == 0) kspaceflag = 1; else error->all(FLERR,"Illegal compute pe/atom command"); iarg++; } @@ -137,6 +141,13 @@ void ComputePEAtom::compute_peratom() if (force->newton) comm->reverse_comm_compute(this); + // KSpace contribution is already per local atom + + if (kspaceflag && force->kspace) { + double *eatom = force->kspace->eatom; + for (i = 0; i < nlocal; i++) energy[i] += eatom[i]; + } + // zero energy of atoms not in group // only do this after comm since ghost contributions must be included diff --git a/src/compute_pe_atom.h b/src/compute_pe_atom.h index f4990a439a..6feaa4af4a 100755 --- a/src/compute_pe_atom.h +++ b/src/compute_pe_atom.h @@ -35,7 +35,7 @@ class ComputePEAtom : public Compute { double memory_usage(); private: - int pairflag,bondflag,angleflag,dihedralflag,improperflag; + int pairflag,bondflag,angleflag,dihedralflag,improperflag,kspaceflag; int nmax; double *energy; }; diff --git a/src/compute_pressure.cpp b/src/compute_pressure.cpp index 5f981db3cc..1870882a18 100644 --- a/src/compute_pressure.cpp +++ b/src/compute_pressure.cpp @@ -57,7 +57,8 @@ ComputePressure::ComputePressure(LAMMPS *lmp, int narg, char **arg) : if (icompute < 0) error->all(FLERR,"Could not find compute pressure temperature ID"); if (modify->compute[icompute]->tempflag == 0) - error->all(FLERR,"Compute pressure temperature ID does not compute temperature"); + error->all(FLERR, + "Compute pressure temperature ID does not compute temperature"); // process optional args diff --git a/src/compute_stress_atom.cpp b/src/compute_stress_atom.cpp index 5560221225..e93a468670 100644 --- a/src/compute_stress_atom.cpp +++ b/src/compute_stress_atom.cpp @@ -23,6 +23,7 @@ #include "angle.h" #include "dihedral.h" #include "improper.h" +#include "kspace.h" #include "modify.h" #include "fix.h" #include "memory.h" @@ -47,11 +48,13 @@ ComputeStressAtom::ComputeStressAtom(LAMMPS *lmp, int narg, char **arg) : keflag = 1; pairflag = 1; bondflag = angleflag = dihedralflag = improperflag = 1; + kspaceflag = 1; fixflag = 1; } else { keflag = 0; pairflag = 0; bondflag = angleflag = dihedralflag = improperflag = 0; + kspaceflag = 0; fixflag = 0; int iarg = 3; while (iarg < narg) { @@ -61,6 +64,7 @@ ComputeStressAtom::ComputeStressAtom(LAMMPS *lmp, int narg, char **arg) : else if (strcmp(arg[iarg],"angle") == 0) angleflag = 1; else if (strcmp(arg[iarg],"dihedral") == 0) dihedralflag = 1; else if (strcmp(arg[iarg],"improper") == 0) improperflag = 1; + else if (strcmp(arg[iarg],"kspace") == 0) kspaceflag = 1; else if (strcmp(arg[iarg],"fix") == 0) fixflag = 1; else if (strcmp(arg[iarg],"virial") == 0) { pairflag = 1; @@ -175,6 +179,15 @@ void ComputeStressAtom::compute_peratom() if (force->newton) comm->reverse_comm_compute(this); + // KSpace contribution is already per local atom + + if (kspaceflag && force->kspace) { + double **vatom = force->kspace->vatom; + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) + stress[i][j] += vatom[i][j]; + } + // zero virial of atoms not in group // only do this after comm since ghost contributions must be included diff --git a/src/compute_stress_atom.h b/src/compute_stress_atom.h index 641fe38406..5665b875b1 100644 --- a/src/compute_stress_atom.h +++ b/src/compute_stress_atom.h @@ -35,7 +35,8 @@ class ComputeStressAtom : public Compute { double memory_usage(); private: - int keflag,pairflag,bondflag,angleflag,dihedralflag,improperflag,fixflag; + int keflag,pairflag,bondflag,angleflag,dihedralflag,improperflag; + int kspaceflag,fixflag; int nmax; double **stress; }; diff --git a/src/kspace.cpp b/src/kspace.cpp index aa50ec1cd7..f187563bcd 100644 --- a/src/kspace.cpp +++ b/src/kspace.cpp @@ -14,8 +14,10 @@ #include "stdlib.h" #include "string.h" #include "kspace.h" -#include "error.h" +#include "atom.h" #include "comm.h" +#include "memory.h" +#include "error.h" using namespace LAMMPS_NS; @@ -29,6 +31,71 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) gewaldflag = 0; slabflag = 0; slab_volfactor = 1; + + maxeatom = maxvatom = 0; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- */ + +KSpace::~KSpace() +{ + memory->destroy(eatom); + memory->destroy(vatom); +} + +/* ---------------------------------------------------------------------- + setup for energy, virial computation + see integrate::ev_set() for values of eflag (0-3) and vflag (0-6) +------------------------------------------------------------------------- */ + +void KSpace::ev_setup(int eflag, int vflag) +{ + int i,n; + + evflag = 1; + + eflag_either = eflag; + eflag_global = eflag % 2; + eflag_atom = eflag / 2; + + vflag_either = vflag; + vflag_global = vflag % 4; + vflag_atom = vflag / 4; + + // reallocate per-atom arrays if necessary + + if (eflag_atom && atom->nlocal > maxeatom) { + maxeatom = atom->nmax; + memory->destroy(eatom); + memory->create(eatom,maxeatom,"kspace:eatom"); + } + if (vflag_atom && atom->nlocal > maxvatom) { + maxvatom = atom->nmax; + memory->destroy(vatom); + memory->create(vatom,maxvatom,6,"kspace:vatom"); + } + + // zero accumulators + + if (eflag_global) energy = 0.0; + if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0; + if (eflag_atom) { + n = atom->nlocal; + for (i = 0; i < n; i++) eatom[i] = 0.0; + } + if (vflag_atom) { + n = atom->nlocal; + for (i = 0; i < n; i++) { + vatom[i][0] = 0.0; + vatom[i][1] = 0.0; + vatom[i][2] = 0.0; + vatom[i][3] = 0.0; + vatom[i][4] = 0.0; + vatom[i][5] = 0.0; + } + } } /* ---------------------------------------------------------------------- diff --git a/src/kspace.h b/src/kspace.h index fa54e48375..32aa939cbd 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -21,13 +21,15 @@ namespace LAMMPS_NS { class KSpace : protected Pointers { friend class ThrOMP; public: - double energy; - double virial[6]; + double energy; // accumulated energy + double virial[6]; // accumlated virial + double *eatom,**vatom; // accumulated per-atom energy/virial + double g_ewald; int nx_pppm,ny_pppm,nz_pppm; KSpace(class LAMMPS *, int, char **); - virtual ~KSpace() {} + virtual ~KSpace(); void modify_params(int, char **); void *extract(const char *); @@ -38,11 +40,18 @@ class KSpace : protected Pointers { virtual double memory_usage() {return 0.0;} protected: - double slab_volfactor; int gridflag,gewaldflag; int order; int slabflag; double scale; + double slab_volfactor; + + int evflag; + int eflag_either,eflag_global,eflag_atom; + int vflag_either,vflag_global,vflag_atom; + int maxeatom,maxvatom; + + void ev_setup(int, int); }; } diff --git a/src/read_restart.cpp b/src/read_restart.cpp index c015449ff4..accb4b9458 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -97,6 +97,7 @@ void ReadRestart::command(int narg, char **arg) else multiproc = 0; // open single restart file or base file for multiproc case + // auto-detect whether byte swapping needs to be done as file is read if (me == 0) { if (screen) fprintf(screen,"Reading restart file ...\n"); @@ -114,9 +115,12 @@ void ReadRestart::command(int narg, char **arg) sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } + swapflag = autodetect(&fp,hfile); if (multiproc) delete [] hfile; } + MPI_Bcast(&swapflag,1,MPI_INT,0,world); + // read header info and create atom style and simulation box header(); @@ -175,8 +179,7 @@ void ReadRestart::command(int narg, char **arg) } for (int iproc = 0; iproc < nprocs_file; iproc++) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); @@ -184,7 +187,7 @@ void ReadRestart::command(int narg, char **arg) } if (n > 0) { - if (me == 0) fread(buf,sizeof(double),n,fp); + if (me == 0) nread_double(buf,n,fp); MPI_Bcast(buf,n,MPI_DOUBLE,0,world); } @@ -229,13 +232,13 @@ void ReadRestart::command(int narg, char **arg) error->one(FLERR,str); } - fread(&n,sizeof(int),1,fp); + nread_int(&n,1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } - if (n > 0) fread(buf,sizeof(double),n,fp); + if (n > 0) nread_double(buf,n,fp); m = 0; while (m < n) m += avec->unpack_restart(&buf[m]); @@ -708,7 +711,7 @@ void ReadRestart::type_arrays() if (flag == MASS) { double *mass = new double[atom->ntypes+1]; - if (me == 0) fread(&mass[1],sizeof(double),atom->ntypes,fp); + if (me == 0) nread_double(&mass[1],atom->ntypes,fp); MPI_Bcast(&mass[1],atom->ntypes,MPI_DOUBLE,0,world); atom->set_mass(mass); delete [] mass; @@ -731,10 +734,9 @@ void ReadRestart::force_fields() while (flag >= 0) { if (flag == PAIR) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); style = new char[n]; - if (me == 0) fread(style,sizeof(char),n,fp); + if (me == 0) nread_char(style,n,fp); MPI_Bcast(style,n,MPI_CHAR,0,world); force->create_pair(style); @@ -746,10 +748,9 @@ void ReadRestart::force_fields() } } else if (flag == BOND) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); style = new char[n]; - if (me == 0) fread(style,sizeof(char),n,fp); + if (me == 0) nread_char(style,n,fp); MPI_Bcast(style,n,MPI_CHAR,0,world); force->create_bond(style); @@ -757,10 +758,9 @@ void ReadRestart::force_fields() force->bond->read_restart(fp); } else if (flag == ANGLE) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); style = new char[n]; - if (me == 0) fread(style,sizeof(char),n,fp); + if (me == 0) nread_char(style,n,fp); MPI_Bcast(style,n,MPI_CHAR,0,world); force->create_angle(style); @@ -768,10 +768,9 @@ void ReadRestart::force_fields() force->angle->read_restart(fp); } else if (flag == DIHEDRAL) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); style = new char[n]; - if (me == 0) fread(style,sizeof(char),n,fp); + if (me == 0) nread_char(style,n,fp); MPI_Bcast(style,n,MPI_CHAR,0,world); force->create_dihedral(style); @@ -779,10 +778,9 @@ void ReadRestart::force_fields() force->dihedral->read_restart(fp); } else if (flag == IMPROPER) { - if (me == 0) fread(&n,sizeof(int),1,fp); - MPI_Bcast(&n,1,MPI_INT,0,world); + n = read_int(); style = new char[n]; - if (me == 0) fread(style,sizeof(char),n,fp); + if (me == 0) nread_char(style,n,fp); MPI_Bcast(style,n,MPI_CHAR,0,world); force->create_improper(style); @@ -796,6 +794,38 @@ void ReadRestart::force_fields() } } +/* ---------------------------------------------------------------------- + read N ints from restart file + do not bcast them, caller does that if required +------------------------------------------------------------------------- */ + +void ReadRestart::nread_int(int *buf, int n, FILE *fp) +{ + fread(buf,sizeof(int),n,fp); + if (swapflag) {} +} + +/* ---------------------------------------------------------------------- + read N doubles from restart file + do not bcast them, caller does that if required +------------------------------------------------------------------------- */ + +void ReadRestart::nread_double(double *buf, int n, FILE *fp) +{ + fread(buf,sizeof(double),n,fp); + if (swapflag) {} +} + +/* ---------------------------------------------------------------------- + read N chars from restart file + do not bcast them, caller does that if required +------------------------------------------------------------------------- */ + +void ReadRestart::nread_char(char *buf, int n, FILE *fp) +{ + fread(buf,sizeof(char),n,fp); +} + /* ---------------------------------------------------------------------- read an int from restart file and bcast it ------------------------------------------------------------------------- */ @@ -803,7 +833,10 @@ void ReadRestart::force_fields() int ReadRestart::read_int() { int value; - if (me == 0) fread(&value,sizeof(int),1,fp); + if (me == 0) { + fread(&value,sizeof(int),1,fp); + if (swapflag) {} + } MPI_Bcast(&value,1,MPI_INT,0,world); return value; } @@ -815,7 +848,10 @@ int ReadRestart::read_int() double ReadRestart::read_double() { double value; - if (me == 0) fread(&value,sizeof(double),1,fp); + if (me == 0) { + fread(&value,sizeof(double),1,fp); + if (swapflag) {} + } MPI_Bcast(&value,1,MPI_DOUBLE,0,world); return value; } @@ -828,10 +864,16 @@ double ReadRestart::read_double() char *ReadRestart::read_char() { int n; - if (me == 0) fread(&n,sizeof(int),1,fp); + if (me == 0) { + fread(&n,sizeof(int),1,fp); + if (swapflag) {} + } MPI_Bcast(&n,1,MPI_INT,0,world); char *value = new char[n]; - if (me == 0) fread(value,sizeof(char),n,fp); + if (me == 0) { + fread(value,sizeof(char),n,fp); + if (swapflag) {} + } MPI_Bcast(value,n,MPI_CHAR,0,world); return value; } @@ -843,7 +885,34 @@ char *ReadRestart::read_char() bigint ReadRestart::read_bigint() { bigint value; - if (me == 0) fread(&value,sizeof(bigint),1,fp); + if (me == 0) { + fread(&value,sizeof(bigint),1,fp); + if (swapflag) {} + } MPI_Bcast(&value,1,MPI_LMP_BIGINT,0,world); return value; } + +/* ---------------------------------------------------------------------- +// auto-detect if restart file needs to be byte-swapped on this platform +// return 0 if not, 1 if it does +// re-open file with fp after checking first few bytes + read a bigint from restart file and bcast it +------------------------------------------------------------------------- */ + +int ReadRestart::autodetect(FILE **pfp, char *file) +{ + FILE *fp = *pfp; + + // read, check, set return flag + + int flag = 0; + + // reset file pointer + + fclose(fp); + fp = fopen(file,"rb"); + *pfp = fp; + + return flag; +} diff --git a/src/read_restart.h b/src/read_restart.h index 14c1131bda..c7186a771d 100644 --- a/src/read_restart.h +++ b/src/read_restart.h @@ -34,16 +34,21 @@ class ReadRestart : protected Pointers { int me,nprocs,nprocs_file; FILE *fp; int nfix_restart_global,nfix_restart_peratom; + int swapflag; void file_search(char *, char *); void header(); void type_arrays(); void force_fields(); + void nread_int(int *, int, FILE *); + void nread_double(double *, int, FILE *); + void nread_char(char *, int, FILE *); int read_int(); double read_double(); char *read_char(); bigint read_bigint(); + int autodetect(FILE **, char *); }; } diff --git a/src/write_restart.cpp b/src/write_restart.cpp index 6a909ae107..6dcec7659f 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -492,4 +492,3 @@ void WriteRestart::write_bigint(int flag, bigint value) fwrite(&flag,sizeof(int),1,fp); fwrite(&value,sizeof(bigint),1,fp); } -