From 6c836ed0e4c19edcb023e066c70705db9275c8e1 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Wed, 7 Mar 2012 15:51:43 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7924 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/ewald.cpp | 9 ++-- src/KSPACE/pppm.cpp | 9 ++-- src/KSPACE/pppm_cg.cpp | 85 ++++++++++++++++++++++++++++++-- src/KSPACE/pppm_cg.h | 3 +- src/USER-OMP/pppm_cg_omp.cpp | 95 +++++++++++++++++++++++++++++++++++- src/USER-OMP/pppm_cg_omp.h | 1 + src/USER-OMP/pppm_omp.cpp | 91 +++++++++++++++++++++++++++++++++- src/USER-OMP/pppm_omp.h | 1 + 8 files changed, 281 insertions(+), 13 deletions(-) diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index a95d4a3302..fcb7014ffa 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -151,9 +151,12 @@ void Ewald::init() // fluid-occupied volume used to estimate real-space error // zprd used rather than zprd_slab - if (!gewaldflag) - g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / - (2.0*q2))) / cutoff; + if (!gewaldflag) { + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) + error->all(FLERR,"KSpace accuracy too large to estimate G vector"); + g_ewald = sqrt(-log(g_ewald)) / cutoff; + } // setup Ewald coefficients so can print stats diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 8fae7b2d55..94e71f29cb 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -993,9 +993,12 @@ void PPPM::set_grid() double h_x,h_y,h_z; bigint natoms = atom->natoms; - if (!gewaldflag) - g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / - (2.0*q2))) / cutoff; + if (!gewaldflag) { + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) + error->all(FLERR,"KSpace accuracy too large to estimate G vector"); + g_ewald = sqrt(-log(g_ewald)) / cutoff; + } // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy // nz_pppm uses extended zprd_slab instead of zprd diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp index dd6396fe38..fa4ece88ed 100644 --- a/src/KSPACE/pppm_cg.cpp +++ b/src/KSPACE/pppm_cg.cpp @@ -221,7 +221,8 @@ void PPPMCG::compute(int eflag, int vflag) int nlocal = atom->nlocal; if (eflag_atom) { - for (i = 0; i < nlocal; i++) { + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; eatom[i] *= 0.5; eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); @@ -230,8 +231,10 @@ void PPPMCG::compute(int eflag, int vflag) } if (vflag_atom) { - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale; + } } } @@ -393,6 +396,72 @@ void PPPMCG::fieldforce() } } +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMCG::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + /* ---------------------------------------------------------------------- Slab-geometry correction term to dampen inter-slab interactions between periodically repeating slabs. Yields good approximation to 2D Ewald if @@ -425,6 +494,16 @@ void PPPMCG::slabcorr() if (eflag_global) energy += qscale * e_slabcorr; + //per-atom energy + + if (eflag_atom) { + double efact = 2.0*MY_PI*dipole_all/volume; + for (int j = 0; j < num_charged; j++) { + int i = is_charged[j]; + eatom[i] += qscale * q[i]*x[i][2]*efact; + } + } + // add on force corrections const double ffact = -4.0*MY_PI*dipole_all/volume * qscale; diff --git a/src/KSPACE/pppm_cg.h b/src/KSPACE/pppm_cg.h index 8fe6c2825d..4c0c1baa27 100644 --- a/src/KSPACE/pppm_cg.h +++ b/src/KSPACE/pppm_cg.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -39,6 +39,7 @@ class PPPMCG : public PPPM { virtual void particle_map(); virtual void make_rho(); virtual void fieldforce(); + virtual void fieldforce_peratom(); virtual void slabcorr(); }; diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp index 32526d2d2e..aa15a96517 100644 --- a/src/USER-OMP/pppm_cg_omp.cpp +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -344,7 +344,9 @@ void PPPMCGOMP::make_rho() #if defined(_OPENMP) #pragma omp parallel default(none) +#endif { +#if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = num_charged; @@ -407,8 +409,8 @@ void PPPMCGOMP::make_rho() if (nthreads > 1) { data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); } - } #endif + } } /* ---------------------------------------------------------------------- @@ -430,7 +432,9 @@ void PPPMCGOMP::fieldforce() #if defined(_OPENMP) #pragma omp parallel default(none) +#endif { +#if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = num_charged; @@ -488,9 +492,96 @@ void PPPMCGOMP::fieldforce() f[i][2] += qfactor*ekz; } } -#if defined(_OPENMP) } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMCGOMP::fieldforce_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) #endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = num_charged; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = num_charged; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // this if protects against having more threads than charged local atoms + if (ifrom < num_charged) { + for (j = ifrom; j < ito; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } + } + } } /* ---------------------------------------------------------------------- diff --git a/src/USER-OMP/pppm_cg_omp.h b/src/USER-OMP/pppm_cg_omp.h index 4c32630271..598c5522a6 100644 --- a/src/USER-OMP/pppm_cg_omp.h +++ b/src/USER-OMP/pppm_cg_omp.h @@ -36,6 +36,7 @@ namespace LAMMPS_NS { virtual void allocate(); virtual void deallocate(); virtual void fieldforce(); + virtual void fieldforce_peratom(); virtual void make_rho(); void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp index 706e3e4270..496ee27aa8 100644 --- a/src/USER-OMP/pppm_omp.cpp +++ b/src/USER-OMP/pppm_omp.cpp @@ -434,7 +434,9 @@ void PPPMOMP::fieldforce() #if defined(_OPENMP) #pragma omp parallel default(none) +#endif { +#if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; @@ -491,9 +493,96 @@ void PPPMOMP::fieldforce() f[i][2] += qfactor*ekz; } } -#if defined(_OPENMP) } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMOMP::fieldforce_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) #endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } + } + } } /* ---------------------------------------------------------------------- diff --git a/src/USER-OMP/pppm_omp.h b/src/USER-OMP/pppm_omp.h index 8abcd64aa6..93caa6ed6e 100644 --- a/src/USER-OMP/pppm_omp.h +++ b/src/USER-OMP/pppm_omp.h @@ -36,6 +36,7 @@ namespace LAMMPS_NS { virtual void allocate(); virtual void deallocate(); virtual void fieldforce(); + virtual void fieldforce_peratom(); virtual void make_rho(); void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,