diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 713fe7445f..25a447ccbf 100755 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -355,7 +355,6 @@ void PPPMDisp::init() // initialize the pair style to get the coefficients - pair->init(); init_coeffs(); @@ -4751,7 +4750,7 @@ void PPPMDisp::fieldforce_c_ik() const double qfactor = force->qqrd2e * scale * q[i]; f[i][0] += qfactor*ekx; f[i][1] += qfactor*eky; - f[i][2] += qfactor*ekz; + if (slabflag != 2) f[i][2] += qfactor*ekz; } } /* ---------------------------------------------------------------------- @@ -4970,7 +4969,7 @@ void PPPMDisp::fieldforce_g_ik() lj = B[type]; f[i][0] += lj*ekx; f[i][1] += lj*eky; - f[i][2] += lj*ekz; + if (slabflag != 2) f[i][2] += lj*ekz; } } @@ -5228,7 +5227,7 @@ void PPPMDisp::fieldforce_a_ik() lj6 = B[7*type]; f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; - f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; } } diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp index c777b2660b..4a93013ae0 100755 --- a/src/KSPACE/pppm_disp_tip4p.cpp +++ b/src/KSPACE/pppm_disp_tip4p.cpp @@ -232,7 +232,7 @@ void PPPMDispTIP4P::fieldforce_c_ik() if (type[i] != typeO) { f[i][0] += qfactor*ekx; f[i][1] += qfactor*eky; - f[i][2] += qfactor*ekz; + if (slabflag != 2) f[i][2] += qfactor*ekz; } else { fx = qfactor * ekx; @@ -242,15 +242,15 @@ void PPPMDispTIP4P::fieldforce_c_ik() f[i][0] += fx*(1 - alpha); f[i][1] += fy*(1 - alpha); - f[i][2] += fz*(1 - alpha); + if (slabflag != 2) f[i][2] += fz*(1 - alpha); f[iH1][0] += 0.5*alpha*fx; f[iH1][1] += 0.5*alpha*fy; - f[iH1][2] += 0.5*alpha*fz; + if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; f[iH2][0] += 0.5*alpha*fx; f[iH2][1] += 0.5*alpha*fy; - f[iH2][2] += 0.5*alpha*fz; + if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; } } } @@ -309,9 +309,9 @@ void PPPMDispTIP4P::fieldforce_c_ad() nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); @@ -358,22 +358,22 @@ void PPPMDispTIP4P::fieldforce_c_ad() if (type[i] != typeO) { f[i][0] += fx; f[i][1] += fy; - f[i][2] += fz; + if (slabflag != 2) f[i][2] += fz; } else { find_M(i,iH1,iH2,xM); f[i][0] += fx*(1 - alpha); f[i][1] += fy*(1 - alpha); - f[i][2] += fz*(1 - alpha); + if (slabflag != 2) f[i][2] += fz*(1 - alpha); f[iH1][0] += 0.5*alpha*fx; f[iH1][1] += 0.5*alpha*fy; - f[iH1][2] += 0.5*alpha*fz; + if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; f[iH2][0] += 0.5*alpha*fx; f[iH2][1] += 0.5*alpha*fy; - f[iH2][2] += 0.5*alpha*fz; + if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; } } } diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp index e65a9265e5..5f1b5e1604 100644 --- a/src/USER-OMP/pppm_cg_omp.cpp +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -323,6 +323,93 @@ void PPPMCGOMP::compute(int eflag, int vflag) } // end of omp parallel region } +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMCGOMP::make_rho() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); + memset(d,0,ngrid*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out - nxlo_out + 1; + const int iy = nyhi_out - nylo_out + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const double * _noalias const q = atom->q; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + const FFT_SCALAR z0 = delvolinv * q[i]; + + for (int n = nlower; n <= nupper; ++n) { + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles for ik ------------------------------------------------------------------------- */ diff --git a/src/USER-OMP/pppm_cg_omp.h b/src/USER-OMP/pppm_cg_omp.h index 37be55eeef..e1184138f3 100644 --- a/src/USER-OMP/pppm_cg_omp.h +++ b/src/USER-OMP/pppm_cg_omp.h @@ -38,6 +38,7 @@ class PPPMCGOMP : public PPPMCG, public ThrOMP { virtual void compute_gf_ik(); virtual void compute_gf_ad(); + virtual void make_rho(); virtual void fieldforce_ik(); virtual void fieldforce_ad(); virtual void fieldforce_peratom(); diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp index d45cc492ed..6ada6b4137 100644 --- a/src/USER-OMP/pppm_omp.cpp +++ b/src/USER-OMP/pppm_omp.cpp @@ -321,6 +321,92 @@ void PPPMOMP::compute(int eflag, int vflag) } // end of omp parallel region } +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOMP::make_rho() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); + memset(d,0,ngrid*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out - nxlo_out + 1; + const int iy = nyhi_out - nylo_out + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const double * _noalias const q = atom->q; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + const FFT_SCALAR z0 = delvolinv * q[i]; + + for (int n = nlower; n <= nupper; ++n) { + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles for ik ------------------------------------------------------------------------- */ diff --git a/src/USER-OMP/pppm_omp.h b/src/USER-OMP/pppm_omp.h index 835398b79f..03eb0e110b 100644 --- a/src/USER-OMP/pppm_omp.h +++ b/src/USER-OMP/pppm_omp.h @@ -38,10 +38,10 @@ class PPPMOMP : public PPPM, public ThrOMP { virtual void compute_gf_ik(); virtual void compute_gf_ad(); + virtual void make_rho(); virtual void fieldforce_ik(); virtual void fieldforce_ad(); virtual void fieldforce_peratom(); -// virtual void make_rho(); private: void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp index 92c8098f1c..dcc84b6e26 100644 --- a/src/USER-OMP/pppm_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -386,58 +386,85 @@ void PPPMTIP4POMP::particle_map() void PPPMTIP4POMP::make_rho() { - const double * _noalias const q = atom->q; - const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; - const int * _noalias const type = atom->type; - - dbl3_t xM; - FFT_SCALAR x0,y0,z0; - const double boxlox = boxlo[0]; - const double boxloy = boxlo[1]; - const double boxloz = boxlo[2]; - - int l,m,n,mx,my,mz,iH1,iH2; - - const int nlocal = atom->nlocal; // clear 3d density array - FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; - memset(vec,0,ngrid*sizeof(FFT_SCALAR)); + FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); + memset(d,0,ngrid*sizeof(FFT_SCALAR)); - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt + const int ix = nxhi_out - nxlo_out + 1; + const int iy = nyhi_out - nylo_out + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const double * _noalias const q = atom->q; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const int * _noalias const type = atom->type; + dbl3_t xM; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid,iH1,iH2; + loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; - for (int i = 0; i < nlocal; i++) { if (type[i] == typeO) { find_M_thr(i,iH1,iH2,xM); } else { xM = x[i]; } + const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; - const int nx = p2g[i].a; - const int ny = p2g[i].b; - const int nz = p2g[i].t; - const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; - const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; - const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; + compute_rho1d_thr(r1d,dx,dy,dz); - compute_rho1d(dx,dy,dz); + const FFT_SCALAR z0 = delvolinv * q[i]; - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } + for (int n = nlower; n <= nupper; ++n) { + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } }