git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9479 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2013-02-14 15:36:49 +00:00
parent 41801fa762
commit 8f94aa8010
7 changed files with 255 additions and 55 deletions
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@ -355,7 +355,6 @@ void PPPMDisp::init()


  // initialize the pair style to get the coefficients
-
  pair->init();
  init_coeffs();

@ -4751,7 +4750,7 @@ void PPPMDisp::fieldforce_c_ik()
    const double qfactor = force->qqrd2e * scale * q[i];
    f[i][0] += qfactor*ekx;
    f[i][1] += qfactor*eky;
-    f[i][2] += qfactor*ekz;
+    if (slabflag != 2) f[i][2] += qfactor*ekz;
  }
 }
 /* ----------------------------------------------------------------------
@ -4970,7 +4969,7 @@ void PPPMDisp::fieldforce_g_ik()
    lj = B[type];
    f[i][0] += lj*ekx;
    f[i][1] += lj*eky;
-    f[i][2] += lj*ekz;
+    if (slabflag != 2) f[i][2] += lj*ekz;
  }
 }

@ -5228,7 +5227,7 @@ void PPPMDisp::fieldforce_a_ik()
    lj6 = B[7*type];
    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
-    f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
+    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
  }
 }

--- a/src/KSPACE/pppm_disp_tip4p.cpp
+++ b/src/KSPACE/pppm_disp_tip4p.cpp
@ -232,7 +232,7 @@ void PPPMDispTIP4P::fieldforce_c_ik()
    if (type[i] != typeO) {
      f[i][0] += qfactor*ekx;
      f[i][1] += qfactor*eky;
-      f[i][2] += qfactor*ekz;
+      if (slabflag != 2) f[i][2] += qfactor*ekz;

    } else {
      fx = qfactor * ekx;
@ -242,15 +242,15 @@ void PPPMDispTIP4P::fieldforce_c_ik()

      f[i][0] += fx*(1 - alpha);
      f[i][1] += fy*(1 - alpha);
-      f[i][2] += fz*(1 - alpha);
+      if (slabflag != 2) f[i][2] += fz*(1 - alpha);

      f[iH1][0] += 0.5*alpha*fx;
      f[iH1][1] += 0.5*alpha*fy;
-      f[iH1][2] += 0.5*alpha*fz;
+      if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;

      f[iH2][0] += 0.5*alpha*fx;
      f[iH2][1] += 0.5*alpha*fy;
-      f[iH2][2] += 0.5*alpha*fz;
+      if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
    }
  }
 }
@ -309,9 +309,9 @@ void PPPMDispTIP4P::fieldforce_c_ad()
    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+    dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
    
    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
    compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
@ -358,22 +358,22 @@ void PPPMDispTIP4P::fieldforce_c_ad()
    if (type[i] != typeO) {
      f[i][0] += fx;
      f[i][1] += fy;
-      f[i][2] += fz;
+      if (slabflag != 2) f[i][2] += fz;

    } else {
      find_M(i,iH1,iH2,xM);

      f[i][0] += fx*(1 - alpha);
      f[i][1] += fy*(1 - alpha);
-      f[i][2] += fz*(1 - alpha);
+      if (slabflag != 2) f[i][2] += fz*(1 - alpha);

      f[iH1][0] += 0.5*alpha*fx;
      f[iH1][1] += 0.5*alpha*fy;
-      f[iH1][2] += 0.5*alpha*fz;
+      if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;

      f[iH2][0] += 0.5*alpha*fx;
      f[iH2][1] += 0.5*alpha*fy;
-      f[iH2][2] += 0.5*alpha*fz;
+      if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
    }
  }
 }
--- a/src/USER-OMP/pppm_cg_omp.cpp
+++ b/src/USER-OMP/pppm_cg_omp.cpp
@ -323,6 +323,93 @@ void PPPMCGOMP::compute(int eflag, int vflag)
  } // end of omp parallel region
 }

+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::make_rho()
+{
+
+  // clear 3d density array
+
+  FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
+  memset(d,0,ngrid*sizeof(FFT_SCALAR));
+
+  const int ix = nxhi_out - nxlo_out + 1;
+  const int iy = nyhi_out - nylo_out + 1;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const double * _noalias const q = atom->q;
+    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+    const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
+
+    const double boxlox = boxlo[0];
+    const double boxloy = boxlo[1];
+    const double boxloz = boxlo[2];
+
+    // determine range of grid points handled by this thread
+    int i,jfrom,jto,tid;
+    loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
+
+    // get per thread data
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+
+    // loop over all local atoms for all threads
+
+    for (int j = 0; j < num_charged; j++) {
+      i = is_charged[j];
+
+      const int nx = p2g[i].a;
+      const int ny = p2g[i].b;
+      const int nz = p2g[i].t;
+
+      // pre-screen whether this atom will ever come within 
+      // reach of the data segement this thread is updating.
+      if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
+	   || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
+
+      const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
+      const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
+      const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      const FFT_SCALAR z0 = delvolinv * q[i];
+
+      for (int n = nlower; n <= nupper; ++n) {
+	const int jn = (nz+n-nzlo_out)*ix*iy;
+	const FFT_SCALAR y0 = z0*r1d[2][n];
+
+	for (int m = nlower; m <= nupper; ++m) {
+	  const int jm = jn+(ny+m-nylo_out)*ix;
+	  const FFT_SCALAR x0 = y0*r1d[1][m];
+
+	  for (int l = nlower; l <= nupper; ++l) {
+	    const int jl = jm+nx+l-nxlo_out;
+	    // make sure each thread only updates
+	    // "his" elements of the density grid
+	    if (jl >= jto) break;
+	    if (jl < jfrom) continue;
+
+	    d[jl] += x0*r1d[0][l];
+	  }
+	}
+      }
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
   interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
--- a/src/USER-OMP/pppm_cg_omp.h
+++ b/src/USER-OMP/pppm_cg_omp.h
@ -38,6 +38,7 @@ class PPPMCGOMP : public PPPMCG, public ThrOMP {
  virtual void compute_gf_ik();
  virtual void compute_gf_ad();

+  virtual void make_rho();
  virtual void fieldforce_ik();
  virtual void fieldforce_ad();
  virtual void fieldforce_peratom();
--- a/src/USER-OMP/pppm_omp.cpp
+++ b/src/USER-OMP/pppm_omp.cpp
@ -321,6 +321,92 @@ void PPPMOMP::compute(int eflag, int vflag)
  } // end of omp parallel region
 }

+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+------------------------------------------------------------------------- */
+
+void PPPMOMP::make_rho()
+{
+
+  // clear 3d density array
+
+  FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
+  memset(d,0,ngrid*sizeof(FFT_SCALAR));
+
+  const int ix = nxhi_out - nxlo_out + 1;
+  const int iy = nyhi_out - nylo_out + 1;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const double * _noalias const q = atom->q;
+    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+    const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
+
+    const double boxlox = boxlo[0];
+    const double boxloy = boxlo[1];
+    const double boxloz = boxlo[2];
+
+    // determine range of grid points handled by this thread
+    int i,jfrom,jto,tid;
+    loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
+
+    // get per thread data
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+
+    // loop over all local atoms for all threads
+    const int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++) {
+
+      const int nx = p2g[i].a;
+      const int ny = p2g[i].b;
+      const int nz = p2g[i].t;
+
+      // pre-screen whether this atom will ever come within 
+      // reach of the data segement this thread is updating.
+      if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
+	   || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
+
+      const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
+      const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
+      const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      const FFT_SCALAR z0 = delvolinv * q[i];
+
+      for (int n = nlower; n <= nupper; ++n) {
+	const int jn = (nz+n-nzlo_out)*ix*iy;
+	const FFT_SCALAR y0 = z0*r1d[2][n];
+
+	for (int m = nlower; m <= nupper; ++m) {
+	  const int jm = jn+(ny+m-nylo_out)*ix;
+	  const FFT_SCALAR x0 = y0*r1d[1][m];
+
+	  for (int l = nlower; l <= nupper; ++l) {
+	    const int jl = jm+nx+l-nxlo_out;
+	    // make sure each thread only updates
+	    // "his" elements of the density grid
+	    if (jl >= jto) break;
+	    if (jl < jfrom) continue;
+
+	    d[jl] += x0*r1d[0][l];
+	  }
+	}
+      }
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
   interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
--- a/src/USER-OMP/pppm_omp.h
+++ b/src/USER-OMP/pppm_omp.h
@ -38,10 +38,10 @@ class PPPMOMP : public PPPM, public ThrOMP {
  virtual void compute_gf_ik();
  virtual void compute_gf_ad();

+  virtual void make_rho();
  virtual void fieldforce_ik();
  virtual void fieldforce_ad();
  virtual void fieldforce_peratom();
-//  virtual void make_rho();

 private:
  void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
--- a/src/USER-OMP/pppm_tip4p_omp.cpp
+++ b/src/USER-OMP/pppm_tip4p_omp.cpp
@ -386,58 +386,85 @@ void PPPMTIP4POMP::particle_map()

 void PPPMTIP4POMP::make_rho()
 {
-  const double * _noalias const q = atom->q;
-  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
-  const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
-  const int * _noalias const type = atom->type;
-
-  dbl3_t xM;
-  FFT_SCALAR x0,y0,z0;
-  const double boxlox = boxlo[0];
-  const double boxloy = boxlo[1];
-  const double boxloz = boxlo[2];
-
-  int l,m,n,mx,my,mz,iH1,iH2;
-
-  const int nlocal = atom->nlocal;

  // clear 3d density array

-  FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
-  memset(vec,0,ngrid*sizeof(FFT_SCALAR));
+  FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
+  memset(d,0,ngrid*sizeof(FFT_SCALAR));

-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
+  const int ix = nxhi_out - nxlo_out + 1;
+  const int iy = nyhi_out - nylo_out + 1;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const double * _noalias const q = atom->q;
+    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+    const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
+    const int * _noalias const type = atom->type;
+    dbl3_t xM;
+
+    const double boxlox = boxlo[0];
+    const double boxloy = boxlo[1];
+    const double boxloz = boxlo[2];
+
+    // determine range of grid points handled by this thread
+    int i,jfrom,jto,tid,iH1,iH2;
+    loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
+
+    // get per thread data
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+
+    // loop over all local atoms for all threads
+    const int nlocal = atom->nlocal;
+    for (i = 0; i < nlocal; i++) {
+
+      const int nx = p2g[i].a;
+      const int ny = p2g[i].b;
+      const int nz = p2g[i].t;
+
+      // pre-screen whether this atom will ever come within 
+      // reach of the data segement this thread is updating.
+      if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
+	   || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;

-  for (int i = 0; i < nlocal; i++) {
    if (type[i] == typeO) {
      find_M_thr(i,iH1,iH2,xM);
    } else {
      xM = x[i];
    }
+      const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
+      const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
+      const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;

-    const int nx = p2g[i].a;
-    const int ny = p2g[i].b;
-    const int nz = p2g[i].t;
-    const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
-    const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
-    const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
+      compute_rho1d_thr(r1d,dx,dy,dz);

-    compute_rho1d(dx,dy,dz);
+      const FFT_SCALAR z0 = delvolinv * q[i];

-    z0 = delvolinv * q[i];
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        x0 = y0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          density_brick[mz][my][mx] += x0*rho1d[0][l];
-        }
+      for (int n = nlower; n <= nupper; ++n) {
+	const int jn = (nz+n-nzlo_out)*ix*iy;
+	const FFT_SCALAR y0 = z0*r1d[2][n];
+
+	for (int m = nlower; m <= nupper; ++m) {
+	  const int jm = jn+(ny+m-nylo_out)*ix;
+	  const FFT_SCALAR x0 = y0*r1d[1][m];
+
+	  for (int l = nlower; l <= nupper; ++l) {
+	    const int jl = jm+nx+l-nxlo_out;
+	    // make sure each thread only updates
+	    // "his" elements of the density grid
+	    if (jl >= jto) break;
+	    if (jl < jfrom) continue;
+
+	    d[jl] += x0*r1d[0][l];
+	  }
+	}
      }
    }
  }