Cleaned up debug statements and unused sections in the amoeba and hippo gpu styles

2023-01-14 20:02:36 -06:00
parent 03e48f2658
commit c21f2faa1f
2 changed files with 22 additions and 366 deletions
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Trung Nguyen (Northwestern)
+   Contributing author: Trung Nguyen (Northwestern/UChicago)
 ------------------------------------------------------------------------- */

 #include "pair_amoeba_gpu.h"
@ -486,8 +486,6 @@ void PairAmoebaGPU::induce()
      comm->reverse_comm(this);
    }

-    //error->all(FLERR,"STOP GPU");
-
    // set initial conjugate gradient residual and conjugate vector

    for (i = 0; i < nlocal; i++) {
@ -547,8 +545,6 @@ void PairAmoebaGPU::induce()
        comm->reverse_comm(this);
      }

-      //error->all(FLERR,"STOP");
-
      for (i = 0; i < nlocal; i++) {
        for (j = 0; j < 3; j++) {
          uind[i][j] = vec[i][j];
@ -1751,166 +1747,6 @@ void PairAmoebaGPU::polar_kspace()
    }
  }

-  // account for dipole response terms in the TCG method
-
-  /*
-  if (poltyp == TCG) {
-
-    for (m = 0; m < tcgnab; m++) {
-      for (i = 0; i < nlocal; i++) {
-        for (j = 0; j < 3; j++) {
-          fuind[i][j] = a[0][j]*uad[i][m][0] + a[1][j]*uad[i][m][1] +
-            a[2][j]*uad[i][m][2];
-          fuinp[i][j] = a[0][j]*ubp[i][m][0] + a[1][j]*ubp[i][m][1] +
-            a[2][j]*ubp[i][m][2];
-        }
-      }
-
-      grid_uind(fuind,fuinp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      for (k = 0; k < nfft3; k++) {
-        for (j = 0; j < nfft2; j++) {
-          for (i = 0; i < nfft1; i++) {
-            term = qfac[k][j][i];
-            qgrid[k][j][i][0] *= term;
-            qgrid[k][j][i][1] *= term;
-          }
-        }
-      }
-
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1);
-      fphi_uind(fphid,fphip,fphidp);
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 1; j < 10; j++) {
-          fphid[i][j] *= felec;
-          fphip[i][j] *= felec;
-        }
-      }
-
-      for (i = 0; i < nlocal; i++) {
-        f1 = 0.0;
-        f2 = 0.0;
-        f3 = 0.0;
-        for (k = 0; k < 3; k++) {
-          j1 = deriv1[k+1];
-          j2 = deriv2[k+1];
-          j3 = deriv3[k+1];
-          f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1];
-          f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2];
-          f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3];
-        }
-
-        f1 *= 0.5 * nfft1;
-        f2 *= 0.5 * nfft2;
-        f3 *= 0.5 * nfft3;
-        h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3;
-        h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3;
-        h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3;
-        f[i][0] -= h1;
-        f[i][1] -= h2;
-        f[i][2] -= h3;
-
-        for (j = 1; j < 4; j++) {
-          cphid[j] = 0.0;
-          cphip[j] = 0.0;
-          for (k = 1; k < 4; k++) {
-            cphid[j] += ftc[j][k]*fphid[i][k];
-            cphip[j] += ftc[j][k]*fphip[i][k];
-          }
-        }
-
-        vxx -= 0.5*(cphid[1]*ubp[i][m][0] + cphip[1]*uad[i][m][0]);
-        vyy -= 0.5*(cphid[2]*ubp[i][m][1] + cphip[2]*uad[i][m][1]);
-        vzz -= 0.5*(cphid[3]*ubp[i][m][2] + cphip[3]*uad[i][m][2]);
-
-        vxy -= 0.25*(cphid[1]*ubp[i][m][1] + cphip[1]*uad[i][m][1] +
-                        cphid[2]*ubp[i][m][0] + cphip[2]*uad[i][m][0]);
-        vyz -= 0.25*(cphid[1]*ubp[i][m][2] + cphip[1]*uad[i][m][2] +
-                        cphid[3]*ubp[i][m][0] + cphip[3]*uad[i][m][0]);
-        vxz -= 0.25*(cphid[2]*ubp[i][m][2] + cphip[2]*uad[i][m][2] +
-                        cphid[3]*ubp[i][m][1] + cphip[3]*uad[i][m][1]);
-      }
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 0; j < 3; j++) {
-          fuind[i][j] = a[0][j]*ubd[i][m][0] + a[1][j]*ubd[i][m][1] +
-            a[2][j]*ubd[i][m][2];
-          fuinp[i][j] = a[0][j]*uap[i][m][0] + a[1][j]*uap[i][m][1] +
-            a[2][j]*uap[i][m][2];
-        }
-      }
-
-      grid_uind(fuind,fuinp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      for (k = 0; k < nfft3; k++) {
-        for (j = 0; j < nfft2; j++) {
-          for (i = 0; i < nfft1; i++) {
-            term = qfac[k][j][i];
-            qgrid[k][j][i][0] *= term;
-            qgrid[k][j][i][1] *= term;
-          }
-        }
-      }
-
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],-1);
-      fphi_uind(fphid,fphip,fphidp);
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 1; j < 10; j++) {
-          fphid[i][j] *= felec;
-          fphip[i][j] *= felec;
-        }
-      }
-
-      for (i = 0; i < nlocal; i++) {
-        f1 = 0.0;
-        f2 = 0.0;
-        f3 = 0.0;
-        for (k = 0; k < 3; k++) {
-          j1 = deriv1[k+1];
-          j2 = deriv2[k+1];
-          j3 = deriv3[k+1];
-          f1 += fuind[i][k]*fphip[i][j1]+fuinp[i][k]*fphid[i][j1];
-          f2 += fuind[i][k]*fphip[i][j2]+fuinp[i][k]*fphid[i][j2];
-          f3 += fuind[i][k]*fphip[i][j3]+fuinp[i][k]*fphid[i][j3];
-        }
-
-        f1 *= 0.5 * nfft1;
-        f2 *= 0.5 * nfft2;
-        f3 *= 0.5 * nfft3;
-        h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3;  // matvec
-        h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3;
-        h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3;
-        f[i][0] -= h1;
-        f[i][1] -= h2;
-        f[i][2] -= h3;
-
-        for (j = 1; j < 4; j++) {
-          cphid[j] = 0.0;
-          cphip[j] = 0.0;
-          for (k = 1; k < 4; k++) {
-            cphid[j] += ftc[j][k]*fphid[i][k];
-            cphip[j] += ftc[j][k]*fphip[i][k];
-          }
-        }
-
-        vxx -= 0.5*(cphid[1]*uap[i][m][0] + cphip[1]*ubd[i][m][0]);
-        vyy -= 0.5*(cphid[2]*uap[i][m][1] + cphip[2]*ubd[i][m][1]);
-        vzz -= 0.5*(cphid[3]*uap[i][m][2] + cphip[3]*ubd[i][m][2]);
-        vxy -= 0.25*(cphid[1]*uap[i][m][1] + cphip[1]*ubd[i][m][1] +
-                     cphid[2]*uap[i][m][0] + cphip[2]*ubd[i][m][0]);
-        vxz -= 0.25*(cphid[1]*uap[i][m][2] + cphip[1]*ubd[i][m][2] +
-                     cphid[3]*uap[i][m][0] + cphip[3]*ubd[i][m][0]);
-        vyz -= 0.25*(cphid[2]*uap[i][m][2] + cphip[2]*ubd[i][m][2] +
-                     cphid[3]*uap[i][m][1] + cphip[3]*ubd[i][m][1]);
-      }
-    }
-  }
-  */
-
  // assign permanent and induced multipoles to the PME grid

  for (i = 0; i < nlocal; i++) {
@ -2097,142 +1933,6 @@ void PairAmoebaGPU::polar_kspace()
    }
  }

-  // add back missing terms for the TCG polarization method;
-  // first do the term for "UAD" dotted with "UBP"
-
-  /*
-  if (poltyp == TCG) {
-
-    for (m = 0; m < tcgnab; m++) {
-      for (i = 0; i < nlocal; i++) {
-        for (j = 0; j < 10; j++)
-          cmp[i][j] = 0.0;
-        for (j = 1; j < 4; j++)
-          cmp[i][j] = ubp[i][m][j-1];
-      }
-
-      cmp_to_fmp(cmp,fmp);
-      grid_mpole(fmp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      for (k = 0; k < nfft3; k++) {
-        for (j = 0; j < nfft2; j++) {
-          for (i = 0; i < nfft1; i++) {
-            qgrip[k][j][i][0] = qgrid[k][j][i][0];
-            qgrip[k][j][i][1] = qgrid[k][j][i][1];
-          }
-        }
-      }
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 1; j < 4; j++)
-          cmp[i][j] = uad[i][m][j-1];
-      }
-
-      cmp_to_fmp(cmp,fmp);
-      grid_mpole(fmp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      // make the scalar summation over reciprocal lattice
-      // NOTE: this loop has to be distributed for parallel
-      // NOTE: why does this one include m = 0 ?
-
-      for (m = 1; m < ntot; m++) {
-        k1 = m % nfft1;
-        k2 = (m % nff) / nfft1;
-        k3 = m/nff;
-        r1 = (k1 >= nf1) ? k1-nfft1 : k1;
-        r2 = (k2 >= nf2) ? k2-nfft2 : k2;
-        r3 = (k3 >= nf3) ? k3-nfft3 : k3;
-        h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3;
-        h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
-        h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
-        hsq = h1*h1 + h2*h2 + h3*h3;
-        term = -pterm * hsq;
-        expterm = 0.0;
-        if (term > -50.0 && hsq != 0.0) {
-          denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3];
-          expterm = exp(term) / denom;
-          struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] +
-            qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1];
-          eterm = 0.5 * felec * expterm * struc2;
-          vterm = (2.0/hsq) * (1.0-term) * eterm;
-          virpolar[0] -= h1*h1*vterm - eterm;
-          virpolar[1] -= h2*h2*vterm - eterm;
-          virpolar[2] -= h3*h3*vterm - eterm;
-          virpolar[3] -= h1*h2*vterm;
-          virpolar[4] -= h1*h3*vterm;
-          virpolar[5] -= h2*h3*vterm;
-        }
-      }
-
-      // now do the TCG terms with "UBD" dotted with "UAP"
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 0; j < 10; j++)
-          cmp[i][j] = 0.0;
-        for (j = 1; j < 4; j++)
-          cmp[i][j] = uap[i][m][j-1];
-      }
-
-      cmp_to_fmp(cmp,fmp);
-      grid_mpole(fmp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      for (k = 0; k < nfft3; k++) {
-        for (j = 0; j < nfft2; j++) {
-          for (i = 0; i < nfft1; i++) {
-            qgrip[k][j][i][0] = qgrid[k][j][i][0];
-            qgrip[k][j][i][1] = qgrid[k][j][i][1];
-          }
-        }
-      }
-
-      for (i = 0; i < nlocal; i++) {
-        for (j = 1; j < 4; j++)
-          cmp[i][j] = ubd[i][m][j-1];
-      }
-
-      cmp_to_fmp(cmp,fmp);
-      grid_mpole(fmp);
-      efft->compute(qgrid[0][0][0],qgrid[0][0][0],1);
-
-      // make the scalar summation over reciprocal lattice
-      // NOTE: this loop has to be distributed for parallel
-      // NOTE: why does this one include m = 0 ?
-
-      for (m = 1; m < ntot; m++) {
-        k1 = m % nfft1;
-        k2 = (m % nff) / nfft1;
-        k3 = m/nff;
-        r1 = (k1 >= nf1) ? k1-nfft1 : k1;
-        r2 = (k2 >= nf2) ? k2-nfft2 : k2;
-        r3 = (k3 >= nf3) ? k3-nfft3 : k3;
-        h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3;
-        h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
-        h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
-        hsq = h1*h1 + h2*h2 + h3*h3;
-        term = -pterm * hsq;
-        expterm = 0.0;
-        if (term > -50.0 && hsq != 0.0) {
-          denom = volterm*hsq*bsmod1[k1]*bsmod2[k2]*bsmod3[k3];
-          expterm = exp(term) / denom;
-          struc2 = qgrid[k3][k2][k1][0]*qgrip[k3][k2][k1][0] +
-            qgrid[k3][k2][k1][1]*qgrip[k3][k2][k1][1];
-          eterm = 0.5 * felec * expterm * struc2;
-          vterm = (2.0/hsq) * (1.0-term) * eterm;
-          virpolar[0] -= h1*h1*vterm - eterm;
-          virpolar[1] -= h2*h2*vterm - eterm;
-          virpolar[2] -= h3*h3*vterm - eterm;
-          virpolar[3] -= h1*h2*vterm;
-          virpolar[4] -= h1*h3*vterm;
-          virpolar[5] -= h2*h3*vterm;
-        }
-      }
-    }
-  }
-  */
-
  // increment the total internal virial tensor components

  if (vflag_global) {
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Trung Nguyen (Northwestern)
+   Contributing author: Trung Nguyen (Northwestern/UChicago)
 ------------------------------------------------------------------------- */

 #include "pair_hippo_gpu.h"
@ -208,14 +208,14 @@ void PairHippoGPU::init_style()
  int tq_size;
  int mnf = 5e-2 * neighbor->oneatom;
  int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
-                                pdamp, thole, dirdamp, amtype2class,
-                                special_repel, special_disp, special_mpole,
-                                special_polar_wscale, special_polar_piscale,
-                                special_polar_pscale, sizpr, dmppr, elepr,
-                                csix, adisp, pcore, palpha,
-                                atom->nlocal, atom->nlocal+atom->nghost, mnf,
-                                maxspecial, maxspecial15, cell_size, gpu_mode,
-                                screen, polar_dscale, polar_uscale, tq_size);
+                               pdamp, thole, dirdamp, amtype2class,
+                               special_repel, special_disp, special_mpole,
+                               special_polar_wscale, special_polar_piscale,
+                               special_polar_pscale, sizpr, dmppr, elepr,
+                               csix, adisp, pcore, palpha,
+                               atom->nlocal, atom->nlocal+atom->nghost, mnf,
+                               maxspecial, maxspecial15, cell_size, gpu_mode,
+                               screen, polar_dscale, polar_uscale, tq_size);
  GPU_EXTRA::check_flag(success,error,world);

  if (gpu_mode == GPU_FORCE)
@ -271,14 +271,14 @@ void PairHippoGPU::repulsion()
  inum = atom->nlocal;

  firstneigh = hippo_gpu_precompute(neighbor->ago, inum, nall, atom->x,
-                                     atom->type, amtype, amgroup, rpole,
-                                     nullptr, nullptr, nullptr,
-                                     sublo, subhi, atom->tag,
-                                     atom->nspecial, atom->special,
-                                     atom->nspecial15, atom->special15,
-                                     eflag, vflag, eflag_atom, vflag_atom,
-                                     host_start, &ilist, &numneigh, cpu_time,
-                                     success, atom->q, domain->boxlo, domain->prd);
+                                    atom->type, amtype, amgroup, rpole,
+                                    nullptr, nullptr, nullptr,
+                                    sublo, subhi, atom->tag,
+                                    atom->nspecial, atom->special,
+                                    atom->nspecial15, atom->special15,
+                                    eflag, vflag, eflag_atom, vflag_atom,
+                                    host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);

  // select the correct cutoff for the term

@ -480,14 +480,6 @@ void PairHippoGPU::induce()
      }
    }
  }
-/*
-  printf("GPU: cutghost = %f\n", comm->cutghost[0]);
-  for (i = 0; i < 10; i++) {
-    printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
-      i, udir[i][0], udir[i][1], udir[i][2],
-      udirp[i][0], udirp[i][1], udirp[i][2]);
-  }
-*/

  // allocate memory and make early host-device transfers
  // must be done before the first ufield0c
@ -611,8 +603,6 @@ void PairHippoGPU::induce()
      comm->reverse_comm(this);
    }

-    //error->all(FLERR,"STOP GPU");
-
    // set initial conjugate gradient residual and conjugate vector

    for (i = 0; i < nlocal; i++) {
@ -1022,7 +1012,7 @@ void PairHippoGPU::udirect2b_cpu()
        tdipdip[ndip++] = bcn[1]*yr*zr;
        tdipdip[ndip++] = -bcn[0] + bcn[1]*zr*zr;
      } else {
-        if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
+
      }

    } // jj
@ -1055,16 +1045,7 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)

  memset(&field[0][0], 0, 3*nall *sizeof(double));
  memset(&fieldp[0][0], 0, 3*nall *sizeof(double));
-
-/*  
-  for (int i = 0; i < nall; i++) {
-    for (int j = 0; j < 3; j++) {
-      field[i][j] = 0.0;
-      fieldp[i][j] = 0.0;
-    }
-  }
-*/
-  
+ 
  // get the real space portion of the mutual field first

  MPI_Barrier(world);
@ -1086,19 +1067,13 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
    field[i][1] += term*uind[i][1];
    field[i][2] += term*uind[i][2];
  }
+
  for (int i = 0; i < nlocal; i++) {
    fieldp[i][0] += term*uinp[i][0];
    fieldp[i][1] += term*uinp[i][1];
    fieldp[i][2] += term*uinp[i][2];
  }
-/*  
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < 3; j++) {
-      field[i][j] += term*uind[i][j];
-      fieldp[i][j] += term*uinp[i][j];
-    }
-  }
-*/
+
  // accumulate the field and fieldp values from the real-space portion from umutual2b() on the GPU
  //   field and fieldp may already have some nonzero values from kspace (umutual1 and self)

@ -1271,25 +1246,6 @@ void PairHippoGPU::umutual1(double **field, double **fieldp)
    fieldp[i][1] -= dfy;
    fieldp[i][2] -= dfz;
  }
-/*
-  for (int i = 0; i < nlocal; i++) {
-    for (j = 0; j < 3; j++) {
-      dipfield1[i][j] = a[j][0]*fdip_phi1[i][1] +
-        a[j][1]*fdip_phi1[i][2] + a[j][2]*fdip_phi1[i][3];
-      dipfield2[i][j] = a[j][0]*fdip_phi2[i][1] +
-        a[j][1]*fdip_phi2[i][2] + a[j][2]*fdip_phi2[i][3];
-    }
-  }
-
-  // increment the field at each multipole site
-
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < 3; j++) {
-      field[i][j] -= dipfield1[i][j];
-      fieldp[i][j] -= dipfield2[i][j];
-    }
-  }
-*/
 }

 /* ----------------------------------------------------------------------