Working on umutual2b, tdipdip are correct, but incorrect results for field and fieldp

2021-09-12 00:51:48 -05:00
parent 94d6f7219c
commit edd76733a1
4 changed files with 59 additions and 12 deletions
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@ -465,6 +465,10 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
    numtyp pdi = damping[itype].x;
    numtyp ddi = damping[itype].z;

+    numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
+    numtyp aesq2n = (numtyp)0.0;
+    if (aewald > (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald);
+
    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int jextra=nbor_mem[nbor];
@ -498,6 +502,32 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
      numtyp ukzp = polar5[j].z; // uinp[j][2];

      numtyp factor_uscale;
+      //const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
+      //factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
+      if (igroup == jgroup) {
+        //factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
+        //factor_dscale = polar_dscale;
+        factor_uscale = polar_uscale;
+      } else {
+        //factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
+        factor_uscale = (numtyp)1.0;
+      }
+
+      // calculate the real space Ewald error function terms
+
+      numtyp ralpha = aewald * r;
+      numtyp exp2a = ucl_exp(-ralpha*ralpha);
+      numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*ralpha);
+      numtyp _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp2a;
+      //bn[0] = erfc(ralpha) / r;
+      bn[0] = _erfc * rinv;
+
+      numtyp aefac = aesq2n;
+      for (int m = 1; m <= 3; m++) {
+        numtyp bfac = (numtyp) (m+m-1);
+        aefac = aesq2 * aefac;
+        bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
+      }

      // find terms needed later to compute mutual polarization
      // if (poltyp != DIRECT) 
@ -520,6 +550,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
      numtyp scalek = factor_uscale;
      bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
      bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
+
      numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
      tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
      tdipdip[1] = bcn[1]*xr*yr;
@ -527,7 +558,9 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
      tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
      tdipdip[4] = bcn[1]*yr*zr;
      tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
-
+      //if (i==0 && j == 10) 
+      //  printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
+      //    i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
      fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
      fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
      fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@ -614,8 +614,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const

  // ------------------- Resize _tep array ------------------------

-  if (nall>_max_tep_size) {
-    _max_tep_size=static_cast<int>(static_cast<double>(nall)*1.10);
+  if (inum_full>_max_tep_size) {
+    _max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
    _tep.resize(_max_tep_size*4);
  }
  *tep_ptr=_tep.host.begin();
--- a/src/AMOEBA/amoeba_induce.cpp
+++ b/src/AMOEBA/amoeba_induce.cpp
@ -279,6 +279,10 @@ void PairAmoeba::induce()

    crstyle = FIELD;
    comm->reverse_comm_pair(this);
+    for (int i = 0; i < 10; i++) {
+        printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]);
+      }
+    //error->all(FLERR,"STOP CPU");
 /*
    if (comm->me == 0) {
      printf("CPU: cutghost = %f\n", comm->cutghost[0]);
@ -369,12 +373,13 @@ void PairAmoeba::induce()
      cfstyle = INDUCE;
      comm->forward_comm_pair(this);

-      ufield0c(field,fieldp);
-
-      //error->all(FLERR,"STOP");
+      ufield0c(field,fieldp);    

      crstyle = FIELD;
      comm->reverse_comm_pair(this);
+      
+
+      //error->all(FLERR,"STOP");
 /*
      if (comm->me == 0) {
        printf("CPU: iter = %d\n", iter);
@ -1243,7 +1248,9 @@ void PairAmoeba::umutual2b(double **field, double **fieldp)
      j = jlist[jj];
      uindj = uind[j];
      uinpj = uinp[j];
-
+      //if (i==0 && j == 10) 
+      //  printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
+      //    i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
      fid[0] = tdipdip[0]*uindj[0] + tdipdip[1]*uindj[1] + tdipdip[2]*uindj[2];
      fid[1] = tdipdip[1]*uindj[0] + tdipdip[3]*uindj[1] + tdipdip[4]*uindj[2];
      fid[2] = tdipdip[2]*uindj[0] + tdipdip[4]*uindj[1] + tdipdip[5]*uindj[2];
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@ -111,7 +111,7 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
  tep_pinned = nullptr;

  gpu_udirect2b_ready = true;
-  gpu_umutual2b_ready = false;
+  gpu_umutual2b_ready = true;
  gpu_polar_real_ready = true;

  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
@ -532,6 +532,14 @@ void PairAmoebaGPU::induce()
      comm->reverse_comm_pair(this);
    }

+    if (comm->me == 0) {
+      for (int i = 0; i < 10; i++) {
+        printf("i = %d; fieldp = %f %f %f\n", i, fieldp[i][0], fieldp[i][1], fieldp[i][2]);
+      }
+    }
+    
+    //error->all(FLERR,"STOP GPU");
+
 /*
    if (comm->me == 0) {
      printf("GPU: cutghost = %f\n", comm->cutghost[0]);
@ -596,12 +604,12 @@ void PairAmoebaGPU::induce()

      ufield0c(field,fieldp);

-      //error->all(FLERR,"STOP");
-
      if (!gpu_umutual2b_ready) {
        crstyle = FIELD;
        comm->reverse_comm_pair(this);
      }
+
+      //error->all(FLERR,"STOP");
 /*     
     if (comm->me == 0) {
       printf("GPU: iter = %d\n", iter);
@ -1051,7 +1059,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
    error->one(FLERR,"Insufficient memory on accelerator");

  // accumulate the field and fieldp values from the GPU lib
-  //   field and fieldp may already have some nonzero values from kspace (udirect1)
+  //   field and fieldp may already have some nonzero values from kspace (umutual1)

  int nlocal = atom->nlocal;
  double *field_ptr = (double *)fieldp_pinned;
@ -1071,7 +1079,6 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
    fieldp[i][1] += fieldp_ptr[idx+1];
    fieldp[i][2] += fieldp_ptr[idx+2];
  }
-
 }

 /* ---------------------------------------------------------------------- */