Fixed bugs in the dispersion real-space term for hippo. NOTE: the CPU version filters out neighbors with zero special_disp

2021-09-24 00:21:25 -05:00
parent 830b5fa2dd
commit ad8164dfc0
3 changed files with 12 additions and 10 deletions
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@ -210,10 +210,11 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
  this->_aewald = aewald;
  const int red_blocks=dispersion_real(eflag,vflag);
-  // leave the answers (forces, energies and virial) on the device,
+  // only copy them back if this is the last kernel
-  //   only copy them back in the last kernel (polar_real)
+  //   otherwise, commenting out these two lines to leave the answers
-  //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  //   (forces, energies and virial) on the device until the last kernel
-  //device->add_ans_object(ans);
+  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
  this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();
@ -238,7 +239,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
                               (BX/this->_threads_per_atom)));
  this->time_pair.start();
-  // Build the short neighbor list for the cutoff off2_mpole,
+  // Build the short neighbor list for the cutoff off2_disp,
  //   at this point mpole is the first kernel in a time step
  this->k_short_nbor.set_size(GX,BX);
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@ -512,7 +512,8 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
      numtyp tk,tk2;
      numtyp damp3,damp5;
      numtyp ddamp;
-      numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)];
+      const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
      numtyp factor_disp = sp_nonpol.z; // factor_disp = special_disp[sbmask15(j)];
      if (ai != ak) {
        ai2 = ai * ai;
@ -547,7 +548,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
      // apply damping and scaling factors for this interaction
      numtyp scale = factor_disp * damp*damp;
-      scale = scale - (numtyp )1.0;
+      scale = scale - (numtyp)1.0;
      numtyp e = -ci * ck * (expa+scale) / r6;
      numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r;
      numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7;
@ -562,7 +563,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
      f.x += dedx;
      f.y += dedy;
      f.z += dedz;
-      
+
      // increment the internal virial tensor components
      numtyp vxx = xr * dedx;
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -133,11 +133,11 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
  gpu_hal_ready = false;               // always false for HIPPO
  gpu_repulsion_ready = false;         // true for HIPPO when ready
-  gpu_dispersion_real_ready = false;   // true for HIPPO when ready
+  gpu_dispersion_real_ready = true;   // true for HIPPO when ready
  gpu_multipole_real_ready = false;
  gpu_udirect2b_ready = false;
  gpu_umutual2b_ready = false;
-  gpu_polar_real_ready = true;
+  gpu_polar_real_ready = false;
  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }