diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 7fa358e35a..07f8732bcb 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -210,10 +210,11 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full, this->_aewald = aewald; const int red_blocks=dispersion_real(eflag,vflag); - // leave the answers (forces, energies and virial) on the device, - // only copy them back in the last kernel (polar_real) - //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); - //device->add_ans_object(ans); + // copy the answers back only if this is the last kernel; + // otherwise, comment out these two lines to leave the answers + // (forces, energies and virial) on the device until the last kernel + this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + this->device->add_ans_object(this->ans); this->hd_balancer.stop_timer(); @@ -238,7 +239,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) { (BX/this->_threads_per_atom))); this->time_pair.start(); - // Build the short neighbor list for the cutoff off2_mpole, + // Build the short neighbor list for the cutoff off2_disp, // at this point mpole is the first kernel in a time step this->k_short_nbor.set_size(GX,BX); diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index 07df4c6ad0..f9020cf9a6 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -512,7 +512,8 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_, numtyp tk,tk2; numtyp damp3,damp5; numtyp ddamp; - numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)]; + const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)]; + numtyp factor_disp = sp_nonpol.z; // factor_disp = special_disp[sbmask15(j)]; if (ai != ak) { ai2 = ai * ai; @@ -547,7 +548,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_, // apply damping and scaling factors for this interaction numtyp scale = factor_disp * damp*damp; - scale = scale - (numtyp )1.0; + 
scale = scale - (numtyp)1.0; numtyp e = -ci * ck * (expa+scale) / r6; numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r; numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7; @@ -562,7 +563,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_, f.x += dedx; f.y += dedy; f.z += dedz; - + // increment the internal virial tensor components numtyp vxx = xr * dedx; diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index be5d4afc2b..a6e7b9edc6 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -133,11 +133,11 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) gpu_hal_ready = false; // always false for HIPPO gpu_repulsion_ready = false; // true for HIPPO when ready - gpu_dispersion_real_ready = false; // true for HIPPO when ready + gpu_dispersion_real_ready = true; // true for HIPPO when ready gpu_multipole_real_ready = false; gpu_udirect2b_ready = false; gpu_umutual2b_ready = false; - gpu_polar_real_ready = true; + gpu_polar_real_ready = false; GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); }