diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 10d75f2393..b4b84cc47d 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass, // specific to HIPPO k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion"); - _pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY); // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; @@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full, // only copy them back if this is the last kernel // otherwise, commenting out these two lines to leave the answers // (forces, energies and virial) on the device until the last kernel - this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); - this->device->add_ans_object(this->ans); + //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + //this->device->add_ans_object(this->ans); this->hd_balancer.stop_timer(); @@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full, const int red_blocks=multipole_real(eflag,vflag); // leave the answers (forces, energies and virial) on the device, - // only copy them back in the last kernel (polar_real) - //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); - //device->add_ans_object(ans); + // only copy them back in the last kernel (this one, or polar_real once done) + this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + this->device->add_ans_object(this->ans); this->hd_balancer.stop_timer(); diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index 040ecf9308..3bfd4f7019 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_, numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass]; numtyp valk = polar6[j].x; - if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n", - j, corei, corek, alphai, alphak, vali, valk); - // intermediates involving moments and separation distance numtyp dir = dix*xr + diy*yr + diz*zr; diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h index ae604e8401..251f909b78 100644 --- a/lib/gpu/lal_hippo.h +++ b/lib/gpu/lal_hippo.h @@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba { UCL_Kernel k_dispersion; - UCL_Vector _pval; - protected: bool _allocated; int dispersion_real(const int eflag, const int vflag); diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp index 8d9e0c101d..945ee976eb 100644 --- a/src/AMOEBA/amoeba_multipole.cpp +++ b/src/AMOEBA/amoeba_multipole.cpp @@ -379,8 +379,7 @@ void PairAmoeba::multipole_real() corek = pcore[jclass]; alphak = palpha[jclass]; valk = pval[j]; - if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n", - j, corei, corek, alphai, alphak, vali, valk); + /* printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n", atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk); @@ -421,6 +420,8 @@ void PairAmoeba::multipole_real() term2i*rr3i + term2k*rr3k + term2ik*rr3ik + term3i*rr5i + term3k*rr5k + term3ik*rr5ik; + + // find damped multipole intermediates for force and torque de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + @@ -527,14 +528,14 @@ void PairAmoeba::multipole_real() // increment force-based gradient and torque on second site // commenting out j parts for DEBUGGING - + fmpole[j][0] -= frcx; fmpole[j][1] -= frcy; fmpole[j][2] -= frcz; tq[j][0] += ttmk[0]; tq[j][1] += ttmk[1]; tq[j][2] += ttmk[2]; - + // increment the virial due to pairwise Cartesian forces vxx = -xr * frcx; diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 3bad2d4f52..6ac22e0721 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real() // set the energy unit conversion factor for multipolar real-space calculation double felec = electric / am_dielectric; - printf("hippo gpu multipole\n"); + firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, pval, sublo, subhi, atom->tag,