From f8bc091cb8336a486823b4df25c9339a18808cf5 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 25 Sep 2021 13:17:06 -0500 Subject: [PATCH] Kept working on the multipole real-space term of hippo --- lib/gpu/lal_base_amoeba.cpp | 9 ++++--- lib/gpu/lal_hippo.cu | 43 ++++++++++++++++++--------------- src/AMOEBA/amoeba_multipole.cpp | 3 ++- src/GPU/pair_hippo_gpu.cpp | 2 +- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index 1a299e902f..c4fdb8c9e5 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -793,8 +793,8 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole, pextra[idx+3] = (numtyp)amgroup[i]; } + n += nstride*_nall; if (uind) { - n += nstride*_nall; for (int i = 0; i < _nall; i++) { int idx = n+i*nstride; pextra[idx] = uind[i][0]; @@ -802,9 +802,9 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole, pextra[idx+2] = uind[i][2]; } } - + + n += nstride*_nall; if (uinp) { - n += nstride*_nall; for (int i = 0; i < _nall; i++) { int idx = n+i*nstride; pextra[idx] = uinp[i][0]; @@ -813,8 +813,9 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole, } } + n += nstride*_nall; if (pval) { - n += nstride*_nall; + for (int i = 0; i < _nall; i++) { int idx = n+i*nstride; pextra[idx] = pval[i]; diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index bc5d9270d4..040ecf9308 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -1032,6 +1032,9 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_, numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass]; numtyp valk = polar6[j].x; + if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n", + j, corei, corek, alphai, alphak, vali, valk); + // intermediates involving moments and separation distance numtyp dir = dix*xr + diy*yr + diz*zr; @@ -1149,22 +1152,22 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_, numtyp dmpij[11]; damppole(r,11,alphai,alphak,dmpi,dmpj,dmpij); numtyp scalek = factor_mpole; - numtyp rr1i = bn[0] - (1.0-scalek*dmpi[0])*rr1; - numtyp rr3i = bn[1] - (1.0-scalek*dmpi[2])*rr3; - numtyp rr5i = bn[2] - (1.0-scalek*dmpi[4])*rr5; - numtyp rr7i = bn[3] - (1.0-scalek*dmpi[6])*rr7; - numtyp rr1k = bn[0] - (1.0-scalek*dmpj[0])*rr1; - numtyp rr3k = bn[1] - (1.0-scalek*dmpj[2])*rr3; - numtyp rr5k = bn[2] - (1.0-scalek*dmpj[4])*rr5; - numtyp rr7k = bn[3] - (1.0-scalek*dmpj[6])*rr7; - numtyp rr1ik = bn[0] - (1.0-scalek*dmpij[0])*rr1; - numtyp rr3ik = bn[1] - (1.0-scalek*dmpij[2])*rr3; - numtyp rr5ik = bn[2] - (1.0-scalek*dmpij[4])*rr5; - numtyp rr7ik = bn[3] - (1.0-scalek*dmpij[6])*rr7; - numtyp rr9ik = bn[4] - (1.0-scalek*dmpij[8])*rr9; - numtyp rr11ik = bn[5] - (1.0-scalek*dmpij[10])*rr11; - rr1 = bn[0] - (1.0-scalek)*rr1; - rr3 = bn[1] - (1.0-scalek)*rr3; + numtyp rr1i = bn[0] - ((numtyp)1.0-scalek*dmpi[0])*rr1; + numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3; + numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5; + numtyp rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7; + numtyp rr1k = bn[0] - ((numtyp)1.0-scalek*dmpj[0])*rr1; + numtyp rr3k = bn[1] - ((numtyp)1.0-scalek*dmpj[2])*rr3; + numtyp rr5k = bn[2] - ((numtyp)1.0-scalek*dmpj[4])*rr5; + numtyp rr7k = bn[3] - ((numtyp)1.0-scalek*dmpj[6])*rr7; + numtyp rr1ik = bn[0] - ((numtyp)1.0-scalek*dmpij[0])*rr1; + numtyp rr3ik = bn[1] - ((numtyp)1.0-scalek*dmpij[2])*rr3; + numtyp rr5ik = bn[2] - ((numtyp)1.0-scalek*dmpij[4])*rr5; + numtyp rr7ik = bn[3] - ((numtyp)1.0-scalek*dmpij[6])*rr7; + numtyp rr9ik = bn[4] - ((numtyp)1.0-scalek*dmpij[8])*rr9; + numtyp rr11ik = bn[5] - ((numtyp)1.0-scalek*dmpij[10])*rr11; + rr1 = bn[0] - ((numtyp)1.0-scalek)*rr1; + rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3; numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik + term1i*rr1i + term1k*rr1k + term1ik*rr1ik + term2i*rr3i + term2k*rr3k + term2ik*rr3ik + @@ -1178,10 +1181,10 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_, term3i*rr7i + term3k*rr7k + term3ik*rr7ik; term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik; term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik; - term3 = 2.0 * rr5ik; - term4 = -2.0 * (corek*rr5i+valk*rr5ik - dkr*rr7ik+qkr*rr9ik); - term5 = -2.0 * (corei*rr5k+vali*rr5ik + dir*rr7ik+qir*rr9ik); - term6 = 4.0 * rr7ik; + term3 = (numtyp)2.0 * rr5ik; + term4 = (numtyp)-2.0 * (corek*rr5i+valk*rr5ik - dkr*rr7ik+qkr*rr9ik); + term5 = (numtyp)-2.0 * (corei*rr5k+vali*rr5ik + dir*rr7ik+qir*rr9ik); + term6 = (numtyp)4.0 * rr7ik; rr3 = rr3ik; energy += e; diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp index 3f5c9082e7..8d9e0c101d 100644 --- a/src/AMOEBA/amoeba_multipole.cpp +++ b/src/AMOEBA/amoeba_multipole.cpp @@ -379,7 +379,8 @@ void PairAmoeba::multipole_real() corek = pcore[jclass]; alphak = palpha[jclass]; valk = pval[j]; - + if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n", + j, corei, corek, alphai, alphak, vali, valk); /* printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n", atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk); diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 6ac22e0721..3bad2d4f52 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real() // set the energy unit conversion factor for multipolar real-space calculation double felec = electric / am_dielectric; - + printf("hippo gpu multipole\n"); firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, pval, sublo, subhi, atom->tag,