Trying to find the difference in the neighbor list build in hippo vs amoeba

This commit is contained in:
Trung Nguyen
2021-09-27 11:35:35 -05:00
parent 7437c98628
commit 2efd841a7e
6 changed files with 114 additions and 62 deletions

View File

@ -430,8 +430,8 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
// leave the answers (forces, energies and virial) on the device, // leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (this one, or polar_real once done) // only copy them back in the last kernel (this one, or polar_real once done)
//this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
//this->device->add_ans_object(this->ans); this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer(); this->hd_balancer.stop_timer();
@ -444,7 +444,7 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
numtyp4* p = (numtyp4*)(&this->_tep[4*i]); numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z); printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
} }
*/ */
return firstneigh; // nbor->host_jlist.begin()-host_start; return firstneigh; // nbor->host_jlist.begin()-host_start;
} }

View File

@ -1032,6 +1032,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass]; numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
numtyp valk = polar6[j].x; numtyp valk = polar6[j].x;
if (i == 0 && j < 10) printf("j = %d: r = %f; factor_mpole = %f\n", j, r, factor_mpole);
// intermediates involving moments and separation distance // intermediates involving moments and separation distance
numtyp dir = dix*xr + diy*yr + diz*zr; numtyp dir = dix*xr + diy*yr + diz*zr;
@ -1772,7 +1773,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
//if (r2>off2) continue; //if (r2>off2) continue;
numtyp r = ucl_sqrt(r2); numtyp r = ucl_sqrt(r2);
const numtyp4 pol1j = polar1[j]; const numtyp4 pol1j = polar1[j];
numtyp ck = polar1[j].x; // rpole[j][0]; numtyp ck = polar1[j].x; // rpole[j][0];
numtyp dkx = polar1[j].y; // rpole[j][1]; numtyp dkx = polar1[j].y; // rpole[j][1];
@ -1800,6 +1801,11 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale; numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)]; const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)]; factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
// NOTE: for in.water_box/water_hexamer.hippo: there exist wscale = 0.2
//if (factor_wscale < (numtyp)1.0) continue; //factor_wscale = (numtyp)0;
//if (i == 12 && j < 20) printf("j = %d: r = %f; factor_wscale = %f\n", j, r, factor_wscale);
if (igroup == jgroup) { if (igroup == jgroup) {
factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)]; factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
factor_uscale = polar_uscale; factor_uscale = polar_uscale;
@ -1910,7 +1916,8 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir; dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir;
dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir; dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
dufld[5] += zr*tiz5 + zr*zr*tuir; dufld[5] += zr*tiz5 + zr*zr*tuir;
// get the field gradient for direct polarization force // get the field gradient for direct polarization force
numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i; numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
@ -1929,7 +1936,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
term1k = rr3k - rr5k*xr*xr; term1k = rr3k - rr5k*xr*xr;
term2k = (numtyp)2.0*rr5k*xr; term2k = (numtyp)2.0*rr5k*xr;
term3k = rr7k*xr*xr - rr5k; term3k = rr7k*xr*xr - rr5k;
term4k = 2.0*rr5k; term4k = (numtyp)2.0*rr5k;
term5k = (numtyp)5.0*rr7k*xr; term5k = (numtyp)5.0*rr7k*xr;
term6k = rr9k*xr*xr; term6k = rr9k*xr*xr;
tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i - tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i -
@ -2046,7 +2053,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
numtyp frcx = (numtyp)-2.0 * depx; numtyp frcx = (numtyp)-2.0 * depx;
numtyp frcy = (numtyp)-2.0 * depy; numtyp frcy = (numtyp)-2.0 * depy;
numtyp frcz = (numtyp)-2.0 * depz; numtyp frcz = (numtyp)-2.0 * depz;
// get the dEp/dR terms used for direct polarization force // get the dEp/dR terms used for direct polarization force
// poltyp == MUTUAL && hippo // poltyp == MUTUAL && hippo
// tixx and tkxx // tixx and tkxx
@ -2159,7 +2166,7 @@ __kernel void k_special15(__global int * dev_nbor,
int which = sj >> SBBITS & 3; int which = sj >> SBBITS & 3;
int j = sj & NEIGHMASK; int j = sj & NEIGHMASK;
tagint jtag = tag[j]; tagint jtag = tag[j];
if (i == 0 && j < 20) printf("GPU: j = %d; jtag = %d\n", j, jtag);
if (!which) { if (!which) {
int offset=ii; int offset=ii;
for (int k=0; k<n15; k++) { for (int k=0; k<n15; k++) {

View File

@ -81,14 +81,14 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
pre = (numtyp)128.0; pre = (numtyp)128.0;
s = (r + dmpi2*r2 + dmpi22*r3/(numtyp)3.0) * expi; s = (r + dmpi2*r2 + dmpi22*r3/(numtyp)3.0) * expi;
ds = (dmpi22*r3 + dmpi23*r4) * expi / 3.0; ds = (dmpi22*r3 + dmpi23*r4) * expi / (numtyp)3.0;
d2s = dmpi24 * expi * r5 / 9.0; d2s = dmpi24 * expi * r5 / (numtyp)9.0;
d3s = dmpi25 * expi * r6 / 45.0; d3s = dmpi25 * expi * r6 / (numtyp)45.0;
d4s = (dmpi25*r6 + dmpi26*r7) * expi / 315.0; d4s = (dmpi25*r6 + dmpi26*r7) * expi / (numtyp)315.0;
if (rorder >= 11) { if (rorder >= 11) {
r8 = r7 * r; r8 = r7 * r;
dmpi27 = dmpi2 * dmpi26; dmpi27 = dmpi2 * dmpi26;
d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/3.0) * expi / 945.0; d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/(numtyp)3.0) * expi / (numtyp)945.0;
} }
// treat the case where alpha damping exponents are unequal // treat the case where alpha damping exponents are unequal
@ -97,12 +97,12 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
r3 = r2 * r; r3 = r2 * r;
r4 = r3 * r; r4 = r3 * r;
r5 = r4 * r; r5 = r4 * r;
dmpi2 = 0.5 * dmpi; dmpi2 = (numtyp)0.5 * dmpi;
dmpk2 = 0.5 * dmpk; dmpk2 = (numtyp)0.5 * dmpk;
dampi = dmpi2 * r; dampi = dmpi2 * r;
dampk = dmpk2 * r; dampk = dmpk2 * r;
expi = exp(-dampi); expi = ucl_exp(-dampi);
expk = exp(-dampk); expk = ucl_exp(-dampk);
dmpi22 = dmpi2 * dmpi2; dmpi22 = dmpi2 * dmpi2;
dmpi23 = dmpi22 * dmpi2; dmpi23 = dmpi22 * dmpi2;
dmpi24 = dmpi23 * dmpi2; dmpi24 = dmpi23 * dmpi2;
@ -112,34 +112,34 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
dmpk24 = dmpk23 * dmpk2; dmpk24 = dmpk23 * dmpk2;
dmpk25 = dmpk24 * dmpk2; dmpk25 = dmpk24 * dmpk2;
term = dmpi22 - dmpk22; term = dmpi22 - dmpk22;
pre = 8192.0 * dmpi23 * dmpk23 / pow(term,4.0); pre = (numtyp)8192.0 * dmpi23 * dmpk23 / ucl_powr(term,(numtyp)4.0);
tmp = 4.0 * dmpi2 * dmpk2 / term; tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
s = (dampi-tmp)*expk + (dampk+tmp)*expi; s = (dampi-tmp)*expk + (dampk+tmp)*expi;
ds = (dmpi2*dmpk2*r2 - 4.0*dmpi2*dmpk22*r/term - ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk + (numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi2*dmpk2*r2 + 4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi; (dmpi2*dmpk2*r2 + (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/3.0 - d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 -
(4.0/3.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term - ((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk + (numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi2*dmpk2*r2/3.0 + dmpi22*dmpk2*r3/3.0 + (dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
(4.0/3.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term + ((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
4.0*dmpi2*dmpk2/term) * expi; (numtyp)4.0*dmpi2*dmpk2/term) * expi;
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/5.0 + dmpi2*dmpk2*r2/5.0 - d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
(4.0/15.0)*dmpi2*dmpk24*r3/term - (8.0/5.0)*dmpi2*dmpk23*r2/term - (4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk + (numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
(dmpi23*dmpk2*r4/15.0 + dmpi22*dmpk2*r3/5.0 + dmpi2*dmpk2*r2/5.0 + (dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
(4.0/15.0)*dmpi24*dmpk2*r3/term + (8.0/5.0)*dmpi23*dmpk2*r2/term + ((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
4.0*dmpi22*dmpk2*r/term + 4.0/term*dmpi2*dmpk2) * expi; (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
d4s = (dmpi2*dmpk24*r5/105.0 + (2.0/35.0)*dmpi2*dmpk23*r4 + d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 +
dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/7.0 - dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 -
(4.0/105.0)*dmpi2*dmpk25*r4/term - (8.0/21.0)*dmpi2*dmpk24*r3/term - ((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
(12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term - ((numtyp)12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk + (numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi24*dmpk2*r5/105.0 + (2.0/35.0)*dmpi23*dmpk2*r4 + (dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
dmpi22*dmpk2*r3/7.0 + dmpi2*dmpk2*r2/7.0 + dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
(4.0/105.0)*dmpi25*dmpk2*r4/term + (8.0/21.0)*dmpi24*dmpk2*r3/term + (4.0/105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/21.0)*dmpi24*dmpk2*r3/term +
(12.0/7.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term + (12.0/7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
4.0*dmpi2*dmpk2/term) * expi; 4.0*dmpi2*dmpk2/term) * expi;
if (rorder >= 11) { if (rorder >= 11) {
@ -217,8 +217,8 @@ ucl_inline void damppole(const numtyp r, const int rorder,
diff = fabs(alphai-alphak); diff = fabs(alphai-alphak);
dampi = alphai * r; dampi = alphai * r;
dampk = alphak * r; dampk = alphak * r;
expi = exp(-dampi); expi = ucl_exp(-dampi);
expk = exp(-dampk); expk = ucl_exp(-dampk);
// core-valence charge penetration damping for Gordon f1 // core-valence charge penetration damping for Gordon f1
@ -308,15 +308,15 @@ ucl_inline void damppole(const numtyp r, const int rorder,
if (rorder >= 11) { if (rorder >= 11) {
dampi6 = dampi3 * dampi3; dampi6 = dampi3 * dampi3;
dampk6 = dampk3 * dampk3; dampk6 = dampk3 * dampk3;
dmpik[10] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 + dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
5.0*dampi4/126.0 + 2.0*dampi5/315.0 + (numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 +
dampi6/1890.0)*expi - dampi6/(numtyp)1890.0)*expi -
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + 5.0*dampk4/126.0 + termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + 5.0*dampk4/(numtyp)126.0 +
2.0*dampk5/315.0 + dampk6/1890.0)*expk - (numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk -
2.0*termi2*termk*(1.0 + dampi + 4.0*dampi2/9.0 + dampi3/9.0 + (numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 +
dampi4/63.0 + dampi5/945.0)*expi - dampi4/63.0 + dampi5/(numtyp)945.0)*expi -
2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/9.0 + dampk3/9.0 + (numtyp)2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 +
dampk4/63.0 + dampk5/945.0)*expk; dampk4/63.0 + dampk5/(numtyp)945.0)*expk;
} }
} }
} }

View File

@ -366,6 +366,7 @@ void PairAmoeba::polar_real()
yr = x[j][1] - yi; yr = x[j][1] - yi;
zr = x[j][2] - zi; zr = x[j][2] - zi;
r2 = xr*xr + yr*yr + zr*zr; r2 = xr*xr + yr*yr + zr*zr;
if (r2 > off2) continue; if (r2 > off2) continue;
jtype = amtype[j]; jtype = amtype[j];
@ -393,7 +394,7 @@ void PairAmoeba::polar_real()
factor_uscale = 1.0; factor_uscale = 1.0;
} }
} }
//if (i == 12 && j < 20) printf("j = %d: r = %f; factor_wscale = %f\n", j, sqrt(r2), factor_wscale);
r = sqrt(r2); r = sqrt(r2);
ck = rpole[j][0]; ck = rpole[j][0];
dkx = rpole[j][1]; dkx = rpole[j][1];
@ -567,7 +568,7 @@ void PairAmoeba::polar_real()
ufld[j][0] += tkx3 + xr*tukr; ufld[j][0] += tkx3 + xr*tukr;
ufld[j][1] += tky3 + yr*tukr; ufld[j][1] += tky3 + yr*tukr;
ufld[j][2] += tkz3 + zr*tukr; ufld[j][2] += tkz3 + zr*tukr;
// get induced dipole field gradient used for quadrupole torques // get induced dipole field gradient used for quadrupole torques
if (amoeba) { if (amoeba) {
@ -579,7 +580,6 @@ void PairAmoeba::polar_real()
tkz5 = 2.0 * (psr5*uiz+dsr5*uizp); tkz5 = 2.0 * (psr5*uiz+dsr5*uizp);
tuir = -psr7*ukr - dsr7*ukrp; tuir = -psr7*ukr - dsr7*ukrp;
tukr = -psr7*uir - dsr7*uirp; tukr = -psr7*uir - dsr7*uirp;
// reached here...
} else if (hippo) { } else if (hippo) {
tix5 = 4.0 * (rr5i*ukx); tix5 = 4.0 * (rr5i*ukx);
tiy5 = 4.0 * (rr5i*uky); tiy5 = 4.0 * (rr5i*uky);
@ -597,7 +597,6 @@ void PairAmoeba::polar_real()
dufld[i][3] += xr*tiz5 + zr*tix5 + 2.0*xr*zr*tuir; dufld[i][3] += xr*tiz5 + zr*tix5 + 2.0*xr*zr*tuir;
dufld[i][4] += yr*tiz5 + zr*tiy5 + 2.0*yr*zr*tuir; dufld[i][4] += yr*tiz5 + zr*tiy5 + 2.0*yr*zr*tuir;
dufld[i][5] += zr*tiz5 + zr*zr*tuir; dufld[i][5] += zr*tiz5 + zr*zr*tuir;
dufld[j][0] -= xr*tkx5 + xr*xr*tukr; dufld[j][0] -= xr*tkx5 + xr*xr*tukr;
dufld[j][1] -= xr*tky5 + yr*tkx5 + 2.0*xr*yr*tukr; dufld[j][1] -= xr*tky5 + yr*tkx5 + 2.0*xr*yr*tukr;
dufld[j][2] -= yr*tky5 + yr*yr*tukr; dufld[j][2] -= yr*tky5 + yr*yr*tukr;
@ -668,7 +667,7 @@ void PairAmoeba::polar_real()
frcx = depx; frcx = depx;
frcy = depy; frcy = depy;
frcz = depz; frcz = depz;
// get the dEp/dR terms used for direct polarization force // get the dEp/dR terms used for direct polarization force
term1 = bn[2] - psc3*rr5; term1 = bn[2] - psc3*rr5;
@ -855,6 +854,7 @@ void PairAmoeba::polar_real()
frcx = -2.0 * depx; frcx = -2.0 * depx;
frcy = -2.0 * depy; frcy = -2.0 * depy;
frcz = -2.0 * depz; frcz = -2.0 * depz;
} }
// get the dtau/dr terms used for mutual polarization force // get the dtau/dr terms used for mutual polarization force
@ -1199,6 +1199,8 @@ void PairAmoeba::polar_real()
torque2force(i,tep,fix,fiy,fiz,fpolar); torque2force(i,tep,fix,fiy,fiz,fpolar);
//if (i < 10) printf("i = %d: tep = %f %f %f\n", i, tep[0], tep[1], tep[2]);
iz = zaxis2local[i]; iz = zaxis2local[i];
ix = xaxis2local[i]; ix = xaxis2local[i];
iy = yaxis2local[i]; iy = yaxis2local[i];

View File

@ -242,6 +242,47 @@ void PairAmoeba::compute(int eflag, int vflag)
time_induce = time_polar = time_qxfer = 0.0; time_induce = time_polar = time_qxfer = 0.0;
} }
{ // DEBUGGING
double **x = atom->x;
int inum,jnum;
int *ilist,*jlist,*numneigh,**firstneigh;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
if (use_ewald) choose(MPOLE_LONG);
else choose(MPOLE);
int i,ii,j,jj;
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
double xi = x[i][0];
double yi = x[i][1];
double zi = x[i][2];
jlist = firstneigh[i];
jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
double factor_mpole = special_mpole[sbmask15(j)];
j &= NEIGHMASK15;
double xr = x[j][0] - xi;
double yr = x[j][1] - yi;
double zr = x[j][2] - zi;
double r2 = xr*xr + yr*yr + zr*zr;
if (r2 > off2) continue;
double r = sqrt(r2);
if (i == 0 && j < 10) printf("j = %d: r = %f; factor_mpole = %f\n", j, r, factor_mpole);
}
}
} // DEBUGGING
double evdwl; double evdwl;
evdwl = 0.0; evdwl = 0.0;
@ -973,8 +1014,8 @@ void PairAmoeba::init_style()
int irequest = neighbor->request(this,instance_me); int irequest = neighbor->request(this,instance_me);
// for DEBUGGING with GPU // for DEBUGGING with GPU
//neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->half = 0;
//neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->full = 1;
// open debug output files // open debug output files
// names are hard-coded // names are hard-coded

View File

@ -138,7 +138,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
gpu_multipole_real_ready = true; gpu_multipole_real_ready = true;
gpu_udirect2b_ready = false; gpu_udirect2b_ready = false;
gpu_umutual2b_ready = false; gpu_umutual2b_ready = false;
gpu_polar_real_ready = true; gpu_polar_real_ready = false;
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
} }
@ -1137,6 +1137,8 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
_tq[2] = tq_ptr[4*i+2]; _tq[2] = tq_ptr[4*i+2];
torque2force(i,_tq,fix,fiy,fiz,force_comp); torque2force(i,_tq,fix,fiy,fiz,force_comp);
//if (i < 10) printf("i = %d: tep = %f %f %f\n", i, _tq[0], _tq[1], _tq[2]);
iz = zaxis2local[i]; iz = zaxis2local[i];
ix = xaxis2local[i]; ix = xaxis2local[i];
iy = yaxis2local[i]; iy = yaxis2local[i];