Trying to find the difference in the neighbor list build in hippo vs amoeba
This commit is contained in:
@ -430,8 +430,8 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
||||
|
||||
// leave the answers (forces, energies and virial) on the device,
|
||||
// only copy them back in the last kernel (this one, or polar_real once done)
|
||||
//this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
//this->device->add_ans_object(this->ans);
|
||||
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
this->device->add_ans_object(this->ans);
|
||||
|
||||
this->hd_balancer.stop_timer();
|
||||
|
||||
|
||||
@ -1032,6 +1032,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
||||
numtyp valk = polar6[j].x;
|
||||
|
||||
if (i == 0 && j < 10) printf("j = %d: r = %f; factor_mpole = %f\n", j, r, factor_mpole);
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||
@ -1800,6 +1801,11 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
||||
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||
factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
|
||||
// NOTE: for in.water_box/water_hexamer.hippo: there exist wscale = 0.2
|
||||
//if (factor_wscale < (numtyp)1.0) continue; //factor_wscale = (numtyp)0;
|
||||
|
||||
//if (i == 12 && j < 20) printf("j = %d: r = %f; factor_wscale = %f\n", j, r, factor_wscale);
|
||||
|
||||
if (igroup == jgroup) {
|
||||
factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
|
||||
factor_uscale = polar_uscale;
|
||||
@ -1911,6 +1917,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
||||
dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
|
||||
dufld[5] += zr*tiz5 + zr*zr*tuir;
|
||||
|
||||
|
||||
// get the field gradient for direct polarization force
|
||||
|
||||
numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
|
||||
@ -1929,7 +1936,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
||||
term1k = rr3k - rr5k*xr*xr;
|
||||
term2k = (numtyp)2.0*rr5k*xr;
|
||||
term3k = rr7k*xr*xr - rr5k;
|
||||
term4k = 2.0*rr5k;
|
||||
term4k = (numtyp)2.0*rr5k;
|
||||
term5k = (numtyp)5.0*rr7k*xr;
|
||||
term6k = rr9k*xr*xr;
|
||||
tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i -
|
||||
@ -2159,7 +2166,7 @@ __kernel void k_special15(__global int * dev_nbor,
|
||||
int which = sj >> SBBITS & 3;
|
||||
int j = sj & NEIGHMASK;
|
||||
tagint jtag = tag[j];
|
||||
|
||||
if (i == 0 && j < 20) printf("GPU: j = %d; jtag = %d\n", j, jtag);
|
||||
if (!which) {
|
||||
int offset=ii;
|
||||
for (int k=0; k<n15; k++) {
|
||||
|
||||
@ -81,14 +81,14 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
pre = (numtyp)128.0;
|
||||
s = (r + dmpi2*r2 + dmpi22*r3/(numtyp)3.0) * expi;
|
||||
|
||||
ds = (dmpi22*r3 + dmpi23*r4) * expi / 3.0;
|
||||
d2s = dmpi24 * expi * r5 / 9.0;
|
||||
d3s = dmpi25 * expi * r6 / 45.0;
|
||||
d4s = (dmpi25*r6 + dmpi26*r7) * expi / 315.0;
|
||||
ds = (dmpi22*r3 + dmpi23*r4) * expi / (numtyp)3.0;
|
||||
d2s = dmpi24 * expi * r5 / (numtyp)9.0;
|
||||
d3s = dmpi25 * expi * r6 / (numtyp)45.0;
|
||||
d4s = (dmpi25*r6 + dmpi26*r7) * expi / (numtyp)315.0;
|
||||
if (rorder >= 11) {
|
||||
r8 = r7 * r;
|
||||
dmpi27 = dmpi2 * dmpi26;
|
||||
d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/3.0) * expi / 945.0;
|
||||
d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/(numtyp)3.0) * expi / (numtyp)945.0;
|
||||
}
|
||||
|
||||
// treat the case where alpha damping exponents are unequal
|
||||
@ -97,12 +97,12 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
r3 = r2 * r;
|
||||
r4 = r3 * r;
|
||||
r5 = r4 * r;
|
||||
dmpi2 = 0.5 * dmpi;
|
||||
dmpk2 = 0.5 * dmpk;
|
||||
dmpi2 = (numtyp)0.5 * dmpi;
|
||||
dmpk2 = (numtyp)0.5 * dmpk;
|
||||
dampi = dmpi2 * r;
|
||||
dampk = dmpk2 * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
expi = ucl_exp(-dampi);
|
||||
expk = ucl_exp(-dampk);
|
||||
dmpi22 = dmpi2 * dmpi2;
|
||||
dmpi23 = dmpi22 * dmpi2;
|
||||
dmpi24 = dmpi23 * dmpi2;
|
||||
@ -112,34 +112,34 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
dmpk24 = dmpk23 * dmpk2;
|
||||
dmpk25 = dmpk24 * dmpk2;
|
||||
term = dmpi22 - dmpk22;
|
||||
pre = 8192.0 * dmpi23 * dmpk23 / pow(term,4.0);
|
||||
tmp = 4.0 * dmpi2 * dmpk2 / term;
|
||||
pre = (numtyp)8192.0 * dmpi23 * dmpk23 / ucl_powr(term,(numtyp)4.0);
|
||||
tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
|
||||
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
|
||||
|
||||
ds = (dmpi2*dmpk2*r2 - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2 + 4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/3.0 -
|
||||
(4.0/3.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2/3.0 + dmpi22*dmpk2*r3/3.0 +
|
||||
(4.0/3.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/5.0 + dmpi2*dmpk2*r2/5.0 -
|
||||
(4.0/15.0)*dmpi2*dmpk24*r3/term - (8.0/5.0)*dmpi2*dmpk23*r2/term -
|
||||
4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
|
||||
(dmpi23*dmpk2*r4/15.0 + dmpi22*dmpk2*r3/5.0 + dmpi2*dmpk2*r2/5.0 +
|
||||
(4.0/15.0)*dmpi24*dmpk2*r3/term + (8.0/5.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0/term*dmpi2*dmpk2) * expi;
|
||||
d4s = (dmpi2*dmpk24*r5/105.0 + (2.0/35.0)*dmpi2*dmpk23*r4 +
|
||||
dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/7.0 -
|
||||
(4.0/105.0)*dmpi2*dmpk25*r4/term - (8.0/21.0)*dmpi2*dmpk24*r3/term -
|
||||
(12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi24*dmpk2*r5/105.0 + (2.0/35.0)*dmpi23*dmpk2*r4 +
|
||||
dmpi22*dmpk2*r3/7.0 + dmpi2*dmpk2*r2/7.0 +
|
||||
(4.0/105.0)*dmpi25*dmpk2*r4/term + (8.0/21.0)*dmpi24*dmpk2*r3/term +
|
||||
(12.0/7.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term -
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2 + (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
|
||||
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 -
|
||||
((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
|
||||
((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expi;
|
||||
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
|
||||
(4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
|
||||
(numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
|
||||
(dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
|
||||
((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
|
||||
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
|
||||
d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 +
|
||||
dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 -
|
||||
((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
|
||||
((numtyp)12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
|
||||
dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
|
||||
(4.0/105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/21.0)*dmpi24*dmpk2*r3/term +
|
||||
(12.0/7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
|
||||
if (rorder >= 11) {
|
||||
@ -217,8 +217,8 @@ ucl_inline void damppole(const numtyp r, const int rorder,
|
||||
diff = fabs(alphai-alphak);
|
||||
dampi = alphai * r;
|
||||
dampk = alphak * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
expi = ucl_exp(-dampi);
|
||||
expk = ucl_exp(-dampk);
|
||||
|
||||
// core-valence charge penetration damping for Gordon f1
|
||||
|
||||
@ -308,15 +308,15 @@ ucl_inline void damppole(const numtyp r, const int rorder,
|
||||
if (rorder >= 11) {
|
||||
dampi6 = dampi3 * dampi3;
|
||||
dampk6 = dampk3 * dampk3;
|
||||
dmpik[10] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
5.0*dampi4/126.0 + 2.0*dampi5/315.0 +
|
||||
dampi6/1890.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + 5.0*dampk4/126.0 +
|
||||
2.0*dampk5/315.0 + dampk6/1890.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 4.0*dampi2/9.0 + dampi3/9.0 +
|
||||
dampi4/63.0 + dampi5/945.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/9.0 + dampk3/9.0 +
|
||||
dampk4/63.0 + dampk5/945.0)*expk;
|
||||
dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
|
||||
(numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 +
|
||||
dampi6/(numtyp)1890.0)*expi -
|
||||
termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + 5.0*dampk4/(numtyp)126.0 +
|
||||
(numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk -
|
||||
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 +
|
||||
dampi4/63.0 + dampi5/(numtyp)945.0)*expi -
|
||||
(numtyp)2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 +
|
||||
dampk4/63.0 + dampk5/(numtyp)945.0)*expk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -366,6 +366,7 @@ void PairAmoeba::polar_real()
|
||||
yr = x[j][1] - yi;
|
||||
zr = x[j][2] - zi;
|
||||
r2 = xr*xr + yr*yr + zr*zr;
|
||||
|
||||
if (r2 > off2) continue;
|
||||
|
||||
jtype = amtype[j];
|
||||
@ -393,7 +394,7 @@ void PairAmoeba::polar_real()
|
||||
factor_uscale = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
//if (i == 12 && j < 20) printf("j = %d: r = %f; factor_wscale = %f\n", j, sqrt(r2), factor_wscale);
|
||||
r = sqrt(r2);
|
||||
ck = rpole[j][0];
|
||||
dkx = rpole[j][1];
|
||||
@ -579,7 +580,6 @@ void PairAmoeba::polar_real()
|
||||
tkz5 = 2.0 * (psr5*uiz+dsr5*uizp);
|
||||
tuir = -psr7*ukr - dsr7*ukrp;
|
||||
tukr = -psr7*uir - dsr7*uirp;
|
||||
// reached here...
|
||||
} else if (hippo) {
|
||||
tix5 = 4.0 * (rr5i*ukx);
|
||||
tiy5 = 4.0 * (rr5i*uky);
|
||||
@ -597,7 +597,6 @@ void PairAmoeba::polar_real()
|
||||
dufld[i][3] += xr*tiz5 + zr*tix5 + 2.0*xr*zr*tuir;
|
||||
dufld[i][4] += yr*tiz5 + zr*tiy5 + 2.0*yr*zr*tuir;
|
||||
dufld[i][5] += zr*tiz5 + zr*zr*tuir;
|
||||
|
||||
dufld[j][0] -= xr*tkx5 + xr*xr*tukr;
|
||||
dufld[j][1] -= xr*tky5 + yr*tkx5 + 2.0*xr*yr*tukr;
|
||||
dufld[j][2] -= yr*tky5 + yr*yr*tukr;
|
||||
@ -855,6 +854,7 @@ void PairAmoeba::polar_real()
|
||||
frcx = -2.0 * depx;
|
||||
frcy = -2.0 * depy;
|
||||
frcz = -2.0 * depz;
|
||||
|
||||
}
|
||||
|
||||
// get the dtau/dr terms used for mutual polarization force
|
||||
@ -1199,6 +1199,8 @@ void PairAmoeba::polar_real()
|
||||
|
||||
torque2force(i,tep,fix,fiy,fiz,fpolar);
|
||||
|
||||
//if (i < 10) printf("i = %d: tep = %f %f %f\n", i, tep[0], tep[1], tep[2]);
|
||||
|
||||
iz = zaxis2local[i];
|
||||
ix = xaxis2local[i];
|
||||
iy = yaxis2local[i];
|
||||
|
||||
@ -242,6 +242,47 @@ void PairAmoeba::compute(int eflag, int vflag)
|
||||
time_induce = time_polar = time_qxfer = 0.0;
|
||||
}
|
||||
|
||||
{ // DEBUGGING
|
||||
double **x = atom->x;
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
if (use_ewald) choose(MPOLE_LONG);
|
||||
else choose(MPOLE);
|
||||
|
||||
int i,ii,j,jj;
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
double xi = x[i][0];
|
||||
double yi = x[i][1];
|
||||
double zi = x[i][2];
|
||||
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
double factor_mpole = special_mpole[sbmask15(j)];
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
double xr = x[j][0] - xi;
|
||||
double yr = x[j][1] - yi;
|
||||
double zr = x[j][2] - zi;
|
||||
double r2 = xr*xr + yr*yr + zr*zr;
|
||||
if (r2 > off2) continue;
|
||||
double r = sqrt(r2);
|
||||
if (i == 0 && j < 10) printf("j = %d: r = %f; factor_mpole = %f\n", j, r, factor_mpole);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // DEBUGGING
|
||||
|
||||
double evdwl;
|
||||
|
||||
evdwl = 0.0;
|
||||
@ -973,8 +1014,8 @@ void PairAmoeba::init_style()
|
||||
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
// for DEBUGGING with GPU
|
||||
//neighbor->requests[irequest]->half = 0;
|
||||
//neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
|
||||
// open debug output files
|
||||
// names are hard-coded
|
||||
|
||||
@ -138,7 +138,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
||||
gpu_multipole_real_ready = true;
|
||||
gpu_udirect2b_ready = false;
|
||||
gpu_umutual2b_ready = false;
|
||||
gpu_polar_real_ready = true;
|
||||
gpu_polar_real_ready = false;
|
||||
|
||||
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
|
||||
}
|
||||
@ -1137,6 +1137,8 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
|
||||
_tq[2] = tq_ptr[4*i+2];
|
||||
torque2force(i,_tq,fix,fiy,fiz,force_comp);
|
||||
|
||||
//if (i < 10) printf("i = %d: tep = %f %f %f\n", i, _tq[0], _tq[1], _tq[2]);
|
||||
|
||||
iz = zaxis2local[i];
|
||||
ix = xaxis2local[i];
|
||||
iy = yaxis2local[i];
|
||||
|
||||
Reference in New Issue
Block a user