Fixed bugs in the repulsion kernel, now working correctly with the double precision mode

This commit is contained in:
Trung Nguyen
2021-09-29 11:57:25 -05:00
parent 4be44c386f
commit 01381b7f54
4 changed files with 30 additions and 26 deletions

View File

@ -78,7 +78,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
bool rot = false;
bool vel = false;
_extra_fields = 24; // round up to accomodate quadruples of numtyp values
// rpole 13; uind 3; uinp 3; amtype, amgroup
// rpole 13; uind 3; uinp 3; amtype, amgroup; pval
int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields);
if (success!=0)
return success;

View File

@ -493,9 +493,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
//int jtype=jx.w;
// Compute r12
numtyp xr = ix.x - jx.x;
numtyp yr = ix.y - jx.y;
numtyp zr = ix.z - jx.z;
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
numtyp zr = jx.z - ix.z;
numtyp r2 = xr*xr + yr*yr + zr*zr;
if (r2>off2) continue;
@ -521,6 +521,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)];
if (factor_repel == (numtyp)0) continue;
// intermediates involving moments and separation distance
@ -614,7 +615,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp term1 = vali*valk;
numtyp term2 = valk*dir - vali*dkr + dik;
numtyp term3 = vali*qkr + valk*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
numtyp term4 = dir*qkr - dkr*qir - 4.0*qik;
numtyp term4 = dir*qkr - dkr*qir - (numtyp)4.0*qik;
numtyp term5 = qir*qkr;
numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
@ -646,6 +647,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
frcx = frcx*rr1 + eterm*rr3*xr;
frcy = frcy*rr1 + eterm*rr3*yr;
frcz = frcz*rr1 + eterm*rr3*zr;
frcx = sizik * frcx;
frcy = sizik * frcy;
frcz = sizik * frcz;
// compute the torque components for this interaction
@ -666,7 +670,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp r4 = r2 * r2;
numtyp r5 = r2 * r3;
numtyp taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0;
numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp).0*c4*r3 +
numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp)4.0*c4*r3 +
(numtyp)3.0*c3*r2 + (numtyp)2.0*c2*r + c1;
dtaper *= e * rr1;
e *= taper;
@ -896,10 +900,10 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
} // ii<inum
// accumate force, energy and virial: use _acc if not the first kernel
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv);
//store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv);
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
}
/* ----------------------------------------------------------------------

View File

@ -125,9 +125,9 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
(dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
(numtyp)4.0*dmpi2*dmpk2/term) * expi;
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
(4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
(numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
d3s = (dmpi2*dmpk23*r4/(numtyp)15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
((numtyp)4.0/(numtyp)15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
(numtyp)4.0*dmpi2*dmpk22*r/term - (numtyp)4.0/term*dmpi2*dmpk2) * expk +
(dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
@ -136,7 +136,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
(dmpi24*dmpk2*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term +
((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
@ -146,17 +146,17 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
r6 = r5 * r;
dmpi26 = dmpi25 * dmpi2;
dmpk26 = dmpk25 * dmpk2;
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
(4.0/945.0)*dmpi2*dmpk26*r5/term -
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk +
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
d5s = (dmpi2*dmpk25*r6/(numtyp)945.0 + ((numtyp)2.0/(numtyp)189.0)*dmpi2*dmpk24*r5 +
dmpi2*dmpk23*r4/(numtyp)21.0 + dmpi2*dmpk22*r3/(numtyp)9.0 + dmpi2*dmpk2*r2/(numtyp)9.0 -
((numtyp)4.0/(numtyp)945.0)*dmpi2*dmpk26*r5/term -
((numtyp)4.0/(numtyp)63.0)*dmpi2*dmpk25*r4/term - ((numtyp)4.0/(numtyp)9.0)*dmpi2*dmpk24*r3/term -
((numtyp)16.0/(numtyp)9.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi25*dmpk2*r6/(numtyp)945.0 + ((numtyp)2.0/(numtyp)189.0)*dmpi24*dmpk2*r5 +
dmpi23*dmpk2*r4/(numtyp)21.0 + dmpi22*dmpk2*r3/(numtyp)9.0 + dmpi2*dmpk2*r2/(numtyp)9.0 +
((numtyp)4.0/(numtyp)945.0)*dmpi26*dmpk2*r5/term + ((numtyp)4.0/(numtyp)63.0)*dmpi25*dmpk2*r4/term +
((numtyp)4.0/(numtyp)9.0)*dmpi24*dmpk2*r3/term + ((numtyp)16.0/(numtyp)9.0)*dmpi23*dmpk2*r2/term +
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
}
}

View File

@ -147,7 +147,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
tq_pinned = nullptr;
gpu_hal_ready = false; // always false for HIPPO
gpu_repulsion_ready = false; // true for HIPPO when ready
gpu_repulsion_ready = true; // true for HIPPO when ready
gpu_dispersion_real_ready = true; // true for HIPPO when ready
gpu_multipole_real_ready = true;
gpu_udirect2b_ready = true;