Fixed bugs in the repulsion kernel, now working correctly with the double precision mode
This commit is contained in:
@ -78,7 +78,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
||||
bool rot = false;
|
||||
bool vel = false;
|
||||
_extra_fields = 24; // round up to accomodate quadruples of numtyp values
|
||||
// rpole 13; uind 3; uinp 3; amtype, amgroup
|
||||
// rpole 13; uind 3; uinp 3; amtype, amgroup; pval
|
||||
int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
@ -493,9 +493,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
//int jtype=jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp xr = ix.x - jx.x;
|
||||
numtyp yr = ix.y - jx.y;
|
||||
numtyp zr = ix.z - jx.z;
|
||||
numtyp xr = jx.x - ix.x;
|
||||
numtyp yr = jx.y - ix.y;
|
||||
numtyp zr = jx.z - ix.z;
|
||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||
|
||||
if (r2>off2) continue;
|
||||
@ -521,6 +521,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
|
||||
const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
|
||||
numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)];
|
||||
if (factor_repel == (numtyp)0) continue;
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
@ -614,7 +615,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
numtyp term1 = vali*valk;
|
||||
numtyp term2 = valk*dir - vali*dkr + dik;
|
||||
numtyp term3 = vali*qkr + valk*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
|
||||
numtyp term4 = dir*qkr - dkr*qir - 4.0*qik;
|
||||
numtyp term4 = dir*qkr - dkr*qir - (numtyp)4.0*qik;
|
||||
numtyp term5 = qir*qkr;
|
||||
numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
|
||||
term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
|
||||
@ -646,6 +647,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
frcx = frcx*rr1 + eterm*rr3*xr;
|
||||
frcy = frcy*rr1 + eterm*rr3*yr;
|
||||
frcz = frcz*rr1 + eterm*rr3*zr;
|
||||
frcx = sizik * frcx;
|
||||
frcy = sizik * frcy;
|
||||
frcz = sizik * frcz;
|
||||
|
||||
// compute the torque components for this interaction
|
||||
|
||||
@ -666,7 +670,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
numtyp r4 = r2 * r2;
|
||||
numtyp r5 = r2 * r3;
|
||||
numtyp taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0;
|
||||
numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp).0*c4*r3 +
|
||||
numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp)4.0*c4*r3 +
|
||||
(numtyp)3.0*c3*r2 + (numtyp)2.0*c2*r + c1;
|
||||
dtaper *= e * rr1;
|
||||
e *= taper;
|
||||
@ -896,10 +900,10 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
||||
} // ii<inum
|
||||
|
||||
// accumate force, energy and virial: use _acc if not the first kernel
|
||||
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
offset,eflag,vflag,ans,engv);
|
||||
//store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
// offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
||||
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
// offset,eflag,vflag,ans,engv);
|
||||
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -125,9 +125,9 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
(dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
|
||||
((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expi;
|
||||
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
|
||||
(4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
|
||||
(numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
|
||||
d3s = (dmpi2*dmpk23*r4/(numtyp)15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
|
||||
((numtyp)4.0/(numtyp)15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
|
||||
(numtyp)4.0*dmpi2*dmpk22*r/term - (numtyp)4.0/term*dmpi2*dmpk2) * expk +
|
||||
(dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
|
||||
((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
|
||||
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
|
||||
@ -136,7 +136,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
|
||||
((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
|
||||
(dmpi24*dmpk2*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
|
||||
dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
|
||||
((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term +
|
||||
((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
|
||||
@ -146,17 +146,17 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
r6 = r5 * r;
|
||||
dmpi26 = dmpi25 * dmpi2;
|
||||
dmpk26 = dmpk25 * dmpk2;
|
||||
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
|
||||
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
|
||||
(4.0/945.0)*dmpi2*dmpk26*r5/term -
|
||||
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
|
||||
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
|
||||
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
|
||||
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
|
||||
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
d5s = (dmpi2*dmpk25*r6/(numtyp)945.0 + ((numtyp)2.0/(numtyp)189.0)*dmpi2*dmpk24*r5 +
|
||||
dmpi2*dmpk23*r4/(numtyp)21.0 + dmpi2*dmpk22*r3/(numtyp)9.0 + dmpi2*dmpk2*r2/(numtyp)9.0 -
|
||||
((numtyp)4.0/(numtyp)945.0)*dmpi2*dmpk26*r5/term -
|
||||
((numtyp)4.0/(numtyp)63.0)*dmpi2*dmpk25*r4/term - ((numtyp)4.0/(numtyp)9.0)*dmpi2*dmpk24*r3/term -
|
||||
((numtyp)16.0/(numtyp)9.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
|
||||
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi25*dmpk2*r6/(numtyp)945.0 + ((numtyp)2.0/(numtyp)189.0)*dmpi24*dmpk2*r5 +
|
||||
dmpi23*dmpk2*r4/(numtyp)21.0 + dmpi22*dmpk2*r3/(numtyp)9.0 + dmpi2*dmpk2*r2/(numtyp)9.0 +
|
||||
((numtyp)4.0/(numtyp)945.0)*dmpi26*dmpk2*r5/term + ((numtyp)4.0/(numtyp)63.0)*dmpi25*dmpk2*r4/term +
|
||||
((numtyp)4.0/(numtyp)9.0)*dmpi24*dmpk2*r3/term + ((numtyp)16.0/(numtyp)9.0)*dmpi23*dmpk2*r2/term +
|
||||
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -147,7 +147,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
||||
tq_pinned = nullptr;
|
||||
|
||||
gpu_hal_ready = false; // always false for HIPPO
|
||||
gpu_repulsion_ready = false; // true for HIPPO when ready
|
||||
gpu_repulsion_ready = true; // true for HIPPO when ready
|
||||
gpu_dispersion_real_ready = true; // true for HIPPO when ready
|
||||
gpu_multipole_real_ready = true;
|
||||
gpu_udirect2b_ready = true;
|
||||
|
||||
Reference in New Issue
Block a user