Fixed a bug in damprep where ucl_powr failed in mixed precision when given a negative single-precision base
This commit is contained in:
@ -292,7 +292,8 @@ int HippoT::repulsion(const int eflag, const int vflag) {
|
|||||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||||
&this->_threads_per_atom, &this->_aewald,
|
&this->_threads_per_atom, &this->_aewald,
|
||||||
&this->_off2_repulse, &_cut2, &_c0, &_c1, &_c2, &_c3, &_c4, &_c5);
|
&this->_off2_repulse, &_cut2,
|
||||||
|
&_c0, &_c1, &_c2, &_c3, &_c4, &_c5);
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
|
|
||||||
return GX;
|
return GX;
|
||||||
|
|||||||
@ -644,13 +644,14 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
|||||||
term4*qiy + term5*qky + term6*(qiyk+qkyi);
|
term4*qiy + term5*qky + term6*(qiyk+qkyi);
|
||||||
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
|
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
|
||||||
term4*qiz + term5*qkz + term6*(qizk+qkzi);
|
term4*qiz + term5*qkz + term6*(qizk+qkzi);
|
||||||
|
|
||||||
frcx = frcx*rr1 + eterm*rr3*xr;
|
frcx = frcx*rr1 + eterm*rr3*xr;
|
||||||
frcy = frcy*rr1 + eterm*rr3*yr;
|
frcy = frcy*rr1 + eterm*rr3*yr;
|
||||||
frcz = frcz*rr1 + eterm*rr3*zr;
|
frcz = frcz*rr1 + eterm*rr3*zr;
|
||||||
frcx = sizik * frcx;
|
frcx = sizik * frcx;
|
||||||
frcy = sizik * frcy;
|
frcy = sizik * frcy;
|
||||||
frcz = sizik * frcz;
|
frcz = sizik * frcz;
|
||||||
|
|
||||||
// compute the torque components for this interaction
|
// compute the torque components for this interaction
|
||||||
|
|
||||||
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
|
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
|
||||||
@ -903,7 +904,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
// offset,eflag,vflag,ans,engv);
|
// offset,eflag,vflag,ans,engv);
|
||||||
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
|
|||||||
@ -112,7 +112,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
|||||||
dmpk24 = dmpk23 * dmpk2;
|
dmpk24 = dmpk23 * dmpk2;
|
||||||
dmpk25 = dmpk24 * dmpk2;
|
dmpk25 = dmpk24 * dmpk2;
|
||||||
term = dmpi22 - dmpk22;
|
term = dmpi22 - dmpk22;
|
||||||
pre = (numtyp)8192.0 * dmpi23 * dmpk23 / ucl_powr(term,(numtyp)4.0);
|
pre = (numtyp)8192.0 * dmpi23 * dmpk23 / (term*term*term*term); //ucl_powr(term,(numtyp)4.0);
|
||||||
tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
|
tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
|
||||||
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
|
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
|
||||||
|
|
||||||
@ -173,6 +173,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
|||||||
dmpik[4] = pre * (s*d2s + ds*ds);
|
dmpik[4] = pre * (s*d2s + ds*ds);
|
||||||
dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s);
|
dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s);
|
||||||
dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s);
|
dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s);
|
||||||
|
|
||||||
if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s);
|
if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user