Fixed bugs with damprep where ucl_powr in mixed precision failed with a negative single-precision base

This commit is contained in:
Trung Nguyen
2021-09-29 12:32:08 -05:00
parent 01381b7f54
commit ad9d45639e
3 changed files with 7 additions and 4 deletions

View File

@@ -292,7 +292,8 @@ int HippoT::repulsion(const int eflag, const int vflag) {
&this->ans->force, &this->ans->engv, &this->_tep, &this->ans->force, &this->ans->engv, &this->_tep,
&eflag, &vflag, &ainum, &_nall, &nbor_pitch, &eflag, &vflag, &ainum, &_nall, &nbor_pitch,
&this->_threads_per_atom, &this->_aewald, &this->_threads_per_atom, &this->_aewald,
&this->_off2_repulse, &_cut2, &_c0, &_c1, &_c2, &_c3, &_c4, &_c5); &this->_off2_repulse, &_cut2,
&_c0, &_c1, &_c2, &_c3, &_c4, &_c5);
this->time_pair.stop(); this->time_pair.stop();
return GX; return GX;

View File

@@ -644,13 +644,14 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
term4*qiy + term5*qky + term6*(qiyk+qkyi); term4*qiy + term5*qky + term6*(qiyk+qkyi);
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) + numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
term4*qiz + term5*qkz + term6*(qizk+qkzi); term4*qiz + term5*qkz + term6*(qizk+qkzi);
frcx = frcx*rr1 + eterm*rr3*xr; frcx = frcx*rr1 + eterm*rr3*xr;
frcy = frcy*rr1 + eterm*rr3*yr; frcy = frcy*rr1 + eterm*rr3*yr;
frcz = frcz*rr1 + eterm*rr3*zr; frcz = frcz*rr1 + eterm*rr3*zr;
frcx = sizik * frcx; frcx = sizik * frcx;
frcy = sizik * frcy; frcy = sizik * frcy;
frcz = sizik * frcz; frcz = sizik * frcz;
// compute the torque components for this interaction // compute the torque components for this interaction
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) - numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
@@ -903,7 +904,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom, //store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv); // offset,eflag,vflag,ans,engv);
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom, store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X); offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------

View File

@@ -112,7 +112,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
dmpk24 = dmpk23 * dmpk2; dmpk24 = dmpk23 * dmpk2;
dmpk25 = dmpk24 * dmpk2; dmpk25 = dmpk24 * dmpk2;
term = dmpi22 - dmpk22; term = dmpi22 - dmpk22;
pre = (numtyp)8192.0 * dmpi23 * dmpk23 / ucl_powr(term,(numtyp)4.0); pre = (numtyp)8192.0 * dmpi23 * dmpk23 / (term*term*term*term); //ucl_powr(term,(numtyp)4.0);
tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term; tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
s = (dampi-tmp)*expk + (dampk+tmp)*expi; s = (dampi-tmp)*expk + (dampk+tmp)*expi;
@@ -173,6 +173,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
dmpik[4] = pre * (s*d2s + ds*ds); dmpik[4] = pre * (s*d2s + ds*ds);
dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s); dmpik[6] = pre * (s*d3s + (numtyp)3.0*ds*d2s);
dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s); dmpik[8] = pre * (s*d4s + (numtyp)4.0*ds*d3s + (numtyp)3.0*d2s*d2s);
if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s); if (rorder >= 11) dmpik[10] = pre * (s*d5s + (numtyp)5.0*ds*d4s + (numtyp)10.0*d2s*d3s);
} }