another OpenCL bugfix attempt from Trung

This commit is contained in:
Axel Kohlmeyer
2023-02-07 17:31:43 -05:00
parent d170f83c6d
commit 3b4c873beb
6 changed files with 51 additions and 43 deletions

View File

@@ -37,14 +37,16 @@ ucl_inline void vec3_add(const numtyp x[3], const numtyp y[3], numtyp z[3])
z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2];
}
ucl_inline void vec3_scale(const numtyp k, const numtyp x[3], numtyp y[3])
ucl_inline void vec3_scale(const numtyp k, numtyp x[3], numtyp y[3])
{
// return y = k * x (y can be at the same address as x)
y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2];
}
ucl_inline void vec3_scaleadd(const numtyp k, const numtyp x[3],
ucl_inline void vec3_scaleadd(const numtyp k, numtyp x[3],
const numtyp y[3], numtyp z[3])
{
// return z = k * x + y (z can be at the same address as x)
z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2];
}
@@ -85,9 +87,9 @@ ucl_inline void costheta_d(const numtyp rij_hat[3],
const numtyp rij,
const numtyp rik_hat[3],
const numtyp rik,
numtyp *dri,
numtyp *drj,
numtyp *drk)
numtyp dri[3],
numtyp drj[3],
numtyp drk[3])
{
// first element is derivative wrt Ri, second wrt Rj, third wrt Rk
@@ -187,12 +189,12 @@ ucl_inline numtyp ters_bij(const numtyp zeta,
numtyp tmp = param_beta * zeta;
if (tmp > param_c1) return ucl_rsqrt(tmp);
if (tmp > param_c2)
return ((numtyp)1.0 - ucl_powr(tmp,-param_powern) /
return ((numtyp)1.0 - ucl_pow(tmp,-param_powern) /
((numtyp)2.0*param_powern))*ucl_rsqrt(tmp);
if (tmp < param_c4) return (numtyp)1.0;
if (tmp < param_c3)
return (numtyp)1.0 - ucl_powr(tmp,param_powern)/((numtyp)2.0*param_powern);
return ucl_powr((numtyp)1.0 + ucl_powr(tmp,param_powern),
return (numtyp)1.0 - ucl_pow(tmp,param_powern)/((numtyp)2.0*param_powern);
return ucl_pow((numtyp)1.0 + ucl_pow(tmp,param_powern),
(numtyp)-1.0/((numtyp)2.0*param_powern));
}
@@ -208,19 +210,19 @@ ucl_inline numtyp ters_bij_d(const numtyp zeta,
{
numtyp tmp = param_beta * zeta;
if (tmp > param_c1)
return param_beta * (numtyp)-0.5*ucl_powr(tmp,(numtyp)-1.5);
return param_beta * (numtyp)-0.5*ucl_pow(tmp,(numtyp)-1.5);
if (tmp > param_c2)
return param_beta * ((numtyp)-0.5*ucl_powr(tmp,(numtyp)-1.5) *
return param_beta * ((numtyp)-0.5*ucl_pow(tmp,(numtyp)-1.5) *
// error in negligible 2nd term fixed 9/30/2015
// (1.0 - 0.5*(1.0 + 1.0/(2.0*param->powern)) *
((numtyp)1.0 - ((numtyp)1.0 + (numtyp)1.0 /((numtyp)2.0 * param_powern)) *
ucl_powr(tmp,-param_powern)));
ucl_pow(tmp,-param_powern)));
if (tmp < param_c4) return (numtyp)0.0;
if (tmp < param_c3)
return (numtyp)-0.5*param_beta * ucl_powr(tmp,param_powern-(numtyp)1.0);
return (numtyp)-0.5*param_beta * ucl_pow(tmp,param_powern-(numtyp)1.0);
numtyp tmp_n = ucl_powr(tmp,param_powern);
return (numtyp)-0.5 * ucl_powr((numtyp)1.0+tmp_n, (numtyp) -
numtyp tmp_n = ucl_pow(tmp,param_powern);
return (numtyp)-0.5 * ucl_pow((numtyp)1.0+tmp_n, (numtyp) -
(numtyp)1.0-((numtyp)1.0 / ((numtyp)2.0 * param_powern)))*tmp_n / zeta;
}
@@ -474,7 +476,7 @@ ucl_inline void repulsive(const numtyp param_bigr,
numtyp esq = global_e*global_e;
numtyp a_ij = ((numtyp)0.8854*global_a_0) /
(ucl_powr(param_Z_i,(numtyp)0.23) + ucl_powr(param_Z_j,(numtyp)0.23));
(ucl_pow(param_Z_i,(numtyp)0.23) + ucl_pow(param_Z_j,(numtyp)0.23));
numtyp premult = (param_Z_i * param_Z_j * esq)/((numtyp)4.0*MY_PI*global_epsilon_0);
numtyp r_ov_a = r/a_ij;
numtyp t1 = (numtyp)0.1818*ucl_exp((numtyp)-3.2*r_ov_a);