diff --git a/lib/gpu/lal_beck.cu b/lib/gpu/lal_beck.cu index a2a15e4d21..12f1314c52 100644 --- a/lib/gpu/lal_beck.cu +++ b/lib/gpu/lal_beck.cu @@ -88,7 +88,7 @@ __kernel void k_beck(const __global numtyp4 *restrict x_, numtyp alphaij = beck1[mtype].y; numtyp betaij = beck1[mtype].z; numtyp term1 = aaij*aaij + rsq; - numtyp term2 = pow(term1,(numtyp)-5.0); + numtyp term2 = (numtyp)1.0/(term1*term1*term1*term1*term1); // ucl_powr(term1,(numtyp)-5.0); numtyp term3 = (numtyp)21.672 + (numtyp)30.0*aaij*aaij + (numtyp)6.0*rsq; numtyp term4 = alphaij + r5*betaij; numtyp term5 = alphaij + (numtyp)6.0*r5*betaij; @@ -102,7 +102,7 @@ __kernel void k_beck(const __global numtyp4 *restrict x_, f.z+=delz*force; if (EVFLAG && eflag) { - numtyp term6 = pow(term1,(numtyp)-3); + numtyp term6 = (numtyp)1.0/(term1*term1*term1); //ucl_powr(term1,(numtyp)-3); numtyp term1inv = ucl_recip(term1); numtyp e = beck2[mtype].x*ucl_exp((numtyp)-1.0*r*term4); e -= beck2[mtype].y*term6*((numtyp)1.0+((numtyp)2.709+(numtyp)3.0*aaij*aaij)*term1inv); @@ -193,7 +193,7 @@ __kernel void k_beck_fast(const __global numtyp4 *restrict x_, numtyp alphaij = beck1[mtype].y; numtyp betaij = beck1[mtype].z; numtyp term1 = aaij*aaij + rsq; - numtyp term2 = pow(term1,(numtyp)-5.0); + numtyp term2 = (numtyp)1.0/(term1*term1*term1*term1*term1); //ucl_powr(term1,(numtyp)-5.0); numtyp term3 = (numtyp)21.672 + (numtyp)30.0*aaij*aaij + (numtyp)6.0*rsq; numtyp term4 = alphaij + r5*betaij; numtyp term5 = alphaij + (numtyp)6.0*r5*betaij; @@ -207,7 +207,7 @@ __kernel void k_beck_fast(const __global numtyp4 *restrict x_, f.z+=delz*force; if (EVFLAG && eflag) { - numtyp term6 = pow(term1,(numtyp)-3); + numtyp term6 = (numtyp)1.0/(term1*term1*term1); //ucl_powr(term1,(numtyp)-3); numtyp term1inv = ucl_recip(term1); numtyp e = beck2[mtype].x*ucl_exp((numtyp)-1.0*r*term4); e -= beck2[mtype].y*term6*((numtyp)1.0+((numtyp)2.709+(numtyp)3.0*aaij*aaij)*term1inv); diff --git a/lib/gpu/lal_colloid.cu b/lib/gpu/lal_colloid.cu index 8a20f0c400..f59215e882 100644 --- a/lib/gpu/lal_colloid.cu +++ b/lib/gpu/lal_colloid.cu @@ -123,10 +123,10 @@ __kernel void k_colloid(const __global numtyp4 *restrict x_, K[6] = K[2]-r; K[7] = ucl_recip(K[3]*K[4]); K[8] = ucl_recip(K[5]*K[6]); - g[0] = ucl_powr(K[3],(numtyp)-7.0); - g[1] = -ucl_powr(-K[4],(numtyp)-7.0); - g[2] = ucl_powr(K[5],(numtyp)-7.0); - g[3] = -ucl_powr(-K[6],(numtyp)-7.0); + g[0] = (numtyp)1.0/(K[3]*K[3]*K[3]*K[3]*K[3]*K[3]*K[3]); // ucl_powr(K[3],(numtyp)-7.0); + g[1] = (numtyp)1.0/(K[4]*K[4]*K[4]*K[4]*K[4]*K[4]*K[4]); //-ucl_powr(-K[4],(numtyp)-7.0); + g[2] = (numtyp)1.0/(K[5]*K[5]*K[5]*K[5]*K[5]*K[5]*K[5]); // ucl_powr(K[5],(numtyp)-7.0); + g[3] = (numtyp)1.0/(K[6]*K[6]*K[6]*K[6]*K[6]*K[6]*K[6]); //-ucl_powr(-K[6],(numtyp)-7.0); h[0] = ((K[3]+(numtyp)5.0*K[1])*K[3]+(numtyp)30.0*K[0])*g[0]; h[1] = ((K[4]+(numtyp)5.0*K[1])*K[4]+(numtyp)30.0*K[0])*g[1]; h[2] = ((K[5]+(numtyp)5.0*K[2])*K[5]-(numtyp)30.0*K[0])*g[2]; @@ -290,10 +290,10 @@ __kernel void k_colloid_fast(const __global numtyp4 *restrict x_, K[6] = K[2]-r; K[7] = ucl_recip(K[3]*K[4]); K[8] = ucl_recip(K[5]*K[6]); - g[0] = ucl_powr(K[3],(numtyp)-7.0); - g[1] = -ucl_powr(-K[4],(numtyp)-7.0); - g[2] = ucl_powr(K[5],(numtyp)-7.0); - g[3] = -ucl_powr(-K[6],(numtyp)-7.0); + g[0] = (numtyp)1.0/(K[3]*K[3]*K[3]*K[3]*K[3]*K[3]*K[3]); // ucl_powr(K[3],(numtyp)-7.0); + g[1] = (numtyp)1.0/(K[4]*K[4]*K[4]*K[4]*K[4]*K[4]*K[4]); //-ucl_powr(-K[4],(numtyp)-7.0); + g[2] = (numtyp)1.0/(K[5]*K[5]*K[5]*K[5]*K[5]*K[5]*K[5]); // ucl_powr(K[5],(numtyp)-7.0); + g[3] = (numtyp)1.0/(K[6]*K[6]*K[6]*K[6]*K[6]*K[6]*K[6]); //-ucl_powr(-K[6],(numtyp)-7.0); h[0] = ((K[3]+(numtyp)5.0*K[1])*K[3]+(numtyp)30.0*K[0])*g[0]; h[1] = ((K[4]+(numtyp)5.0*K[1])*K[4]+(numtyp)30.0*K[0])*g[1]; h[2] = ((K[5]+(numtyp)5.0*K[2])*K[5]-(numtyp)30.0*K[0])*g[2];