diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index 0175f3fcf7..50202c0ee7 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -177,8 +177,8 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict ts4_in, const __global numtyp4 *restrict ts5_in, const __global numtyp *restrict cutsq, - const __global int *restrict map_in, - const __global int *restrict elem2param_in, + const __global int *restrict map, + const __global int *restrict elem2param, const int nelements, const int nparams, __global numtyp4 * zetaij, const __global int * dev_nbor, @@ -197,16 +197,12 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, __local numtyp4 ts3[SHARED_SIZE]; __local numtyp4 ts4[SHARED_SIZE]; __local numtyp4 ts5[SHARED_SIZE]; - __local int elem2param[SHARED_SIZE]; - __local int map[SHARED_SIZE]; if (tid cutsq[ijparam]) continue; // compute zeta_ij @@ -261,6 +257,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, numtyp4 kx; fetch4(kx,k,pos_tex); //x_[k]; int ktype=kx.w; ktype=map[ktype]; + int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; // Compute rik delr2.x = kx.x-ix.x; @@ -268,7 +265,6 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_, delr2.z = kx.z-ix.z; numtyp rsq2 = delr2.x*delr2.x+delr2.y*delr2.y+delr2.z*delr2.z; - int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; if (rsq2 > cutsq[ijkparam]) continue; numtyp4 ts1_ijkparam = ts1[ijkparam]; //fetch4(ts1_ijkparam,ijkparam,ts1_tex); @@ -330,8 +326,8 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict ts1_in, const __global numtyp4 *restrict ts2_in, const __global numtyp *restrict cutsq, - const __global int *restrict map_in, - const __global int *restrict elem2param_in, + const __global int *restrict map, + const __global int *restrict elem2param, const int nelements, const int nparams, const __global int * dev_nbor, const __global int * dev_packed, @@ -346,13 +342,9 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_, __local numtyp4 ts1[SHARED_SIZE]; __local numtyp4 ts2[SHARED_SIZE]; - __local int elem2param[SHARED_SIZE]; - __local int map[SHARED_SIZE]; if (tid cutsq[ijparam]) continue; numtyp r1 = ucl_sqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1); @@ -544,6 +531,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, numtyp4 kx; fetch4(kx,k,pos_tex); int ktype=kx.w; ktype=map[ktype]; + int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; numtyp delr2[3]; delr2[0] = kx.x-ix.x; @@ -551,7 +539,6 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, delr2[2] = kx.z-ix.z; numtyp rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; - int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; if (rsq2 > cutsq[ijkparam]) continue; numtyp r2 = ucl_sqrt(rsq2); numtyp r2inv = ucl_rsqrt(rsq2); @@ -605,8 +592,8 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict ts2_in, const __global numtyp4 *restrict ts4_in, const __global numtyp *restrict cutsq, - const __global int *restrict map_in, - const __global int *restrict elem2param_in, + const __global int *restrict map, + const __global int *restrict elem2param, const int nelements, const int nparams, const __global numtyp4 *restrict zetaij, const __global int * dev_nbor, @@ -626,14 +613,10 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, __local numtyp4 ts1[SHARED_SIZE]; __local numtyp4 ts2[SHARED_SIZE]; __local numtyp4 ts4[SHARED_SIZE]; - __local int elem2param[SHARED_SIZE]; - __local int map[SHARED_SIZE]; if (tid cutsq[ijparam]) continue; - numtyp r1 = ucl_sqrt(rsq1); - numtyp r1inv = ucl_rsqrt(rsq1); numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; @@ -714,6 +695,8 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, } } + numtyp r1 = ucl_sqrt(rsq1); + numtyp r1inv = ucl_rsqrt(rsq1); int offset_kf; if (ijnum >= 0) { offset_kf = offset_k; @@ -830,8 +813,8 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict ts2_in, const __global numtyp4 *restrict ts4_in, const __global numtyp *restrict cutsq, - const __global int *restrict map_in, - const __global int *restrict elem2param_in, + const __global int *restrict map, + const __global int *restrict elem2param, const int nelements, const int nparams, const __global numtyp4 *restrict zetaij, const __global int * dev_nbor, @@ -851,14 +834,10 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, __local numtyp4 ts1[SHARED_SIZE]; __local numtyp4 ts2[SHARED_SIZE]; __local numtyp4 ts4[SHARED_SIZE]; - __local int elem2param[SHARED_SIZE]; - __local int map[SHARED_SIZE]; if (tid cutsq[ijparam]) continue; - numtyp r1 = ucl_sqrt(rsq1); - numtyp r1inv = ucl_rsqrt(rsq1); numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; @@ -939,6 +916,8 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, } } + numtyp r1 = ucl_sqrt(rsq1); + numtyp r1inv = ucl_rsqrt(rsq1); int offset_kf; if (ijnum >= 0) { offset_kf = offset_k;