diff --git a/lib/gpu/lal_lj_tip4p_long.cpp b/lib/gpu/lal_lj_tip4p_long.cpp index 890bbab3bd..200f185e63 100644 --- a/lib/gpu/lal_lj_tip4p_long.cpp +++ b/lib/gpu/lal_lj_tip4p_long.cpp @@ -66,6 +66,7 @@ int LJTIP4PLongT::init(const int ntypes, return success; k_pair_distrib.set_function(*this->pair_program,"k_lj_tip4p_long_distrib"); k_pair_reneigh.set_function(*this->pair_program,"k_lj_tip4p_reneigh"); + k_pair_newsite.set_function(*this->pair_program,"k_lj_tip4p_newsite"); TypeH = tH; TypeO = tO; @@ -163,6 +164,7 @@ void LJTIP4PLongT::clear() { k_pair_distrib.clear(); k_pair_reneigh.clear(); + k_pair_newsite.clear(); this->clear_atomic(); } @@ -195,9 +197,8 @@ void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) { int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); int GX; - + GX=static_cast(ceil(static_cast(nall)/BX)); if (t_ago == 0) { - GX=static_cast(ceil(static_cast(nall)/BX)); this->k_pair_reneigh.set_size(GX,BX); this->k_pair_reneigh.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), @@ -205,6 +206,14 @@ void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) { &hneight, &m, &TypeO, &TypeH, &tag, &map_array, &atom_sametag); } + this->k_pair_newsite.set_size(GX,BX); + this->k_pair_newsite.run(&this->atom->x, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &nall, &ainum, + &nbor_pitch, &this->_threads_per_atom, + &hneight, &m, &TypeO, &TypeH, &alpha, + &this->atom->q, &tag, &map_array, + &atom_sametag); GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); diff --git a/lib/gpu/lal_lj_tip4p_long.cu b/lib/gpu/lal_lj_tip4p_long.cu index 5bb99d0426..fae5744224 100644 --- a/lib/gpu/lal_lj_tip4p_long.cu +++ b/lib/gpu/lal_lj_tip4p_long.cu @@ -65,11 +65,12 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag, } ucl_inline void compute_newsite(int iO, int iH1, int iH2, - __global numtyp4 *xM, + __global numtyp4 *xM, numtyp q, numtyp alpha, const __global numtyp4 *restrict x_){ numtyp4 xO; fetch4(xO,iO,pos_tex); numtyp4 xH1; fetch4(xH1,iH1,pos_tex); numtyp4 xH2; fetch4(xH2,iH2,pos_tex); + numtyp4 M; numtyp delx1 = xH1.x - xO.x; numtyp dely1 = xH1.y - xO.y; @@ -81,9 +82,12 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2, numtyp ap = alpha * (numtyp)0.5; - (*xM).x = xO.x + ap * (delx1 + delx2); - (*xM).y = xO.y + ap * (dely1 + dely2); - (*xM).z = xO.z + ap * (delz1 + delz2); + M.x = xO.x + ap * (delx1 + delx2); + M.y = xO.y + ap * (dely1 + dely2); + M.z = xO.z + ap * (delz1 + delz2); + M.w = q; + + *xM = M; } __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, @@ -214,6 +218,37 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_, } +__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_, + const __global int * dev_nbor, + const __global int * dev_packed, + const int nall, const int inum, + const int nbor_pitch, const int t_per_atom, + __global int *restrict hneigh, + __global numtyp4 *restrict m, + const int typeO, const int typeH, + const numtyp alpha, const __global numtyp *restrict q_, + const __global int *restrict tag, const __global int *restrict map, + const __global int *restrict sametag) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid; + + if (i= inum) { non_local_oxy = 1; - if(fabs(m[iO].w) <= eq_zero) { - compute_newsite(iO,iH1,iH2, &m[iO], alpha, x_); - numtyp qO; fetch(qO,iO,q_tex); - __threadfence(); - m[iO].w = qO; - } } } @@ -341,11 +365,6 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, jO = j; jH1 = hneigh[j*4 ]; jH2 = hneigh[j*4+1]; - if (fabs(m[j].w) <= eq_zero) { - compute_newsite(j, jH1, jH2, &m[j], alpha, x_); - __threadfence(); - m[j].w = qj; - } x2 = m[j]; } delx = x1.x-x2.x; diff --git a/lib/gpu/lal_lj_tip4p_long.h b/lib/gpu/lal_lj_tip4p_long.h index 67c43def9b..ccf507caa0 100644 --- a/lib/gpu/lal_lj_tip4p_long.h +++ b/lib/gpu/lal_lj_tip4p_long.h @@ -116,7 +116,7 @@ public: UCL_D_Vec map_array; UCL_D_Vec atom_sametag; - UCL_Kernel k_pair_distrib, k_pair_reneigh; + UCL_Kernel k_pair_distrib, k_pair_reneigh, k_pair_newsite; private: bool _allocated;