Add comments for tip4p GPU kernels

This commit is contained in:
Vsevak
2022-08-07 22:26:11 +03:00
parent 59dc63d003
commit baf3e614fb

View File

@ -57,16 +57,26 @@ _texture( q_tex,int2);
#define q_tex q_ #define q_tex q_
#endif #endif
/* ----------------------------------------------------------------------
GPU analogue of Atom::map inline method,
but now limited to map_array mapping style.
Map global ID to local index of atom.
---------------------------------------------------------------------- */
ucl_inline int atom_mapping(const __global int *map, tagint glob) { ucl_inline int atom_mapping(const __global int *map, tagint glob) {
return map[glob]; return map[glob];
} }
/* ----------------------------------------------------------------------
GPU version of Domain::closest_image(int, int) method.
Return local index of atom J or any of its images that is closest to atom I
if J is not a valid index like -1, just return it.
---------------------------------------------------------------------- */
ucl_inline int closest_image(int i, int j, const __global int* sametag, ucl_inline int closest_image(int i, int j, const __global int* sametag,
const __global numtyp4 *restrict x_) const __global numtyp4 *restrict x_)
{ {
if (j < 0) return j; if (j < 0) return j;
numtyp4 xi; fetch4(xi,i,pos_tex); // = x[i]; numtyp4 xi; fetch4(xi,i,pos_tex);
numtyp4 xj; fetch4(xj,j,pos_tex); numtyp4 xj; fetch4(xj,j,pos_tex);
int closest = j; int closest = j;
@ -92,6 +102,10 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag,
return closest; return closest;
} }
/* ----------------------------------------------------------------------
For molecule that consists of atoms O, H1 and H2 compute position
of virtual charge site xM (return parameter)
---------------------------------------------------------------------- */
ucl_inline void compute_newsite(int iO, int iH1, int iH2, ucl_inline void compute_newsite(int iO, int iH1, int iH2,
__global numtyp4 *xM, numtyp q, __global numtyp4 *xM, numtyp q,
numtyp alpha, const __global numtyp4 *restrict x_) { numtyp alpha, const __global numtyp4 *restrict x_) {
@ -118,23 +132,34 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2,
*xM = M; *xM = M;
} }
__kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, /* ----------------------------------------------------------------------
Compute resulting forces (ans), energies and virial (engv).
An additional term is calculated based on the previously
calculated values on the virlual sites (ansO),
which should be distributed over the real atoms.
For some hydrogens, the corresponding oxygens are
not local atoms and the ansO value is not calculated.
The required increase is calculated directly in the main function.
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_long_distrib(
const __global numtyp4 *restrict x_,
__global acctyp4 *restrict ans, __global acctyp4 *restrict ans,
__global acctyp *restrict engv, __global acctyp *restrict engv,
const int eflag, const int vflag, const int inum, const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom, const int nbor_pitch, const int t_per_atom,
__global int *restrict hneigh, const __global int *restrict hneigh,
__global numtyp4 *restrict m, const __global numtyp4 *restrict m,
const int typeO, const int typeH, const int typeO, const int typeH,
const numtyp alpha, const numtyp alpha,
const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) { const __global numtyp *restrict q_,
const __global acctyp4 *restrict ansO) {
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X; int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
acctyp4 f; acctyp4 f;
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
if (i<inum) { if (i<inum) {
numtyp4 ix; fetch4(ix,i,pos_tex);// = x_[i]; numtyp4 ix; fetch4(ix,i,pos_tex);
int itype = ix.w; int itype = ix.w;
acctyp4 fM, vM; acctyp4 fM, vM;
acctyp eM; acctyp eM;
@ -191,21 +216,28 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
} // if ii } // if ii
} }
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_, /* ----------------------------------------------------------------------
const __global int * dev_nbor, Rebuild hneigh after the neighbor build.
const __global int * dev_packed, hneight stores local IDs of H1 and H2 for each local and ghost O
and local ID of O for each local H.
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_reneigh(
const __global numtyp4 *restrict x_,
const __global int *dev_nbor,
const __global int *dev_packed,
const int nall, const int inum, const int nall, const int inum,
const int nbor_pitch, const int t_per_atom, const int nbor_pitch, const int t_per_atom,
__global int *restrict hneigh, __global int *restrict hneigh,
__global numtyp4 *restrict m, __global numtyp4 *restrict m,
const int typeO, const int typeH, const int typeO, const int typeH,
const __global tagint *restrict tag, const __global int *restrict map, const __global tagint *restrict tag,
const __global int *restrict map,
const __global int *restrict sametag) { const __global int *restrict sametag) {
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X; int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
if (i<nall) { if (i<nall) {
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; numtyp4 ix; fetch4(ix,i,pos_tex);
int iH1, iH2, iO; int iH1, iH2, iO;
int itype = ix.w; int itype = ix.w;
@ -234,14 +266,16 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
iO = closest_image(i, iO, sametag, x_); iO = closest_image(i, iO, sametag, x_);
} }
hneigh[i*4+0] = iO; hneigh[i*4+0] = iO;
hneigh[i*4+1] += -1; hneigh[i*4+1] = -1;
hneigh[i*4+2] = -1; hneigh[i*4+2] = -1;
} }
} }
} }
} }
/* ----------------------------------------------------------------------
On each timestep update virual charge coordinates (m output parameter).
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_, __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
const __global int * dev_nbor, const __global int * dev_nbor,
const __global int * dev_packed, const __global int * dev_packed,
@ -275,6 +309,15 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
} }
} }
/* ----------------------------------------------------------------------
Compute initial value of force, energy and virial for each local particle.
The values calculated on oxygens use the virtual charge position (m) and
they are stored in a separate array (ansO) for further distribution
in a separate kernel. For some hydrogens located on the boundary
of the local region, oxygens are non-local and the contribution
of oxygen is calculated separately in this kernel for them .
---------------------------------------------------------------------- */
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj1,
const __global numtyp4 *restrict lj3, const __global numtyp4 *restrict lj3,
@ -333,8 +376,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
iH1 = hneigh[i*4 ]; iH1 = hneigh[i*4 ];
iH2 = hneigh[i*4+1]; iH2 = hneigh[i*4+1];
x1 = m[iO]; x1 = m[iO];
} } else if (itype == typeH) {
if (itype == typeH) {
iO = hneigh[i *4 ]; iO = hneigh[i *4 ];
iH1 = hneigh[iO*4 ]; iH1 = hneigh[iO*4 ];
iH2 = hneigh[iO*4+1]; iH2 = hneigh[iO*4+1];