Add comments for tip4p GPU kernels
This commit is contained in:
@ -57,16 +57,26 @@ _texture( q_tex,int2);
|
|||||||
#define q_tex q_
|
#define q_tex q_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
GPU analogue of Atom::map inline method,
|
||||||
|
but now limited to map_array mapping style.
|
||||||
|
Map global ID to local index of atom.
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
ucl_inline int atom_mapping(const __global int *map, tagint glob) {
|
ucl_inline int atom_mapping(const __global int *map, tagint glob) {
|
||||||
return map[glob];
|
return map[glob];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
GPU version of Domain::closest_image(int, int) method.
|
||||||
|
Return local index of atom J or any of its images that is closest to atom I
|
||||||
|
if J is not a valid index like -1, just return it.
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
||||||
const __global numtyp4 *restrict x_)
|
const __global numtyp4 *restrict x_)
|
||||||
{
|
{
|
||||||
if (j < 0) return j;
|
if (j < 0) return j;
|
||||||
|
|
||||||
numtyp4 xi; fetch4(xi,i,pos_tex); // = x[i];
|
numtyp4 xi; fetch4(xi,i,pos_tex);
|
||||||
numtyp4 xj; fetch4(xj,j,pos_tex);
|
numtyp4 xj; fetch4(xj,j,pos_tex);
|
||||||
|
|
||||||
int closest = j;
|
int closest = j;
|
||||||
@ -92,6 +102,10 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
|||||||
return closest;
|
return closest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
For molecule that consists of atoms O, H1 and H2 compute position
|
||||||
|
of virtual charge site xM (return parameter)
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
||||||
__global numtyp4 *xM, numtyp q,
|
__global numtyp4 *xM, numtyp q,
|
||||||
numtyp alpha, const __global numtyp4 *restrict x_) {
|
numtyp alpha, const __global numtyp4 *restrict x_) {
|
||||||
@ -118,23 +132,34 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
|||||||
*xM = M;
|
*xM = M;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
/* ----------------------------------------------------------------------
|
||||||
|
Compute resulting forces (ans), energies and virial (engv).
|
||||||
|
An additional term is calculated based on the previously
|
||||||
|
calculated values on the virlual sites (ansO),
|
||||||
|
which should be distributed over the real atoms.
|
||||||
|
For some hydrogens, the corresponding oxygens are
|
||||||
|
not local atoms and the ansO value is not calculated.
|
||||||
|
The required increase is calculated directly in the main function.
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
|
__kernel void k_lj_tip4p_long_distrib(
|
||||||
|
const __global numtyp4 *restrict x_,
|
||||||
__global acctyp4 *restrict ans,
|
__global acctyp4 *restrict ans,
|
||||||
__global acctyp *restrict engv,
|
__global acctyp *restrict engv,
|
||||||
const int eflag, const int vflag, const int inum,
|
const int eflag, const int vflag, const int inum,
|
||||||
const int nbor_pitch, const int t_per_atom,
|
const int nbor_pitch, const int t_per_atom,
|
||||||
__global int *restrict hneigh,
|
const __global int *restrict hneigh,
|
||||||
__global numtyp4 *restrict m,
|
const __global numtyp4 *restrict m,
|
||||||
const int typeO, const int typeH,
|
const int typeO, const int typeH,
|
||||||
const numtyp alpha,
|
const numtyp alpha,
|
||||||
const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) {
|
const __global numtyp *restrict q_,
|
||||||
|
const __global acctyp4 *restrict ansO) {
|
||||||
|
|
||||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||||
acctyp4 f;
|
acctyp4 f;
|
||||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
|
|
||||||
if (i<inum) {
|
if (i<inum) {
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex);// = x_[i];
|
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||||
int itype = ix.w;
|
int itype = ix.w;
|
||||||
acctyp4 fM, vM;
|
acctyp4 fM, vM;
|
||||||
acctyp eM;
|
acctyp eM;
|
||||||
@ -191,21 +216,28 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
|||||||
} // if ii
|
} // if ii
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
/* ----------------------------------------------------------------------
|
||||||
const __global int * dev_nbor,
|
Rebuild hneigh after the neighbor build.
|
||||||
const __global int * dev_packed,
|
hneight stores local IDs of H1 and H2 for each local and ghost O
|
||||||
|
and local ID of O for each local H.
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
|
__kernel void k_lj_tip4p_reneigh(
|
||||||
|
const __global numtyp4 *restrict x_,
|
||||||
|
const __global int *dev_nbor,
|
||||||
|
const __global int *dev_packed,
|
||||||
const int nall, const int inum,
|
const int nall, const int inum,
|
||||||
const int nbor_pitch, const int t_per_atom,
|
const int nbor_pitch, const int t_per_atom,
|
||||||
__global int *restrict hneigh,
|
__global int *restrict hneigh,
|
||||||
__global numtyp4 *restrict m,
|
__global numtyp4 *restrict m,
|
||||||
const int typeO, const int typeH,
|
const int typeO, const int typeH,
|
||||||
const __global tagint *restrict tag, const __global int *restrict map,
|
const __global tagint *restrict tag,
|
||||||
|
const __global int *restrict map,
|
||||||
const __global int *restrict sametag) {
|
const __global int *restrict sametag) {
|
||||||
|
|
||||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||||
|
|
||||||
if (i<nall) {
|
if (i<nall) {
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||||
|
|
||||||
int iH1, iH2, iO;
|
int iH1, iH2, iO;
|
||||||
int itype = ix.w;
|
int itype = ix.w;
|
||||||
@ -232,16 +264,18 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
|||||||
if ((int)iIx.w == typeH) {
|
if ((int)iIx.w == typeH) {
|
||||||
iO = atom_mapping(map,tag[i] - 2);
|
iO = atom_mapping(map,tag[i] - 2);
|
||||||
iO = closest_image(i, iO, sametag, x_);
|
iO = closest_image(i, iO, sametag, x_);
|
||||||
}
|
}
|
||||||
hneigh[i*4+0] = iO;
|
hneigh[i*4+0] = iO;
|
||||||
hneigh[i*4+1] += -1;
|
hneigh[i*4+1] = -1;
|
||||||
hneigh[i*4+2] = -1;
|
hneigh[i*4+2] = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
On each timestep update virual charge coordinates (m output parameter).
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
||||||
const __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
@ -268,13 +302,22 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
|||||||
} else {
|
} else {
|
||||||
m[iO] = ix;
|
m[iO] = ix;
|
||||||
m[iO].w = qO;
|
m[iO].w = qO;
|
||||||
hneigh[i*4] = iO;
|
hneigh[i*4] = iO;
|
||||||
hneigh[i*4+1] = iO;
|
hneigh[i*4+1] = iO;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Compute initial value of force, energy and virial for each local particle.
|
||||||
|
The values calculated on oxygens use the virtual charge position (m) and
|
||||||
|
they are stored in a separate array (ansO) for further distribution
|
||||||
|
in a separate kernel. For some hydrogens located on the boundary
|
||||||
|
of the local region, oxygens are non-local and the contribution
|
||||||
|
of oxygen is calculated separately in this kernel for them .
|
||||||
|
---------------------------------------------------------------------- */
|
||||||
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp4 *restrict lj1,
|
const __global numtyp4 *restrict lj1,
|
||||||
const __global numtyp4 *restrict lj3,
|
const __global numtyp4 *restrict lj3,
|
||||||
@ -333,8 +376,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||||||
iH1 = hneigh[i*4 ];
|
iH1 = hneigh[i*4 ];
|
||||||
iH2 = hneigh[i*4+1];
|
iH2 = hneigh[i*4+1];
|
||||||
x1 = m[iO];
|
x1 = m[iO];
|
||||||
}
|
} else if (itype == typeH) {
|
||||||
if (itype == typeH) {
|
|
||||||
iO = hneigh[i *4 ];
|
iO = hneigh[i *4 ];
|
||||||
iH1 = hneigh[iO*4 ];
|
iH1 = hneigh[iO*4 ];
|
||||||
iH2 = hneigh[iO*4+1];
|
iH2 = hneigh[iO*4+1];
|
||||||
|
|||||||
Reference in New Issue
Block a user