Add comments for tip4p GPU kernels
This commit is contained in:
@ -57,16 +57,26 @@ _texture( q_tex,int2);
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
GPU analogue of Atom::map inline method,
|
||||
but now limited to map_array mapping style.
|
||||
Map global ID to local index of atom.
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline int atom_mapping(const __global int *map, tagint glob) {
|
||||
return map[glob];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
GPU version of Domain::closest_image(int, int) method.
|
||||
Return local index of atom J or any of its images that is closest to atom I
|
||||
if J is not a valid index like -1, just return it.
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
||||
const __global numtyp4 *restrict x_)
|
||||
{
|
||||
if (j < 0) return j;
|
||||
|
||||
numtyp4 xi; fetch4(xi,i,pos_tex); // = x[i];
|
||||
numtyp4 xi; fetch4(xi,i,pos_tex);
|
||||
numtyp4 xj; fetch4(xj,j,pos_tex);
|
||||
|
||||
int closest = j;
|
||||
@ -92,6 +102,10 @@ ucl_inline int closest_image(int i, int j, const __global int* sametag,
|
||||
return closest;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
For molecule that consists of atoms O, H1 and H2 compute position
|
||||
of virtual charge site xM (return parameter)
|
||||
---------------------------------------------------------------------- */
|
||||
ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
||||
__global numtyp4 *xM, numtyp q,
|
||||
numtyp alpha, const __global numtyp4 *restrict x_) {
|
||||
@ -118,23 +132,34 @@ ucl_inline void compute_newsite(int iO, int iH1, int iH2,
|
||||
*xM = M;
|
||||
}
|
||||
|
||||
__kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
||||
/* ----------------------------------------------------------------------
|
||||
Compute resulting forces (ans), energies and virial (engv).
|
||||
An additional term is calculated based on the previously
|
||||
calculated values on the virlual sites (ansO),
|
||||
which should be distributed over the real atoms.
|
||||
For some hydrogens, the corresponding oxygens are
|
||||
not local atoms and the ansO value is not calculated.
|
||||
The required increase is calculated directly in the main function.
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_long_distrib(
|
||||
const __global numtyp4 *restrict x_,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
__global int *restrict hneigh,
|
||||
__global numtyp4 *restrict m,
|
||||
const __global int *restrict hneigh,
|
||||
const __global numtyp4 *restrict m,
|
||||
const int typeO, const int typeH,
|
||||
const numtyp alpha,
|
||||
const __global numtyp *restrict q_, const __global acctyp4 *restrict ansO) {
|
||||
const __global numtyp *restrict q_,
|
||||
const __global acctyp4 *restrict ansO) {
|
||||
|
||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||
acctyp4 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
|
||||
if (i<inum) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);// = x_[i];
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||
int itype = ix.w;
|
||||
acctyp4 fM, vM;
|
||||
acctyp eM;
|
||||
@ -191,21 +216,28 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
/* ----------------------------------------------------------------------
|
||||
Rebuild hneigh after the neighbor build.
|
||||
hneight stores local IDs of H1 and H2 for each local and ghost O
|
||||
and local ID of O for each local H.
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_reneigh(
|
||||
const __global numtyp4 *restrict x_,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const int nall, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
__global int *restrict hneigh,
|
||||
__global numtyp4 *restrict m,
|
||||
const int typeO, const int typeH,
|
||||
const __global tagint *restrict tag, const __global int *restrict map,
|
||||
const __global tagint *restrict tag,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict sametag) {
|
||||
|
||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+THREAD_ID_X;
|
||||
|
||||
if (i<nall) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex);
|
||||
|
||||
int iH1, iH2, iO;
|
||||
int itype = ix.w;
|
||||
@ -234,14 +266,16 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||
iO = closest_image(i, iO, sametag, x_);
|
||||
}
|
||||
hneigh[i*4+0] = iO;
|
||||
hneigh[i*4+1] += -1;
|
||||
hneigh[i*4+1] = -1;
|
||||
hneigh[i*4+2] = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
On each timestep update virual charge coordinates (m output parameter).
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
@ -275,6 +309,15 @@ __kernel void k_lj_tip4p_newsite(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Compute initial value of force, energy and virial for each local particle.
|
||||
The values calculated on oxygens use the virtual charge position (m) and
|
||||
they are stored in a separate array (ansO) for further distribution
|
||||
in a separate kernel. For some hydrogens located on the boundary
|
||||
of the local region, oxygens are non-local and the contribution
|
||||
of oxygen is calculated separately in this kernel for them .
|
||||
---------------------------------------------------------------------- */
|
||||
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
@ -333,8 +376,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
iH1 = hneigh[i*4 ];
|
||||
iH2 = hneigh[i*4+1];
|
||||
x1 = m[iO];
|
||||
}
|
||||
if (itype == typeH) {
|
||||
} else if (itype == typeH) {
|
||||
iO = hneigh[i *4 ];
|
||||
iH1 = hneigh[iO*4 ];
|
||||
iH2 = hneigh[iO*4+1];
|
||||
|
||||
Reference in New Issue
Block a user