Misc Improvements to GPU Package
- Optimizations for molecular systems
- Improved kernel performance and greater CPU overlap
- Reduced GPU-to-CPU communications for discrete devices
- Switch classic Intel makefiles to use LLVM-based compilers
- Prefetch optimizations supported for OpenCL
- Optimized data repack for quaternions
This commit is contained in:
@ -38,7 +38,7 @@ __kernel void k_born_coul_wolf(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp3 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
@ -63,7 +63,7 @@ __kernel void k_born_coul_wolf(const __global numtyp4 *restrict x_,
|
||||
sp_lj[6]=sp_lj_in[6];
|
||||
sp_lj[7]=sp_lj_in[7];
|
||||
|
||||
acctyp4 f;
|
||||
acctyp3 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
acctyp energy, e_coul, virial[6];
|
||||
if (EVFLAG) {
|
||||
@ -89,6 +89,7 @@ __kernel void k_born_coul_wolf(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||
int j=dev_packed[nbor];
|
||||
|
||||
numtyp factor_lj, factor_coul;
|
||||
@ -174,7 +175,7 @@ __kernel void k_born_coul_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp3 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
@ -200,7 +201,7 @@ __kernel void k_born_coul_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
coeff2[tid]=coeff2_in[tid];
|
||||
}
|
||||
|
||||
acctyp4 f;
|
||||
acctyp3 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
acctyp energy, e_coul, virial[6];
|
||||
if (EVFLAG) {
|
||||
@ -229,6 +230,7 @@ __kernel void k_born_coul_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||
int j=dev_packed[nbor];
|
||||
|
||||
numtyp factor_lj, factor_coul;
|
||||
|
||||
Reference in New Issue
Block a user