Feb2021 GPU Package Update - GPU Package Files

This commit is contained in:
Michael Brown
2021-02-15 08:20:50 -08:00
parent 16004e8f45
commit e7e2d2323b
345 changed files with 13424 additions and 7708 deletions

View File

@ -32,22 +32,21 @@ _texture_2d( quat_tex,int4);
#define quat_tex qif
#endif
/* nbor_info_e: read one entry of a strided neighbor array and set up the
 * loop bounds used to walk that entry's neighbors.
 *
 * Inputs (read only):
 *   nbor_mem    - indexable array holding the neighbor data
 *   nbor_stride - distance, in elements, between successive rows of nbor_mem
 *   t_per_atom  - multiplier applied to nbor_stride to form the loop step
 *   ii          - index of the entry to look up
 *   offset      - multiplier applied to nbor_stride to shift the start index
 *
 * Outputs (assigned by the macro; each must be a modifiable lvalue):
 *   i          = nbor_mem[ii]
 *   numj       = nbor_mem[ii + nbor_stride]
 *   nbor_end   = (ii + 2*nbor_stride) + fast_mul(nbor_stride, numj)
 *   nbor_begin = (ii + 2*nbor_stride) + fast_mul(offset, nbor_stride)
 *   stride     = fast_mul(t_per_atom, nbor_stride)
 *
 * NOTE(review): fast_mul is defined outside this view; presumably it is an
 * integer multiply -- confirm against its definition.
 *
 * Usage caveats visible from the expansion:
 *   - nbor_begin is used as scratch and advanced in three steps, so the
 *     statement order below must not be changed.
 *   - The expansion is a bare sequence of statements that already ends in
 *     ';' and is not wrapped in braces or do { } while (0); as the body of
 *     an unbraced if/for only the first statement would be controlled.
 *   - Arguments are substituted without parentheses and several of them
 *     appear more than once, so pass simple identifiers without side effects.
 */
#define nbor_info_e(nbor_mem, nbor_stride, t_per_atom, ii, offset, \
i, numj, stride, nbor_end, nbor_begin) \
i=nbor_mem[ii]; \
nbor_begin=ii+nbor_stride; \
numj=nbor_mem[nbor_begin]; \
nbor_begin+=nbor_stride; \
nbor_end=nbor_begin+fast_mul(nbor_stride,numj); \
nbor_begin+=fast_mul(offset,nbor_stride); \
stride=fast_mul(t_per_atom,nbor_stride);
/* nbor_info_e_ss: read one entry of a strided neighbor array and set up the
 * loop bounds used to walk that entry's neighbors.
 *
 * Inputs (read only):
 *   nbor_mem    - indexable array holding the neighbor data
 *   nbor_stride - distance, in elements, between successive rows of nbor_mem
 *   t_per_atom  - multiplier applied to nbor_stride to form the loop step
 *   ii          - index of the entry to look up
 *   offset      - multiplier applied to nbor_stride to shift the start index
 *
 * Outputs (assigned by the macro; each must be a modifiable lvalue):
 *   i          = nbor_mem[ii]
 *   numj       = nbor_mem[ii + nbor_stride]
 *   nbor_end   = (ii + 2*nbor_stride) + fast_mul(nbor_stride, numj)
 *   nbor_begin = (ii + 2*nbor_stride) + fast_mul(offset, nbor_stride)
 *   stride     = fast_mul(t_per_atom, nbor_stride)
 *
 * NOTE(review): fast_mul is defined outside this view; presumably it is an
 * integer multiply -- confirm against its definition.
 * NOTE(review): the meaning of the "_ss" suffix is not established by the
 * code shown here -- confirm with the callers.
 *
 * Usage caveats visible from the expansion:
 *   - nbor_begin is used as scratch and advanced in three steps, so the
 *     statement order below must not be changed.
 *   - The expansion is a bare sequence of statements that already ends in
 *     ';' and is not wrapped in braces or do { } while (0); as the body of
 *     an unbraced if/for only the first statement would be controlled.
 *   - Arguments are substituted without parentheses and several of them
 *     appear more than once, so pass simple identifiers without side effects.
 */
#define nbor_info_e_ss(nbor_mem, nbor_stride, t_per_atom, ii, offset, \
i, numj, stride, nbor_end, nbor_begin) \
i=nbor_mem[ii]; \
nbor_begin=ii+nbor_stride; \
numj=nbor_mem[nbor_begin]; \
nbor_begin+=nbor_stride; \
nbor_end=nbor_begin+fast_mul(nbor_stride,numj); \
nbor_begin+=fast_mul(offset,nbor_stride); \
stride=fast_mul(t_per_atom,nbor_stride);
#if (ARCH < 300)
#if (SHUFFLE_AVAIL == 0)
#define store_answers_t(f, tor, energy, virial, ii, astride, tid, \
t_per_atom, offset, eflag, vflag, ans, engv) \
t_per_atom, offset, eflag, vflag, ans, engv, inum) \
if (t_per_atom>1) { \
__local acctyp red_acc[7][BLOCK_PAIR]; \
red_acc[0][tid]=f.x; \
red_acc[1][tid]=f.y; \
red_acc[2][tid]=f.z; \
@ -55,6 +54,7 @@ _texture_2d( quat_tex,int4);
red_acc[4][tid]=tor.y; \
red_acc[5][tid]=tor.z; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
simdsync(); \
if (offset < s) { \
for (int r=0; r<6; r++) \
red_acc[r][tid] += red_acc[r][tid+s]; \
@ -66,28 +66,39 @@ _texture_2d( quat_tex,int4);
tor.x=red_acc[3][tid]; \
tor.y=red_acc[4][tid]; \
tor.z=red_acc[5][tid]; \
if (eflag>0 || vflag>0) { \
for (int r=0; r<6; r++) \
red_acc[r][tid]=virial[r]; \
red_acc[6][tid]=energy; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
if (offset < s) { \
for (int r=0; r<7; r++) \
red_acc[r][tid] += red_acc[r][tid+s]; \
if (EVFLAG && (eflag || vflag)) { \
if (vflag) { \
simdsync(); \
for (int r=0; r<6; r++) \
red_acc[r][tid]=virial[r]; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
simdsync(); \
if (offset < s) { \
for (int r=0; r<6; r++) \
red_acc[r][tid] += red_acc[r][tid+s]; \
} \
} \
for (int r=0; r<6; r++) \
virial[r]=red_acc[r][tid]; \
} \
if (eflag) { \
simdsync(); \
red_acc[0][tid]=energy; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
simdsync(); \
if (offset < s) red_acc[0][tid] += red_acc[0][tid+s]; \
} \
} \
for (int r=0; r<6; r++) \
virial[r]=red_acc[r][tid]; \
energy=red_acc[6][tid]; \
energy=red_acc[0][tid]; \
} \
} \
if (offset==0) { \
if (offset==0 && ii<inum) { \
__global acctyp *ap1=engv+ii; \
if (eflag>0) { \
if (EVFLAG && eflag) { \
*ap1=energy*(acctyp)0.5; \
ap1+=astride; \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (int i=0; i<6; i++) { \
*ap1=virial[i]*(acctyp)0.5; \
ap1+=astride; \
@ -100,12 +111,12 @@ _texture_2d( quat_tex,int4);
#define acc_answers(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
eflag, vflag, ans, engv) \
if (t_per_atom>1) { \
__local acctyp red_acc[6][BLOCK_PAIR]; \
red_acc[0][tid]=f.x; \
red_acc[1][tid]=f.y; \
red_acc[2][tid]=f.z; \
red_acc[3][tid]=energy; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
simdsync(); \
if (offset < s) { \
for (int r=0; r<4; r++) \
red_acc[r][tid] += red_acc[r][tid+s]; \
@ -115,10 +126,11 @@ _texture_2d( quat_tex,int4);
f.y=red_acc[1][tid]; \
f.z=red_acc[2][tid]; \
energy=red_acc[3][tid]; \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (int r=0; r<6; r++) \
red_acc[r][tid]=virial[r]; \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
simdsync(); \
if (offset < s) { \
for (int r=0; r<6; r++) \
red_acc[r][tid] += red_acc[r][tid+s]; \
@ -128,13 +140,13 @@ _texture_2d( quat_tex,int4);
virial[r]=red_acc[r][tid]; \
} \
} \
if (offset==0) { \
if (offset==0 && ii<inum) { \
engv+=ii; \
if (eflag>0) { \
if (EVFLAG && eflag) { \
*engv+=energy*(acctyp)0.5; \
engv+=inum; \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (int i=0; i<6; i++) { \
*engv+=virial[i]*(acctyp)0.5; \
engv+=inum; \
@ -150,31 +162,31 @@ _texture_2d( quat_tex,int4);
#else
#define store_answers_t(f, tor, energy, virial, ii, astride, tid, \
t_per_atom, offset, eflag, vflag, ans, engv) \
t_per_atom, offset, eflag, vflag, ans, engv, inum) \
if (t_per_atom>1) { \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
f.x += shfl_xor(f.x, s, t_per_atom); \
f.y += shfl_xor(f.y, s, t_per_atom); \
f.z += shfl_xor(f.z, s, t_per_atom); \
tor.x += shfl_xor(tor.x, s, t_per_atom); \
tor.y += shfl_xor(tor.y, s, t_per_atom); \
tor.z += shfl_xor(tor.z, s, t_per_atom); \
energy += shfl_xor(energy, s, t_per_atom); \
f.x += shfl_down(f.x, s, t_per_atom); \
f.y += shfl_down(f.y, s, t_per_atom); \
f.z += shfl_down(f.z, s, t_per_atom); \
tor.x += shfl_down(tor.x, s, t_per_atom); \
tor.y += shfl_down(tor.y, s, t_per_atom); \
tor.z += shfl_down(tor.z, s, t_per_atom); \
if (EVFLAG) energy += shfl_down(energy, s, t_per_atom); \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
for (int r=0; r<6; r++) \
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
for (int r=0; r<6; r++) \
virial[r] += shfl_down(virial[r], s, t_per_atom); \
} \
} \
} \
if (offset==0) { \
if (offset==0 && ii<inum) { \
__global acctyp *ap1=engv+ii; \
if (eflag>0) { \
if (EVFLAG && eflag) { \
*ap1=energy*(acctyp)0.5; \
ap1+=astride; \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (int i=0; i<6; i++) { \
*ap1=virial[i]*(acctyp)0.5; \
ap1+=astride; \
@ -188,25 +200,25 @@ _texture_2d( quat_tex,int4);
eflag, vflag, ans, engv) \
if (t_per_atom>1) { \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
f.x += shfl_xor(f.x, s, t_per_atom); \
f.y += shfl_xor(f.y, s, t_per_atom); \
f.z += shfl_xor(f.z, s, t_per_atom); \
energy += shfl_xor(energy, s, t_per_atom); \
f.x += shfl_down(f.x, s, t_per_atom); \
f.y += shfl_down(f.y, s, t_per_atom); \
f.z += shfl_down(f.z, s, t_per_atom); \
if (EVFLAG) energy += shfl_down(energy, s, t_per_atom); \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
for (int r=0; r<6; r++) \
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
for (int r=0; r<6; r++) \
virial[r] += shfl_down(virial[r], s, t_per_atom); \
} \
} \
} \
if (offset==0) { \
if (offset==0 && ii<inum) { \
engv+=ii; \
if (eflag>0) { \
if (EVFLAG && eflag) { \
*engv+=energy*(acctyp)0.5; \
engv+=inum; \
} \
if (vflag>0) { \
if (EVFLAG && vflag) { \
for (int i=0; i<6; i++) { \
*engv+=virial[i]*(acctyp)0.5; \
engv+=inum; \