Enabled again neigh no with tpa > 1 for 3-body gpu styles for backward compatibility, could be slower than neigh no tpa 1 in many cases
This commit is contained in:
@ -224,8 +224,7 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_,
|
||||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
int nbor, nbor_end;
|
||||
int i, numj;
|
||||
int nbor, nbor_end, i, numj;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
@ -234,6 +233,7 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_,
|
||||
itype=map[itype];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
int j=dev_packed[nbor];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
@ -287,6 +287,7 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_,
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0)
|
||||
energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0);
|
||||
|
||||
@ -435,7 +436,7 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end;
|
||||
|
||||
const int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -446,13 +447,17 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_,
|
||||
itype=map[itype];
|
||||
|
||||
// recalculate numj and nbor_end for use of the short nbor list
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
if (dev_packed==dev_nbor) {
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
nbor_mem = dev_short_nbor;
|
||||
}
|
||||
int nborj_start = nbor_j;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
|
||||
for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
|
||||
int j=dev_short_nbor[nbor_j];
|
||||
|
||||
int j=nbor_mem[nbor_j];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
@ -475,15 +480,22 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_,
|
||||
param_gamma_ij=param4_ijparam.y;
|
||||
param_r0_ij=param4_ijparam.w;
|
||||
|
||||
int nbor_k=nborj_start-offset_j+offset_k;
|
||||
int numk = dev_short_nbor[nbor_k-n_stride];
|
||||
int k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
int nbor_k,k_end;
|
||||
if (dev_packed==dev_nbor) {
|
||||
nbor_k=nborj_start-offset_j+offset_k;
|
||||
int numk = dev_short_nbor[nbor_k-n_stride];
|
||||
k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
} else {
|
||||
nbor_k = nbor_j-offset_j+offset_k;
|
||||
if (nbor_k<=nbor_j) nbor_k += n_stride;
|
||||
k_end = nbor_end;
|
||||
}
|
||||
|
||||
for ( ; nbor_k<k_end; nbor_k+=n_stride) {
|
||||
int k=dev_short_nbor[nbor_k];
|
||||
int k=nbor_mem[nbor_k];
|
||||
k &= NEIGHMASK;
|
||||
|
||||
if (k <= j) continue;
|
||||
if (dev_packed==dev_nbor && k <= j) continue;
|
||||
|
||||
numtyp4 kx; fetch4(kx,k,pos_tex);
|
||||
int ktype=kx.w;
|
||||
@ -570,7 +582,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
|
||||
const int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -581,12 +593,15 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
||||
itype=map[itype];
|
||||
|
||||
// recalculate numj and nbor_end for use of the short nbor list
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
if (dev_packed==dev_nbor) {
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
nbor_mem = dev_short_nbor;
|
||||
}
|
||||
|
||||
for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
|
||||
int j=dev_short_nbor[nbor_j];
|
||||
int j=nbor_mem[nbor_j];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
@ -626,12 +641,14 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
|
||||
// recalculate numk and k_end for the use of short neighbor list
|
||||
numk = dev_short_nbor[nbor_k];
|
||||
nbor_k += n_stride;
|
||||
k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
if (dev_packed==dev_nbor) {
|
||||
numk = dev_short_nbor[nbor_k];
|
||||
nbor_k += n_stride;
|
||||
k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
}
|
||||
|
||||
for ( ; nbor_k<k_end; nbor_k+=n_stride) {
|
||||
int k=dev_short_nbor[nbor_k];
|
||||
int k=nbor_mem[nbor_k];
|
||||
k &= NEIGHMASK;
|
||||
|
||||
if (k == i) continue;
|
||||
@ -721,7 +738,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
|
||||
const int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -732,12 +749,15 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
itype=map[itype];
|
||||
|
||||
// recalculate numj and nbor_end for use of the short nbor list
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
if (dev_packed==dev_nbor) {
|
||||
numj = dev_short_nbor[nbor_j];
|
||||
nbor_j += n_stride;
|
||||
nbor_end = nbor_j+fast_mul(numj,n_stride);
|
||||
nbor_mem = dev_short_nbor;
|
||||
}
|
||||
|
||||
for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
|
||||
int j=dev_short_nbor[nbor_j];
|
||||
int j=nbor_mem[nbor_j];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
@ -777,12 +797,14 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
|
||||
// recalculate numk and k_end for the use of short neighbor list
|
||||
numk = dev_short_nbor[nbor_k];
|
||||
nbor_k += n_stride;
|
||||
k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
if (dev_packed==dev_nbor) {
|
||||
numk = dev_short_nbor[nbor_k];
|
||||
nbor_k += n_stride;
|
||||
k_end = nbor_k+fast_mul(numk,n_stride);
|
||||
}
|
||||
|
||||
for ( ; nbor_k<k_end; nbor_k+=n_stride) {
|
||||
int k=dev_short_nbor[nbor_k];
|
||||
int k=nbor_mem[nbor_k];
|
||||
k &= NEIGHMASK;
|
||||
|
||||
if (k == i) continue;
|
||||
|
||||
Reference in New Issue
Block a user