GPU Package: Fixing out of bounds memory access issues with special kernel unroll optimizations.

This commit is contained in:
W. Michael Brown
2023-03-06 22:10:19 -08:00
parent 2627f60a39
commit d7c783560a

View File

@ -507,27 +507,27 @@ __kernel void kernel_special(__global int *dev_nbor,
if (ii<nt) { if (ii<nt) {
int stride; int stride;
__global int *list, *list_end; __global int *list;
int n1=nspecial[ii*3]; int n1=nspecial[ii*3];
int n2=nspecial[ii*3+1]; int n2=nspecial[ii*3+1];
int n3=nspecial[ii*3+2]; int n3=nspecial[ii*3+2];
int numj; int myj;
if (ii < inum) { if (ii < inum) {
stride=inum; stride=inum;
list=dev_nbor+stride+ii; list=dev_nbor+stride+ii;
numj=*list; int numj=*list;
list+=stride+fast_mul(ii,t_per_atom-1); list+=stride+fast_mul(ii,t_per_atom-1);
stride=fast_mul(inum,t_per_atom); stride=fast_mul(inum,t_per_atom);
int njt=numj/t_per_atom; myj=numj/t_per_atom;
list_end=list+fast_mul(njt,stride)+(numj & (t_per_atom-1)); if (offset < (numj & (t_per_atom-1)))
myj++;
list+=offset; list+=offset;
} else { } else {
stride=1; stride=1;
list=host_nbor_list+(ii-inum)*max_nbors; list=host_nbor_list+(ii-inum)*max_nbors;
numj=host_numj[ii-inum]; myj=host_numj[ii-inum];
list_end=list+fast_mul(numj,stride);
} }
#if SPECIAL_DATA_PRELOAD_SIZE > 0 #if SPECIAL_DATA_PRELOAD_SIZE > 0
@ -537,15 +537,18 @@ __kernel void kernel_special(__global int *dev_nbor,
} }
#endif #endif
for ( ; list<list_end; list+=UNROLL_FACTOR_LIST * stride) { for (int m=0; m<myj; m+=UNROLL_FACTOR_LIST) {
int nbor[UNROLL_FACTOR_LIST]; int nbor[UNROLL_FACTOR_LIST];
tagint jtag[UNROLL_FACTOR_LIST]; tagint jtag[UNROLL_FACTOR_LIST];
__global int* list_addr[UNROLL_FACTOR_LIST]; __global int* list_addr[UNROLL_FACTOR_LIST];
int lmax = myj - m;
for (int l=0; l<UNROLL_FACTOR_LIST; l++) { for (int l=0; l<UNROLL_FACTOR_LIST; l++) {
list_addr[l] = list + l*stride; list_addr[l] = list + l*stride;
if (l < lmax)
nbor[l] = *list_addr[l]; nbor[l] = *list_addr[l];
} }
for (int l=0; l<UNROLL_FACTOR_LIST; l++) { for (int l=0; l<UNROLL_FACTOR_LIST; l++) {
if (l < lmax)
jtag[l] = tag[nbor[l]]; jtag[l] = tag[nbor[l]];
} }
@ -581,7 +584,7 @@ __kernel void kernel_special(__global int *dev_nbor,
for (int c = 0; c < UNROLL_FACTOR_SPECIAL; c++) { for (int c = 0; c < UNROLL_FACTOR_SPECIAL; c++) {
if (i + c < n3) { if (i + c < n3) {
for (int l=0; l<UNROLL_FACTOR_LIST; l++) { for (int l=0; l<UNROLL_FACTOR_LIST; l++) {
if (special_data[c] == jtag[l]) { if (l < lmax && special_data[c] == jtag[l]) {
nbor[l]=nbor[l] ^ which[c]; nbor[l]=nbor[l] ^ which[c];
} }
} }
@ -589,9 +592,10 @@ __kernel void kernel_special(__global int *dev_nbor,
} }
} }
for (int l=0; l<UNROLL_FACTOR_LIST; l++) { for (int l=0; l<UNROLL_FACTOR_LIST; l++) {
if (l < lmax)
*list_addr[l] = nbor[l]; *list_addr[l] = nbor[l];
} }
list+=UNROLL_FACTOR_LIST * stride;
} }
} // if ii } // if ii
} }