Attempted to resolve the memory-access runtime errors that occur when acquiring single- and mixed-precision arrays from the GPU library

This commit is contained in:
Trung Nguyen
2023-01-16 10:12:42 -06:00
parent 9dc0369cee
commit 973b46a907
3 changed files with 146 additions and 65 deletions

View File

@@ -1631,9 +1631,9 @@ __kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1,
 const __global numtyp4 *restrict thetai3,
 const __global int *restrict igrid,
 const __global numtyp2 *restrict grid,
-__global numtyp *restrict fdip_phi1,
-__global numtyp *restrict fdip_phi2,
-__global numtyp *restrict fdip_sum_phi,
+__global acctyp *restrict fdip_phi1,
+__global acctyp *restrict fdip_phi2,
+__global acctyp *restrict fdip_sum_phi,
 const int bsorder, const int inum,
 const int nzlo_out, const int nylo_out,
 const int nxlo_out, const int ngridxy,
@@ -1843,7 +1843,7 @@ __kernel void k_amoeba_fphi_uind(const __global numtyp4 *restrict thetai1,
 }
 int idx;
-numtyp fdip_buf[20];
+acctyp fdip_buf[20];
 fdip_buf[0] = (numtyp)0.0;
 fdip_buf[1] = tuv100_1;
@@ -1917,7 +1917,7 @@ __kernel void k_amoeba_fphi_mpole(const __global numtyp4 *restrict thetai1,
 const __global numtyp4 *restrict thetai3,
 const __global int *restrict igrid,
 const __global numtyp2 *restrict grid,
-__global numtyp *restrict fphi,
+__global acctyp *restrict fphi,
 const int bsorder, const int inum, const numtyp felec,
 const int nzlo_out, const int nylo_out,
 const int nxlo_out, const int ngridxy,

View File

@@ -250,7 +250,7 @@ class BaseAmoeba {
 UCL_Vector<numtyp4,numtyp4> _thetai1, _thetai2, _thetai3;
 UCL_Vector<int,int> _igrid;
 UCL_Vector<numtyp2,numtyp2> _cgrid_brick;
-UCL_Vector<numtyp,numtyp> _fdip_phi1, _fdip_phi2, _fdip_sum_phi;
+UCL_Vector<acctyp,acctyp> _fdip_phi1, _fdip_phi2, _fdip_sum_phi;
 int _max_thetai_size;
 int _nzlo_out, _nzhi_out, _nylo_out, _nyhi_out, _nxlo_out, _nxhi_out;
 int _ngridx, _ngridy, _ngridz, _num_grid_points;