Fixed memory bugs with device array allocations
This commit is contained in:
@ -1714,7 +1714,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
v2 = thetai3[m][kb][2];
|
v2 = thetai3[m][kb][2];
|
||||||
v3 = thetai3[m][kb][3];
|
v3 = thetai3[m][kb][3];
|
||||||
*/
|
*/
|
||||||
int i3 = m*4*bsorder + 4*kb;
|
int i3 = i*4*bsorder + 4*kb;
|
||||||
v0 = thetai3[i3];
|
v0 = thetai3[i3];
|
||||||
v1 = thetai3[i3]+1;
|
v1 = thetai3[i3]+1;
|
||||||
v2 = thetai3[i3+2];
|
v2 = thetai3[i3+2];
|
||||||
@ -1750,7 +1750,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
u2 = thetai2[m][jb][2];
|
u2 = thetai2[m][jb][2];
|
||||||
u3 = thetai2[m][jb][3];
|
u3 = thetai2[m][jb][3];
|
||||||
*/
|
*/
|
||||||
int i2 = m*4*bsorder+4*jb;
|
int i2 = i*4*bsorder+4*jb;
|
||||||
u0 = thetai2[i2];
|
u0 = thetai2[i2];
|
||||||
u1 = thetai2[i2+1];
|
u1 = thetai2[i2+1];
|
||||||
u2 = thetai2[i2+2];
|
u2 = thetai2[i2+2];
|
||||||
@ -1776,7 +1776,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
t2_2 += tq_2*thetai1[m][ib][2];
|
t2_2 += tq_2*thetai1[m][ib][2];
|
||||||
t3 += (tq_1+tq_2)*thetai1[m][ib][3];
|
t3 += (tq_1+tq_2)*thetai1[m][ib][3];
|
||||||
*/
|
*/
|
||||||
int i1 = m*4*bsorder+4*ib;
|
int i1 = i*4*bsorder+4*ib;
|
||||||
numtyp w0 = thetai1[i1];
|
numtyp w0 = thetai1[i1];
|
||||||
numtyp w1 = thetai1[i1+1];
|
numtyp w1 = thetai1[i1+1];
|
||||||
numtyp w2 = thetai1[i1+2];
|
numtyp w2 = thetai1[i1+2];
|
||||||
|
|||||||
@ -563,7 +563,7 @@ template <class numtyp, class acctyp>
|
|||||||
void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
double ***host_thetai3, int** host_igrid,
|
double ***host_thetai3, int** host_igrid,
|
||||||
double* grid_brick_start, int nzlo_out,
|
double* host_grid_brick_start, int nzlo_out,
|
||||||
int nzhi_out, int nylo_out, int nyhi_out,
|
int nzhi_out, int nylo_out, int nyhi_out,
|
||||||
int nxlo_out, int nxhi_out) {
|
int nxlo_out, int nxhi_out) {
|
||||||
|
|
||||||
@ -580,9 +580,9 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
_thetai3.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
_thetai3.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
_igrid.alloc(_max_thetai_size*4,*(this->ucl_device),UCL_READ_ONLY);
|
_igrid.alloc(_max_thetai_size*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
|
||||||
_fdip_phi1.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
_fdip_phi1.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
|
||||||
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE);
|
||||||
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_WRITE_ONLY);
|
_fdip_sum_phi.alloc(_max_thetai_size*20,*(this->ucl_device),UCL_READ_WRITE);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if (inum_full>_max_thetai_size) {
|
if (inum_full>_max_thetai_size) {
|
||||||
@ -634,13 +634,33 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
ucl_copy(_thetai3,dview,false);
|
ucl_copy(_thetai3,dview,false);
|
||||||
|
|
||||||
UCL_H_Vec<int> dview_int;
|
UCL_H_Vec<int> dview_int;
|
||||||
|
dview_int.alloc(inum_full*4, *(this->ucl_device));
|
||||||
for (int i = 0; i < inum_full; i++) {
|
for (int i = 0; i < inum_full; i++) {
|
||||||
int idx = i*4;
|
int idx = i*4;
|
||||||
dview_int[idx+0] = host_igrid[i][0];
|
dview_int[idx+0] = host_igrid[i][0];
|
||||||
dview_int[idx+1] = host_igrid[i][1];
|
dview_int[idx+1] = host_igrid[i][1];
|
||||||
dview_int[idx+2] = host_igrid[i][2];
|
dview_int[idx+2] = host_igrid[i][2];
|
||||||
}
|
}
|
||||||
ucl_copy(_igrid,dview_int,false);
|
ucl_copy(_igrid, dview_int, false);
|
||||||
|
|
||||||
|
// update the cgrid_brick with data host
|
||||||
|
|
||||||
|
_nzlo_out = nzlo_out;
|
||||||
|
_nzhi_out = nzhi_out;
|
||||||
|
_nylo_out = nylo_out;
|
||||||
|
_nyhi_out = nyhi_out;
|
||||||
|
_nxlo_out = nxlo_out;
|
||||||
|
_nxhi_out = nxhi_out;
|
||||||
|
_ngridz = nzhi_out - nzlo_out + 1;
|
||||||
|
_ngridy = nyhi_out - nylo_out + 1;
|
||||||
|
_ngridx = nxhi_out - nxlo_out + 1;
|
||||||
|
_num_grid_points = _ngridx * _ngridy * _ngridz;
|
||||||
|
|
||||||
|
UCL_H_Vec<double> dview_cgrid;
|
||||||
|
dview_cgrid.view(host_grid_brick_start, _num_grid_points*2, *(this->ucl_device));
|
||||||
|
_cgrid_brick.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_ONLY);
|
||||||
|
ucl_copy(_cgrid_brick,dview_cgrid,false);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -666,23 +686,6 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
|||||||
if (first_iteration) first_iteration = false;
|
if (first_iteration) first_iteration = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// update the cgrid_brick with data host
|
|
||||||
|
|
||||||
_nzlo_out = nzlo_out;
|
|
||||||
_nzhi_out = nzhi_out;
|
|
||||||
_nylo_out = nylo_out;
|
|
||||||
_nyhi_out = nyhi_out;
|
|
||||||
_nxlo_out = nxlo_out;
|
|
||||||
_nxhi_out = nxhi_out;
|
|
||||||
_ngridz = nzhi_out - nzlo_out + 1;
|
|
||||||
_ngridy = nyhi_out - nylo_out + 1;
|
|
||||||
_ngridx = nxhi_out - nxlo_out + 1;
|
|
||||||
_num_grid_points = _ngridx*_ngridy*_ngridz*2;
|
|
||||||
|
|
||||||
UCL_H_Vec<double> dview;
|
|
||||||
dview.view(host_grid_brick_start,_num_grid_points,*(this->ucl_device));
|
|
||||||
ucl_copy(_cgrid_brick,dview,false);
|
|
||||||
|
|
||||||
const int red_blocks = fphi_uind();
|
const int red_blocks = fphi_uind();
|
||||||
|
|
||||||
_fdip_phi1.update_host(_max_thetai_size*10);
|
_fdip_phi1.update_host(_max_thetai_size*10);
|
||||||
@ -929,7 +932,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
|||||||
k_udirect2b.set_function(*pair_program,kname_udirect2b);
|
k_udirect2b.set_function(*pair_program,kname_udirect2b);
|
||||||
k_umutual2b.set_function(*pair_program,kname_umutual2b);
|
k_umutual2b.set_function(*pair_program,kname_umutual2b);
|
||||||
k_polar.set_function(*pair_program,kname_polar);
|
k_polar.set_function(*pair_program,kname_polar);
|
||||||
k_fphi_uind.set_function(*pair_program,"kname_fphi_uind");
|
k_fphi_uind.set_function(*pair_program,"k_fphi_uind");
|
||||||
k_short_nbor.set_function(*pair_program,kname_short_nbor);
|
k_short_nbor.set_function(*pair_program,kname_short_nbor);
|
||||||
k_special15.set_function(*pair_program,kname_special15);
|
k_special15.set_function(*pair_program,kname_special15);
|
||||||
pos_tex.get_texture(*pair_program,"pos_tex");
|
pos_tex.get_texture(*pair_program,"pos_tex");
|
||||||
|
|||||||
Reference in New Issue
Block a user