Re-arranged memory allocation for cgrid_brick; some issues still need to be fixed
This commit is contained in:
@ -1646,7 +1646,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
//numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
|
//numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
|
||||||
acctyp fdip_buf[32];
|
acctyp fdip_buf[32];
|
||||||
|
|
||||||
int j,k,m;
|
int j,k;
|
||||||
int nlpts = (bsorder-1) / 2;
|
int nlpts = (bsorder-1) / 2;
|
||||||
|
|
||||||
// extract the permanent multipole field at each site
|
// extract the permanent multipole field at each site
|
||||||
|
|||||||
@ -164,15 +164,16 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
|
|||||||
|
|
||||||
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
double ***host_thetai3, int** igrid,
|
double ***host_thetai3, int** igrid, double ****host_grid_brick,
|
||||||
double *host_grid_brick_start, double ****host_grid_brick,
|
|
||||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
const int nzlo_out, const int nzhi_out,
|
||||||
int nxlo_out, int nxhi_out, bool& first_iteration) {
|
const int nylo_out, const int nyhi_out,
|
||||||
|
const int nxlo_out, const int nxhi_out,
|
||||||
|
bool& first_iteration) {
|
||||||
AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
|
AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
|
||||||
host_thetai3, igrid, host_grid_brick_start, host_grid_brick, host_fdip_phi1,
|
host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
|
||||||
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
|
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
|
||||||
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
|
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
|
||||||
}
|
}
|
||||||
|
|
||||||
void amoeba_setup_fft(const int numel, const int element_type) {
|
void amoeba_setup_fft(const int numel, const int element_type) {
|
||||||
|
|||||||
@ -186,8 +186,10 @@ void BaseAmoebaT::clear_atomic() {
|
|||||||
_igrid.clear();
|
_igrid.clear();
|
||||||
_fdip_phi1.clear();
|
_fdip_phi1.clear();
|
||||||
_fdip_phi2.clear();
|
_fdip_phi2.clear();
|
||||||
_cgrid_brick.clear();
|
|
||||||
_fdip_sum_phi.clear();
|
_fdip_sum_phi.clear();
|
||||||
|
_cgrid_brick.clear();
|
||||||
|
hview_cgrid.clear();
|
||||||
|
|
||||||
dev_nspecial15.clear();
|
dev_nspecial15.clear();
|
||||||
dev_special15.clear();
|
dev_special15.clear();
|
||||||
dev_special15_t.clear();
|
dev_special15_t.clear();
|
||||||
@ -563,10 +565,9 @@ template <class numtyp, class acctyp>
|
|||||||
void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
double ***host_thetai3, int** host_igrid,
|
double ***host_thetai3, int** host_igrid,
|
||||||
double* host_grid_brick_start, double**** host_grid_brick,
|
const int nzlo_out, const int nzhi_out,
|
||||||
int nzlo_out, int nzhi_out,
|
const int nylo_out, const int nyhi_out,
|
||||||
int nylo_out, int nyhi_out,
|
const int nxlo_out, const int nxhi_out) {
|
||||||
int nxlo_out, int nxhi_out) {
|
|
||||||
|
|
||||||
_bsorder = bsorder;
|
_bsorder = bsorder;
|
||||||
|
|
||||||
@ -642,34 +643,8 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
dview_int[idx+1] = host_igrid[i][1];
|
dview_int[idx+1] = host_igrid[i][1];
|
||||||
dview_int[idx+2] = host_igrid[i][2];
|
dview_int[idx+2] = host_igrid[i][2];
|
||||||
}
|
}
|
||||||
ucl_copy(_igrid, dview_int, false);
|
ucl_copy(_igrid, dview_int, false);
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// fphi_uind = induced potential from grid
|
|
||||||
// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
|
||||||
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
|
||||||
double ***host_thetai3, int** host_igrid,
|
|
||||||
double *host_grid_brick_start, double ****host_grid_brick,
|
|
||||||
void** host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
|
||||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
|
||||||
int nxlo_out, int nxhi_out, bool& first_iteration)
|
|
||||||
{
|
|
||||||
// allocation/resize and transfers before the first iteration
|
|
||||||
|
|
||||||
if (first_iteration) {
|
|
||||||
precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, host_thetai3,
|
|
||||||
host_igrid, host_grid_brick_start, host_grid_brick, nzlo_out, nzhi_out,
|
|
||||||
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
|
||||||
first_iteration = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update the cgrid_brick with data from the host
|
|
||||||
|
|
||||||
_nzlo_out = nzlo_out;
|
_nzlo_out = nzlo_out;
|
||||||
_nzhi_out = nzhi_out;
|
_nzhi_out = nzhi_out;
|
||||||
_nylo_out = nylo_out;
|
_nylo_out = nylo_out;
|
||||||
@ -681,8 +656,47 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
|||||||
_ngridx = nxhi_out - nxlo_out + 1;
|
_ngridx = nxhi_out - nxlo_out + 1;
|
||||||
_num_grid_points = _ngridx * _ngridy * _ngridz;
|
_num_grid_points = _ngridx * _ngridy * _ngridz;
|
||||||
|
|
||||||
UCL_H_Vec<double> hview_cgrid;
|
int numel = _num_grid_points*2;
|
||||||
hview_cgrid.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_WRITE);
|
if (_cgrid_brick.cols() == 0) {
|
||||||
|
hview_cgrid.alloc(numel, *(this->ucl_device), UCL_READ_WRITE);
|
||||||
|
_cgrid_brick.alloc(numel, *(this->ucl_device), UCL_READ_ONLY);
|
||||||
|
} else if (numel > _cgrid_brick.cols()) {
|
||||||
|
hview_cgrid.resize(numel);
|
||||||
|
_cgrid_brick.resize(numel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// fphi_uind = induced potential from grid
|
||||||
|
// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
||||||
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
|
double ***host_thetai3, int** host_igrid,
|
||||||
|
double ****host_grid_brick,
|
||||||
|
void** host_fdip_phi1,
|
||||||
|
void **host_fdip_phi2,
|
||||||
|
void **host_fdip_sum_phi,
|
||||||
|
const int nzlo_out, const int nzhi_out,
|
||||||
|
const int nylo_out, const int nyhi_out,
|
||||||
|
const int nxlo_out, const int nxhi_out,
|
||||||
|
bool& first_iteration)
|
||||||
|
{
|
||||||
|
// TODO: find out why this alloc helps make the cgrid_brick ucl_copy work
|
||||||
|
UCL_H_Vec<numtyp> hview;
|
||||||
|
hview.alloc(1, *(this->ucl_device), UCL_READ_ONLY);
|
||||||
|
|
||||||
|
// allocation/resize and transfers before the first iteration
|
||||||
|
|
||||||
|
if (first_iteration) {
|
||||||
|
precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2,
|
||||||
|
host_thetai3, host_igrid, nzlo_out, nzhi_out,
|
||||||
|
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
||||||
|
first_iteration = false;
|
||||||
|
}
|
||||||
|
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
||||||
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
||||||
@ -691,10 +705,7 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
|||||||
hview_cgrid[n+1] = host_grid_brick[iz][iy][ix][1];
|
hview_cgrid[n+1] = host_grid_brick[iz][iy][ix][1];
|
||||||
n += 2;
|
n += 2;
|
||||||
}
|
}
|
||||||
//hview_cgrid.view(host_grid_brick_start, _num_grid_points*2, *(this->ucl_device));
|
ucl_copy(_cgrid_brick, hview_cgrid, false);
|
||||||
_cgrid_brick.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_ONLY);
|
|
||||||
ucl_copy(_cgrid_brick,hview_cgrid,false);
|
|
||||||
|
|
||||||
|
|
||||||
const int red_blocks = fphi_uind();
|
const int red_blocks = fphi_uind();
|
||||||
|
|
||||||
|
|||||||
@ -153,10 +153,9 @@ class BaseAmoeba {
|
|||||||
virtual void precompute_induce(const int inum_full, const int bsorder,
|
virtual void precompute_induce(const int inum_full, const int bsorder,
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
double ***host_thetai3, int** igrid,
|
double ***host_thetai3, int** igrid,
|
||||||
double *host_grid_brick_start, double ****host_grid_brick,
|
const int nzlo_out, const int nzhi_out,
|
||||||
int nzlo_out, int nzhi_out,
|
const int nylo_out, const int nyhi_out,
|
||||||
int nylo_out, int nyhi_out,
|
const int nxlo_out, const int nxhi_out);
|
||||||
int nxlo_out, int nxhi_out);
|
|
||||||
|
|
||||||
/// Compute multipole real-space with device neighboring
|
/// Compute multipole real-space with device neighboring
|
||||||
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||||
@ -183,10 +182,12 @@ class BaseAmoeba {
|
|||||||
virtual void compute_fphi_uind(const int inum_full, const int bsorder,
|
virtual void compute_fphi_uind(const int inum_full, const int bsorder,
|
||||||
double ***host_thetai1, double ***host_thetai2,
|
double ***host_thetai1, double ***host_thetai2,
|
||||||
double ***host_thetai3, int** igrid,
|
double ***host_thetai3, int** igrid,
|
||||||
double *host_grid_brick_start, double ****host_grid_brick,
|
double ****host_grid_brick,
|
||||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||||
int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
|
const int nzlo_out, const int nzhi_out,
|
||||||
int nxlo_out, int nxhi_out, bool& first_iteration);
|
const int nylo_out, const int nyhi_out,
|
||||||
|
const int nxlo_out, const int nxhi_out,
|
||||||
|
bool& first_iteration);
|
||||||
|
|
||||||
/// Compute polar real-space with device neighboring
|
/// Compute polar real-space with device neighboring
|
||||||
virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
@ -255,7 +256,9 @@ class BaseAmoeba {
|
|||||||
int _nmax, _max_tep_size, _max_fieldp_size;
|
int _nmax, _max_tep_size, _max_fieldp_size;
|
||||||
|
|
||||||
int _bsorder;
|
int _bsorder;
|
||||||
UCL_D_Vec<numtyp> _thetai1, _thetai2, _thetai3, _cgrid_brick;
|
UCL_D_Vec<numtyp> _thetai1, _thetai2, _thetai3;
|
||||||
|
UCL_H_Vec<numtyp> hview_cgrid;
|
||||||
|
UCL_D_Vec<numtyp> _cgrid_brick;
|
||||||
UCL_D_Vec<int> _igrid;
|
UCL_D_Vec<int> _igrid;
|
||||||
UCL_Vector<numtyp,numtyp> _fdip_phi1, _fdip_phi2, _fdip_sum_phi;
|
UCL_Vector<numtyp,numtyp> _fdip_phi1, _fdip_phi2, _fdip_sum_phi;
|
||||||
int _max_thetai_size;
|
int _max_thetai_size;
|
||||||
|
|||||||
Reference in New Issue
Block a user