Moved fphi_uind up to BaseAmoeba
This commit is contained in:
@ -259,38 +259,6 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
return GX;
|
return GX;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Interpolate the potential from the PME grid
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Pre-move version of fphi_uind() (removed side of this diff): launches the
// k_fphi_uind kernel over the atoms reported by ans->inum().
// Returns 0 without launching when there are no atoms, otherwise the
// number of blocks (GX) the kernel was sized with.
template <class numtyp, class acctyp>
|
|
||||||
int AmoebaT::fphi_uind() {
|
|
||||||
int ainum=this->ans->inum();
|
|
||||||
// Nothing to do: skip timers and the kernel launch entirely
if (ainum == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// NOTE(review): _nall and nbor_pitch are not referenced again in this function
int _nall=this->atom->nall();
|
|
||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
|
||||||
const int BX=this->block_size();
|
|
||||||
// NOTE(review): re-queries ans->inum() instead of reusing ainum
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
|
||||||
(BX/this->_threads_per_atom)));
|
|
||||||
|
|
||||||
// time_pair brackets only the kernel set-up and launch below
this->time_pair.start();
|
|
||||||
// Product of the y and z grid dimensions, passed to the kernel with _ngridy
int ngridyz = this->_ngridy * this->_ngridz;
|
|
||||||
this->k_fphi_uind.set_size(GX,BX);
|
|
||||||
this->k_fphi_uind.run(&this->atom->x, &this->_thetai1,
|
|
||||||
&this->_thetai2, &this->_thetai3,
|
|
||||||
&this->_igrid, &this->_cgrid_brick,
|
|
||||||
&this->_fdip_phi1, &this->_fdip_phi2,
|
|
||||||
&this->_fdip_sum_phi, &this->_bsorder,
|
|
||||||
&ainum, &ngridyz, &this->_ngridy,
|
|
||||||
&this->_threads_per_atom);
|
|
||||||
this->time_pair.stop();
|
|
||||||
|
|
||||||
// Number of blocks used for the launch
return GX;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the polar real-space term, returning tep
|
// Calculate the polar real-space term, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -1706,7 +1706,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
tuv012 = (numtyp)0.0;
|
tuv012 = (numtyp)0.0;
|
||||||
tuv111 = (numtyp)0.0;
|
tuv111 = (numtyp)0.0;
|
||||||
|
|
||||||
k = igrid[3*i+2] - nlpts;
|
k = igrid[4*i+2] - nlpts;
|
||||||
for (int kb = 0; kb < bsorder; kb++) {
|
for (int kb = 0; kb < bsorder; kb++) {
|
||||||
/*
|
/*
|
||||||
v0 = thetai3[m][kb][0];
|
v0 = thetai3[m][kb][0];
|
||||||
@ -1742,7 +1742,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
tu12 = (numtyp)0.0;
|
tu12 = (numtyp)0.0;
|
||||||
tu03 = (numtyp)0.0;
|
tu03 = (numtyp)0.0;
|
||||||
|
|
||||||
j = igrid[3*i+1] - nlpts;
|
j = igrid[4*i+1] - nlpts;
|
||||||
for (int jb = 0; jb < bsorder; jb++) {
|
for (int jb = 0; jb < bsorder; jb++) {
|
||||||
/*
|
/*
|
||||||
u0 = thetai2[m][jb][0];
|
u0 = thetai2[m][jb][0];
|
||||||
@ -1763,7 +1763,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
|
|||||||
t2_2 = (numtyp)0.0;
|
t2_2 = (numtyp)0.0;
|
||||||
t3 = (numtyp)0.0;
|
t3 = (numtyp)0.0;
|
||||||
|
|
||||||
int ii = igrid[3*i] - nlpts;
|
int ii = igrid[4*i] - nlpts;
|
||||||
for (int ib = 0; ib < bsorder; ib++) {
|
for (int ib = 0; ib < bsorder; ib++) {
|
||||||
/*
|
/*
|
||||||
tq_1 = grid[k][j][ii][0];
|
tq_1 = grid[k][j][ii][0];
|
||||||
|
|||||||
@ -91,7 +91,6 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
int multipole_real(const int eflag, const int vflag);
|
int multipole_real(const int eflag, const int vflag);
|
||||||
int udirect2b(const int eflag, const int vflag);
|
int udirect2b(const int eflag, const int vflag);
|
||||||
int umutual2b(const int eflag, const int vflag);
|
int umutual2b(const int eflag, const int vflag);
|
||||||
int fphi_uind();
|
|
||||||
int polar_real(const int eflag, const int vflag);
|
int polar_real(const int eflag, const int vflag);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -144,7 +144,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
|||||||
_max_fieldp_size = _max_tep_size;
|
_max_fieldp_size = _max_tep_size;
|
||||||
_fieldp.alloc(_max_fieldp_size*8,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
|
_fieldp.alloc(_max_fieldp_size*8,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
|
||||||
|
|
||||||
_max_thetai_size = 0;
|
_max_thetai_size = _max_tep_size;
|
||||||
|
|
||||||
_nmax = nall;
|
_nmax = nall;
|
||||||
dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
|
dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
@ -466,7 +466,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
_thetai1.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
_thetai1.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
_thetai2.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
_thetai2.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
_thetai3.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
_thetai3.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
_igrid.alloc(_max_thetai_size*3,*(this->ucl_device),UCL_READ_ONLY);
|
_igrid.alloc(_max_thetai_size*4,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
|
||||||
_fdip_phi1.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
_fdip_phi1.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
||||||
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
_fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_WRITE_ONLY);
|
||||||
@ -478,7 +478,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
_thetai1.resize(_max_thetai_size*bsorder*4);
|
_thetai1.resize(_max_thetai_size*bsorder*4);
|
||||||
_thetai2.resize(_max_thetai_size*bsorder*4);
|
_thetai2.resize(_max_thetai_size*bsorder*4);
|
||||||
_thetai3.resize(_max_thetai_size*bsorder*4);
|
_thetai3.resize(_max_thetai_size*bsorder*4);
|
||||||
_igrid.resize(_max_thetai_size*3);
|
_igrid.resize(_max_thetai_size*4);
|
||||||
|
|
||||||
_fdip_phi1.resize(_max_thetai_size*10);
|
_fdip_phi1.resize(_max_thetai_size*10);
|
||||||
_fdip_phi2.resize(_max_thetai_size*10);
|
_fdip_phi2.resize(_max_thetai_size*10);
|
||||||
@ -498,7 +498,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
|||||||
ucl_copy(_thetai3,dview,false);
|
ucl_copy(_thetai3,dview,false);
|
||||||
|
|
||||||
UCL_H_Vec<int> dview_int;
|
UCL_H_Vec<int> dview_int;
|
||||||
dview_int.view(&host_igrid[0][0],inum_full*3,*(this->ucl_device));
|
dview_int.view(&host_igrid[0][0],inum_full*4,*(this->ucl_device));
|
||||||
ucl_copy(_igrid,dview_int,false);
|
ucl_copy(_igrid,dview_int,false);
|
||||||
|
|
||||||
_nzlo_out = nzlo_out;
|
_nzlo_out = nzlo_out;
|
||||||
@ -666,6 +666,34 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
|||||||
const int red_blocks = fphi_uind();
|
const int red_blocks = fphi_uind();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Interpolate the potential from the PME grid
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Base-class implementation of fphi_uind(): launches the k_fphi_uind kernel
// over the atoms reported by ans->inum(), passing the thetai1/2/3 and igrid
// buffers, the _cgrid_brick grid, and the _fdip_phi1/_fdip_phi2/_fdip_sum_phi
// output buffers.
// Returns 0 without launching when there are no atoms, otherwise the
// number of blocks (GX) the kernel was sized with.
template <class numtyp, class acctyp>
|
||||||
|
int BaseAmoebaT::fphi_uind() {
|
||||||
|
int ainum=ans->inum();
|
||||||
|
// Nothing to do: skip timers and the kernel launch entirely
if (ainum == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// NOTE(review): _nall and nbor_pitch are not referenced again in this
// function; candidates for removal if nall()/nbor_pitch() are plain getters
int _nall=atom->nall();
|
||||||
|
int nbor_pitch=nbor->nbor_pitch();
|
||||||
|
|
||||||
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
const int BX=block_size();
|
||||||
|
// NOTE(review): re-queries ans->inum() instead of reusing ainum; the divisor
// BX/_threads_per_atom presumably is an integer division -- confirm types
int GX=static_cast<int>(ceil(static_cast<double>(ans->inum())/
|
||||||
|
(BX/_threads_per_atom)));
|
||||||
|
|
||||||
|
// time_pair brackets only the kernel set-up and launch below
time_pair.start();
|
||||||
|
// Product of the y and z grid dimensions, passed to the kernel with _ngridy
int ngridyz = _ngridy * _ngridz;
|
||||||
|
k_fphi_uind.set_size(GX,BX);
|
||||||
|
k_fphi_uind.run(&atom->x, &_thetai1, &_thetai2, &_thetai3,
|
||||||
|
&_igrid, &_cgrid_brick, &_fdip_phi1, &_fdip_phi2,
|
||||||
|
&_fdip_sum_phi, &_bsorder, &ainum, &ngridyz, &_ngridy,
|
||||||
|
&_threads_per_atom);
|
||||||
|
time_pair.stop();
|
||||||
|
|
||||||
|
// Number of blocks used for the launch
return GX;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute polar real-space
|
// Reneighbor on GPU if necessary, and then compute polar real-space
|
||||||
|
|||||||
@ -311,7 +311,7 @@ class BaseAmoeba {
|
|||||||
virtual int multipole_real(const int eflag, const int vflag) = 0;
|
virtual int multipole_real(const int eflag, const int vflag) = 0;
|
||||||
virtual int udirect2b(const int eflag, const int vflag) = 0;
|
virtual int udirect2b(const int eflag, const int vflag) = 0;
|
||||||
virtual int umutual2b(const int eflag, const int vflag) = 0;
|
virtual int umutual2b(const int eflag, const int vflag) = 0;
|
||||||
virtual int fphi_uind() = 0;
|
virtual int fphi_uind();
|
||||||
virtual int polar_real(const int eflag, const int vflag) = 0;
|
virtual int polar_real(const int eflag, const int vflag) = 0;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user