Cleaned up 3-body gpu styles, and fixed a bug for tersoff/zbl/gpu.
There is an unresolved bug for neigh no with tpa > 1 in BaseThree; enforce tpa = 1 for neigh no in BaseThree for now.
This commit is contained in:
@ -20,7 +20,7 @@ using namespace LAMMPS_AL;
|
||||
extern Device<PRECISION,ACC_PRECISION> global_device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
BaseThreeT::BaseThree() : _compiled(false), _max_bytes(0), _short_nbor(false) {
|
||||
BaseThreeT::BaseThree() : _compiled(false), _max_bytes(0) {
|
||||
device=&global_device;
|
||||
ans=new Answer<numtyp,acctyp>();
|
||||
nbor=new Neighbor();
|
||||
@ -73,6 +73,7 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
|
||||
if (_threads_per_atom>1 && gpu_nbor==0) { // neigh no and tpa > 1
|
||||
nbor->packing(true);
|
||||
_nbor_data=&(nbor->dev_packed);
|
||||
_threads_per_atom = 1; // enforce tpa = 1 for now
|
||||
} else // neigh yes or tpa == 1
|
||||
_nbor_data=&(nbor->dev_nbor);
|
||||
if (_threads_per_atom*_threads_per_atom>device->warp_size())
|
||||
@ -113,14 +114,10 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
|
||||
_max_an_bytes+=ans2->gpu_bytes();
|
||||
#endif
|
||||
|
||||
// if short neighbor list is supported
|
||||
if (short_nbor) {
|
||||
_short_nbor = true;
|
||||
int ef_nall=nall;
|
||||
if (ef_nall==0)
|
||||
ef_nall=2000;
|
||||
dev_short_nbor.alloc(ef_nall*(2+max_nbors),*(this->ucl_device),UCL_READ_WRITE);
|
||||
}
|
||||
int ef_nall=nall;
|
||||
if (ef_nall==0)
|
||||
ef_nall=2000;
|
||||
dev_short_nbor.alloc(ef_nall*(2+max_nbors),*(this->ucl_device),UCL_READ_WRITE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -269,14 +266,10 @@ void BaseThreeT::compute(const int f_ago, const int inum_full, const int nall,
|
||||
hd_balancer.start_timer();
|
||||
atom->add_x_data(host_x,host_type);
|
||||
|
||||
// if short neighbor list is supported
|
||||
if (_short_nbor) {
|
||||
|
||||
// re-allocate dev_short_nbor if necessary
|
||||
if (nall*(2+_max_nbors) > dev_short_nbor.cols()) {
|
||||
int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
dev_short_nbor.resize((2+_max_nbors)*_nmax);
|
||||
}
|
||||
// re-allocate dev_short_nbor if necessary
|
||||
if (nall*(2+_max_nbors) > dev_short_nbor.cols()) {
|
||||
int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
dev_short_nbor.resize((2+_max_nbors)*_nmax);
|
||||
}
|
||||
|
||||
// _ainum to be used in loop() for short neighbor list build
|
||||
@ -342,14 +335,10 @@ int ** BaseThreeT::compute(const int ago, const int inum_full,
|
||||
*ilist=nbor->host_ilist.begin();
|
||||
*jnum=nbor->host_acc.begin();
|
||||
|
||||
// if short neighbor list is supported
|
||||
if (_short_nbor) {
|
||||
|
||||
// re-allocate dev_short_nbor if necessary
|
||||
if (nall*(2+_max_nbors) > dev_short_nbor.cols()) {
|
||||
int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
dev_short_nbor.resize((2+_max_nbors)*_nmax);
|
||||
}
|
||||
// re-allocate dev_short_nbor if necessary
|
||||
if (nall*(2+_max_nbors) > dev_short_nbor.cols()) {
|
||||
int _nmax=static_cast<int>(static_cast<double>(nall)*1.10);
|
||||
dev_short_nbor.resize((2+_max_nbors)*_nmax);
|
||||
}
|
||||
|
||||
// _ainum to be used in loop() for short neighbor list build
|
||||
@ -394,7 +383,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
k_three_end.set_function(*pair_program,three_end);
|
||||
k_three_end_vatom.set_function(*pair_program,vatom_name.c_str());
|
||||
k_pair.set_function(*pair_program,two);
|
||||
if (short_nbor) k_short_nbor.set_function(*pair_program,short_nbor);
|
||||
k_short_nbor.set_function(*pair_program,short_nbor);
|
||||
pos_tex.get_texture(*pair_program,"pos_tex");
|
||||
|
||||
#ifdef THREE_CONCURRENT
|
||||
|
||||
@ -199,7 +199,7 @@ class BaseThree {
|
||||
UCL_Texture pos_tex;
|
||||
|
||||
protected:
|
||||
bool _compiled,_short_nbor;
|
||||
bool _compiled;
|
||||
int _block_pair, _block_size, _threads_per_atom, _end_command_queue;
|
||||
int _gpu_nbor;
|
||||
double _max_bytes, _max_an_bytes;
|
||||
|
||||
@ -690,7 +690,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_short_nbor,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
|
||||
@ -293,7 +293,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
(BX/(JTHREADS*KTHREADS))));
|
||||
|
||||
this->k_zeta.set_size(GX,BX);
|
||||
this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq,
|
||||
this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
|
||||
@ -702,7 +702,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_short_nbor,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
|
||||
Reference in New Issue
Block a user