Feb2021 GPU Package Update - GPU Package Files
This commit is contained in:
@ -52,9 +52,23 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen)
|
||||
{
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
|
||||
int onetype=0;
|
||||
#ifdef USE_OPENCL
|
||||
for (int i=1; i<ntypes; i++)
|
||||
if (host_type2frho[i]>=0 && host_type2frho[i]<=nfrho-1) {
|
||||
if (onetype>0)
|
||||
onetype=-1;
|
||||
else if (onetype==0)
|
||||
onetype=i*max_shared_types+i;
|
||||
}
|
||||
if (onetype<0) onetype=0;
|
||||
#endif
|
||||
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
|
||||
gpu_split,_screen,eam,"k_eam");
|
||||
gpu_split,_screen,eam,"k_eam",onetype);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
@ -72,6 +86,13 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
||||
k_energy_fast.set_function(*(this->pair_program),"k_energy_fast");
|
||||
fp_tex.get_texture(*(this->pair_program),"fp_tex");
|
||||
fp_tex.bind_float(_fp,1);
|
||||
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
k_energy_fast_noev.set_function(*(this->pair_program_noev),"k_energy_fast");
|
||||
#else
|
||||
k_energy_sel = &k_energy_fast;
|
||||
#endif
|
||||
|
||||
_compiled_energy = true;
|
||||
|
||||
// Initialize timers for selected GPU
|
||||
@ -88,7 +109,6 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
||||
int lj_types=ntypes;
|
||||
shared_types=false;
|
||||
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
shared_types=true;
|
||||
@ -260,6 +280,9 @@ void EAMT::clear() {
|
||||
if (_compiled_energy) {
|
||||
k_energy_fast.clear();
|
||||
k_energy.clear();
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
k_energy_fast_noev.clear();
|
||||
#endif
|
||||
_compiled_energy=false;
|
||||
}
|
||||
|
||||
@ -278,11 +301,18 @@ template <class numtyp, class acctyp>
|
||||
void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, const double cpu_time,
|
||||
bool &success, void **fp_ptr) {
|
||||
this->acc_timers();
|
||||
int eflag, vflag;
|
||||
if (eflag_in) eflag=2;
|
||||
else eflag=0;
|
||||
if (vflag_in) vflag=2;
|
||||
else vflag=0;
|
||||
|
||||
this->set_kernel(eflag,vflag);
|
||||
|
||||
if (this->device->time_device()) {
|
||||
// Put time from the second part to the total time_pair
|
||||
@ -346,12 +376,20 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
|
||||
template <class numtyp, class acctyp>
|
||||
int** EAMT::compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag_in,
|
||||
const bool vflag_in, const bool eatom,
|
||||
const bool vatom, int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success, int &inum,
|
||||
void **fp_ptr) {
|
||||
this->acc_timers();
|
||||
int eflag, vflag;
|
||||
if (eflag_in) eflag=2;
|
||||
else eflag=0;
|
||||
if (vflag_in) vflag=2;
|
||||
else vflag=0;
|
||||
|
||||
this->set_kernel(eflag,vflag);
|
||||
|
||||
if (this->device->time_device()) {
|
||||
// Put time from the second part to the total time_pair
|
||||
@ -430,9 +468,9 @@ void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
|
||||
|
||||
loop2(eflag,vflag);
|
||||
if (ilist == nullptr)
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom);
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom, this->ans->inum());
|
||||
else
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom, ilist);
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom, ilist, this->ans->inum());
|
||||
|
||||
this->device->add_ans_object(this->ans);
|
||||
this->hd_balancer.stop_timer();
|
||||
@ -442,20 +480,9 @@ void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
|
||||
// Calculate per-atom energies and forces
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void EAMT::loop(const bool _eflag, const bool _vflag) {
|
||||
int EAMT::loop(const int eflag, const int vflag) {
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=this->block_size();
|
||||
int eflag, vflag;
|
||||
if (_eflag)
|
||||
eflag=1;
|
||||
else
|
||||
eflag=0;
|
||||
|
||||
if (_vflag)
|
||||
vflag=1;
|
||||
else
|
||||
vflag=0;
|
||||
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
|
||||
@ -464,13 +491,18 @@ void EAMT::loop(const bool _eflag, const bool _vflag) {
|
||||
this->time_pair.start();
|
||||
|
||||
if (shared_types) {
|
||||
this->k_energy_fast.set_size(GX,BX);
|
||||
this->k_energy_fast.run(&this->atom->x, &type2rhor_z2r, &type2frho,
|
||||
&rhor_spline2, &frho_spline1,&frho_spline2,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&_fp, &this->ans->engv, &eflag, &ainum,
|
||||
&nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho,
|
||||
&_rhomax, &_nrho, &_nr, &this->_threads_per_atom);
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
if (eflag || vflag) k_energy_sel = &k_energy_fast;
|
||||
else k_energy_sel = &k_energy_fast_noev;
|
||||
#endif
|
||||
|
||||
k_energy_sel->set_size(GX,BX);
|
||||
k_energy_sel->run(&this->atom->x, &type2rhor_z2r, &type2frho,
|
||||
&rhor_spline2, &frho_spline1,&frho_spline2,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&_fp, &this->ans->engv, &eflag, &ainum,
|
||||
&nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho,
|
||||
&_rhomax, &_nrho, &_nr, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_energy.set_size(GX,BX);
|
||||
this->k_energy.run(&this->atom->x, &type2rhor_z2r, &type2frho,
|
||||
@ -482,6 +514,7 @@ void EAMT::loop(const bool _eflag, const bool _vflag) {
|
||||
}
|
||||
|
||||
this->time_pair.stop();
|
||||
return ainum;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -510,8 +543,8 @@ void EAMT::loop2(const bool _eflag, const bool _vflag) {
|
||||
this->time_pair2.start();
|
||||
|
||||
if (shared_types) {
|
||||
this->k_pair_fast.set_size(GX,BX);
|
||||
this->k_pair_fast.run(&this->atom->x, &_fp, &type2rhor_z2r,
|
||||
this->k_pair_sel->set_size(GX,BX);
|
||||
this->k_pair_sel->run(&this->atom->x, &_fp, &type2rhor_z2r,
|
||||
&rhor_spline1, &z2r_spline1, &z2r_spline2,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag,
|
||||
|
||||
Reference in New Issue
Block a user