Feb2021 GPU Package Update - GPU Package Files

This commit is contained in:
Michael Brown
2021-02-15 08:20:50 -08:00
parent 16004e8f45
commit e7e2d2323b
345 changed files with 13424 additions and 7708 deletions

View File

@ -52,9 +52,23 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *_screen)
{
int max_shared_types=this->device->max_shared_types();
int onetype=0;
#ifdef USE_OPENCL
for (int i=1; i<ntypes; i++)
if (host_type2frho[i]>=0 && host_type2frho[i]<=nfrho-1) {
if (onetype>0)
onetype=-1;
else if (onetype==0)
onetype=i*max_shared_types+i;
}
if (onetype<0) onetype=0;
#endif
int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
gpu_split,_screen,eam,"k_eam");
gpu_split,_screen,eam,"k_eam",onetype);
if (success!=0)
return success;
@ -72,6 +86,13 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
k_energy_fast.set_function(*(this->pair_program),"k_energy_fast");
fp_tex.get_texture(*(this->pair_program),"fp_tex");
fp_tex.bind_float(_fp,1);
#if defined(LAL_OCL_EV_JIT)
k_energy_fast_noev.set_function(*(this->pair_program_noev),"k_energy_fast");
#else
k_energy_sel = &k_energy_fast;
#endif
_compiled_energy = true;
// Initialize timers for selected GPU
@ -88,7 +109,6 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
int lj_types=ntypes;
shared_types=false;
int max_shared_types=this->device->max_shared_types();
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
lj_types=max_shared_types;
shared_types=true;
@ -260,6 +280,9 @@ void EAMT::clear() {
if (_compiled_energy) {
k_energy_fast.clear();
k_energy.clear();
#if defined(LAL_OCL_EV_JIT)
k_energy_fast_noev.clear();
#endif
_compiled_energy=false;
}
@ -278,11 +301,18 @@ template <class numtyp, class acctyp>
void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, const double cpu_time,
bool &success, void **fp_ptr) {
this->acc_timers();
int eflag, vflag;
if (eflag_in) eflag=2;
else eflag=0;
if (vflag_in) vflag=2;
else vflag=0;
this->set_kernel(eflag,vflag);
if (this->device->time_device()) {
// Put time from the second part to the total time_pair
@ -346,12 +376,20 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
template <class numtyp, class acctyp>
int** EAMT::compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial, tagint **special,
const bool eflag, const bool vflag, const bool eatom,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag_in,
const bool vflag_in, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, int &inum,
void **fp_ptr) {
this->acc_timers();
int eflag, vflag;
if (eflag_in) eflag=2;
else eflag=0;
if (vflag_in) vflag=2;
else vflag=0;
this->set_kernel(eflag,vflag);
if (this->device->time_device()) {
// Put time from the second part to the total time_pair
@ -430,9 +468,9 @@ void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
loop2(eflag,vflag);
if (ilist == nullptr)
this->ans->copy_answers(eflag,vflag,eatom,vatom);
this->ans->copy_answers(eflag,vflag,eatom,vatom, this->ans->inum());
else
this->ans->copy_answers(eflag,vflag,eatom,vatom, ilist);
this->ans->copy_answers(eflag,vflag,eatom,vatom, ilist, this->ans->inum());
this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer();
@ -442,20 +480,9 @@ void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
// Calculate per-atom energies and forces
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void EAMT::loop(const bool _eflag, const bool _vflag) {
int EAMT::loop(const int eflag, const int vflag) {
// Compute the block size and grid size to keep all cores busy
const int BX=this->block_size();
int eflag, vflag;
if (_eflag)
eflag=1;
else
eflag=0;
if (_vflag)
vflag=1;
else
vflag=0;
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
(BX/this->_threads_per_atom)));
@ -464,13 +491,18 @@ void EAMT::loop(const bool _eflag, const bool _vflag) {
this->time_pair.start();
if (shared_types) {
this->k_energy_fast.set_size(GX,BX);
this->k_energy_fast.run(&this->atom->x, &type2rhor_z2r, &type2frho,
&rhor_spline2, &frho_spline1,&frho_spline2,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&_fp, &this->ans->engv, &eflag, &ainum,
&nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho,
&_rhomax, &_nrho, &_nr, &this->_threads_per_atom);
#if defined(LAL_OCL_EV_JIT)
if (eflag || vflag) k_energy_sel = &k_energy_fast;
else k_energy_sel = &k_energy_fast_noev;
#endif
k_energy_sel->set_size(GX,BX);
k_energy_sel->run(&this->atom->x, &type2rhor_z2r, &type2frho,
&rhor_spline2, &frho_spline1,&frho_spline2,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&_fp, &this->ans->engv, &eflag, &ainum,
&nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho,
&_rhomax, &_nrho, &_nr, &this->_threads_per_atom);
} else {
this->k_energy.set_size(GX,BX);
this->k_energy.run(&this->atom->x, &type2rhor_z2r, &type2frho,
@ -482,6 +514,7 @@ void EAMT::loop(const bool _eflag, const bool _vflag) {
}
this->time_pair.stop();
return ainum;
}
// ---------------------------------------------------------------------------
@ -510,8 +543,8 @@ void EAMT::loop2(const bool _eflag, const bool _vflag) {
this->time_pair2.start();
if (shared_types) {
this->k_pair_fast.set_size(GX,BX);
this->k_pair_fast.run(&this->atom->x, &_fp, &type2rhor_z2r,
this->k_pair_sel->set_size(GX,BX);
this->k_pair_sel->run(&this->atom->x, &_fp, &type2rhor_z2r,
&rhor_spline1, &z2r_spline1, &z2r_spline2,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag,