/*************************************************************************** sw.cpp ------------------- W. Michael Brown (ORNL) Class for acceleration of the sw pair style. __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ begin : Tue March 26, 2013 email : brownw@ornl.gov ***************************************************************************/ #if defined(USE_OPENCL) #include "sw_cl.h" #elif defined(USE_CUDART) const char *lj=0; #else #include "sw_cubin.h" #endif #include "lal_sw.h" #include using namespace LAMMPS_AL; #define SWT SW extern Device device; template SWT::SW() : BaseThree(), _allocated(false) { } template SWT::~SW() { clear(); } template int SWT::bytes_per_atom(const int max_nbors) const { return this->bytes_per_atom_atomic(max_nbors); } template int SWT::init(const int nlocal, const int nall, const int max_nbors, const double cell_size, const double gpu_split, FILE *_screen, const double epsilon, const double sigma, const double lambda, const double gamma, const double costheta, const double biga, const double bigb, const double powerp, const double powerq, const double cut, const double cutsq) { sw_epsilon=static_cast(epsilon); sw_sigma=static_cast(sigma); sw_lambda=static_cast(lambda); sw_gamma=static_cast(gamma); sw_costheta=static_cast(costheta); sw_biga=static_cast(biga); sw_bigb=static_cast(bigb); sw_powerp=static_cast(powerp); sw_powerq=static_cast(powerq); sw_cut=static_cast(cut); sw_cutsq=static_cast(cutsq); if (sw_cutsq>=sw_cut*sw_cut) sw_cutsq=sw_cut*sw_cut-1e-4; int success; success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,sw,"k_sw","k_sw_three_center", "k_sw_three_end"); if (success!=0) return success; // If atom type constants fit in shared memory use fast kernel shared_types=true; _allocated=true; this->_max_bytes=0; return 0; } template void SWT::clear() { if (!_allocated) return; _allocated=false; this->clear_atomic(); } template double SWT::host_memory_usage() const { return this->host_memory_usage_atomic()+sizeof(SW); } #define KTHREADS this->_threads_per_atom #define JTHREADS this->_threads_per_atom // --------------------------------------------------------------------------- // Calculate energies, forces, and torques // --------------------------------------------------------------------------- template void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { // Compute the block size and grid size to keep all cores busy int BX=this->block_pair(); int eflag, vflag; if (_eflag) eflag=1; else eflag=0; if (_vflag) vflag=1; else vflag=0; int GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); int ainum=this->ans->inum(); int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &sw_cut, &sw_epsilon, &sw_sigma, &sw_biga, &sw_bigb, &sw_powerp, &sw_powerq, &sw_cutsq); BX=this->block_size(); GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/(KTHREADS*JTHREADS)))); this->k_three_center.set_size(GX,BX); this->k_three_center.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &evatom, &sw_cut, &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma, &sw_costheta, &sw_cutsq); Answer *end_ans; #ifdef THREE_CONCURRENT end_ans=this->ans2; #else end_ans=this->ans; #endif if (evatom!=0) { this->k_three_end_vatom.set_size(GX,BX); this->k_three_end_vatom.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &sw_cut, &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma, &sw_costheta, &sw_cutsq); } else { this->k_three_end.set_size(GX,BX); this->k_three_end.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &sw_cut, &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma, &sw_costheta, &sw_cutsq); } this->time_pair.stop(); } template class SW;