diff --git a/lib/gpu/lal_eam.cpp b/lib/gpu/lal_eam.cpp index cdafe72898..2c0d63f7bf 100644 --- a/lib/gpu/lal_eam.cpp +++ b/lib/gpu/lal_eam.cpp @@ -46,7 +46,7 @@ template int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, - double ***host_frho_spline, double rdr, double rdrho, + double ***host_frho_spline, double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, @@ -243,6 +243,12 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor, z2r_spline2_tex.get_texture(*(this->pair_program),"z2r_sp2_tex"); z2r_spline2_tex.bind_float(z2r_spline2,4); + UCL_H_Vec host_write(lj_types*lj_types,*(this->ucl_device), + UCL_WRITE_ONLY); + host_write.zero(); + cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,cutsq,host_write,host_cutsq); + _allocated=true; this->_max_bytes=type2rhor_z2r.row_bytes() + type2frho.row_bytes() @@ -252,6 +258,7 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor, + frho_spline2.row_bytes() + z2r_spline1.row_bytes() + z2r_spline2.row_bytes() + + cutsq.row_bytes() + _fp.device.row_bytes(); return 0; } @@ -270,6 +277,7 @@ void EAMT::clear() { frho_spline2.clear(); z2r_spline1.clear(); z2r_spline2.clear(); + cutsq.clear(); _fp.clear(); @@ -477,7 +485,7 @@ void EAMT::compute2(int *ilist, const bool eflag, const bool vflag, } // --------------------------------------------------------------------------- -// Calculate per-atom energies and forces +// Calculate per-atom fp // --------------------------------------------------------------------------- template int EAMT::loop(const int eflag, const int vflag) { @@ -498,7 +506,7 @@ int EAMT::loop(const int eflag, const int vflag) { k_energy_sel->set_size(GX,BX); k_energy_sel->run(&this->atom->x, &type2rhor_z2r, &type2frho, - &rhor_spline2, &frho_spline1,&frho_spline2, + &rhor_spline2, &frho_spline1, &frho_spline2, &cutsq, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &_fp, &this->ans->engv, &eflag, &ainum, &nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho, @@ -506,7 +514,7 @@ int EAMT::loop(const int eflag, const int vflag) { } else { this->k_energy.set_size(GX,BX); this->k_energy.run(&this->atom->x, &type2rhor_z2r, &type2frho, - &rhor_spline2, &frho_spline1, &frho_spline2, + &rhor_spline2, &frho_spline1, &frho_spline2, &cutsq, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &_fp, &this->ans->engv,&eflag, &ainum, &nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho, &_rhomax, &_nrho, @@ -545,7 +553,7 @@ void EAMT::loop2(const bool _eflag, const bool _vflag) { if (shared_types) { this->k_pair_sel->set_size(GX,BX); this->k_pair_sel->run(&this->atom->x, &_fp, &type2rhor_z2r, - &rhor_spline1, &z2r_spline1, &z2r_spline2, + &rhor_spline1, &z2r_spline1, &z2r_spline2, &cutsq, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &_cutforcesq, &_rdr, @@ -553,7 +561,7 @@ void EAMT::loop2(const bool _eflag, const bool _vflag) { } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, &_fp, &type2rhor_z2r, &rhor_spline1, - &z2r_spline1, &z2r_spline2, &this->nbor->dev_nbor, + &z2r_spline1, &z2r_spline2, &cutsq, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_nr, diff --git a/lib/gpu/lal_eam.cu b/lib/gpu/lal_eam.cu index 3955f3cc8a..4c6aa2f6b2 100644 --- a/lib/gpu/lal_eam.cu +++ b/lib/gpu/lal_eam.cu @@ -216,6 +216,7 @@ __kernel void k_energy(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict rhor_spline2, const __global numtyp4 *restrict frho_spline1, const __global numtyp4 *restrict frho_spline2, + const __global numtyp *restrict cutsq, const __global int *dev_nbor, const __global int *dev_packed, __global numtyp *restrict fp_, @@ -256,7 +257,8 @@ __kernel void k_energy(const __global numtyp4 *restrict x_, numtyp delz = ix.z-jx.z; numtyp rsq = delx*delx+dely*dely+delz*delz; - if (rsq(numtyp)0) { numtyp p = ucl_sqrt(rsq)*rdr + (numtyp)1.0; int m=p; m = MIN(m,nr-1); @@ -281,6 +283,7 @@ __kernel void k_energy_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict rhor_spline2, const __global numtyp4 *restrict frho_spline1, const __global numtyp4 *restrict frho_spline2, + const __global numtyp *restrict cutsq, const __global int *dev_nbor, const __global int *dev_packed, __global numtyp *restrict fp_, @@ -371,6 +374,7 @@ __kernel void k_eam(const __global numtyp4 *restrict x_, const __global numtyp4 *rhor_spline1, const __global numtyp4 *z2r_spline1, const __global numtyp4 *z2r_spline2, + const __global numtyp *cutsq, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *ans, @@ -416,7 +420,8 @@ __kernel void k_eam(const __global numtyp4 *restrict x_, numtyp delz = ix.z-jx.z; numtyp rsq = delx*delx+dely*dely+delz*delz; - if (rsq(numtyp)0) { numtyp r = ucl_sqrt(rsq); numtyp p = r*rdr + (numtyp)1.0; int m=p; @@ -480,6 +485,7 @@ __kernel void k_eam_fast(const __global numtyp4 *x_, const __global numtyp4 *rhor_spline1, const __global numtyp4 *z2r_spline1, const __global numtyp4 *z2r_spline2, + const __global numtyp *cutsq, const __global int *dev_nbor, const __global int *dev_packed, __global acctyp4 *ans, @@ -542,7 +548,8 @@ __kernel void k_eam_fast(const __global numtyp4 *x_, numtyp delz = ix.z-jx.z; numtyp rsq = delx*delx+dely*dely+delz*delz; - if (rsq(numtyp)0) { numtyp r = ucl_sqrt(rsq); numtyp p = r*rdr + (numtyp)1.0; int m=p; diff --git a/lib/gpu/lal_eam.h b/lib/gpu/lal_eam.h index 3cbaeac0b8..efc38537d6 100644 --- a/lib/gpu/lal_eam.h +++ b/lib/gpu/lal_eam.h @@ -40,8 +40,8 @@ class EAM : public BaseAtomic { * - -5 Double precision is not supported on card **/ int init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, - double ***host_z2r_spline, double ***host_frho_spline, double rdr, - double rdrho, double rhomax, int nrhor, int nrho, int nz2r, + double ***host_z2r_spline, double ***host_frho_spline, double** host_cutsq, + double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen); @@ -112,6 +112,8 @@ class EAM : public BaseAtomic { UCL_D_Vec frho_spline1, frho_spline2; UCL_D_Vec rhor_spline1, rhor_spline2; + UCL_D_Vec cutsq; + numtyp _cutforcesq,_rdr,_rdrho, _rhomax; int _nfrho,_nrhor,_nrho,_nz2r,_nr; diff --git a/lib/gpu/lal_eam_alloy_ext.cpp b/lib/gpu/lal_eam_alloy_ext.cpp index f7c4986e68..5a3dfb9d6d 100644 --- a/lib/gpu/lal_eam_alloy_ext.cpp +++ b/lib/gpu/lal_eam_alloy_ext.cpp @@ -30,7 +30,7 @@ static EAM EAMALMF; int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, - double ***host_frho_spline, + double ***host_frho_spline, double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, @@ -66,7 +66,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, if (world_me==0) init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); @@ -86,7 +86,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, if (gpu_rank==i && world_me!=0) init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); diff --git a/lib/gpu/lal_eam_ext.cpp b/lib/gpu/lal_eam_ext.cpp index 3010e0ea7f..a884335bd9 100644 --- a/lib/gpu/lal_eam_ext.cpp +++ b/lib/gpu/lal_eam_ext.cpp @@ -30,7 +30,7 @@ static EAM EAMMF; int eam_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, - double ***host_frho_spline, + double ***host_frho_spline, double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, @@ -66,7 +66,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq, if (world_me==0) init_ok=EAMMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); @@ -86,7 +86,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq, if (gpu_rank==i && world_me!=0) init_ok=EAMMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); diff --git a/lib/gpu/lal_eam_fs_ext.cpp b/lib/gpu/lal_eam_fs_ext.cpp index 205b601562..5aad871237 100644 --- a/lib/gpu/lal_eam_fs_ext.cpp +++ b/lib/gpu/lal_eam_fs_ext.cpp @@ -30,7 +30,7 @@ static EAM EAMFSMF; int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, - double ***host_frho_spline, + double ***host_frho_spline, double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, @@ -66,7 +66,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, if (world_me==0) init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); @@ -86,7 +86,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, if (gpu_rank==i && world_me!=0) init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, host_type2frho, host_rhor_spline, host_z2r_spline, - host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, + host_frho_spline, host_cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); diff --git a/src/GPU/pair_eam_alloy_gpu.cpp b/src/GPU/pair_eam_alloy_gpu.cpp index 4678a6f669..56ca8c6f69 100644 --- a/src/GPU/pair_eam_alloy_gpu.cpp +++ b/src/GPU/pair_eam_alloy_gpu.cpp @@ -42,7 +42,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, double ***host_frho_spline, - double rdr, double rdrho, double rhomax, + double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, @@ -187,7 +187,7 @@ void PairEAMAlloyGPU::init_style() int mnf = 5e-2 * neighbor->oneatom; int success = eam_alloy_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline, frho_spline, - rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, + cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size); GPU_EXTRA::check_flag(success,error,world); diff --git a/src/GPU/pair_eam_fs_gpu.cpp b/src/GPU/pair_eam_fs_gpu.cpp index 390bb93987..5cf5c9180a 100644 --- a/src/GPU/pair_eam_fs_gpu.cpp +++ b/src/GPU/pair_eam_fs_gpu.cpp @@ -42,7 +42,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, double ***host_frho_spline, - double rdr, double rdrho, double rhomax, + double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, @@ -186,7 +186,7 @@ void PairEAMFSGPU::init_style() int mnf = 5e-2 * neighbor->oneatom; int success = eam_fs_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline, frho_spline, - rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, + cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size); GPU_EXTRA::check_flag(success,error,world); diff --git a/src/GPU/pair_eam_gpu.cpp b/src/GPU/pair_eam_gpu.cpp index e458ea2020..724f71b312 100644 --- a/src/GPU/pair_eam_gpu.cpp +++ b/src/GPU/pair_eam_gpu.cpp @@ -42,7 +42,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor, int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline, double ***host_z2r_spline, double ***host_frho_spline, - double rdr, double rdrho, double rhomax, + double** host_cutsq, double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, @@ -188,7 +188,7 @@ void PairEAMGPU::init_style() int mnf = 5e-2 * neighbor->oneatom; int success = eam_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline, frho_spline, - rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, + cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size); GPU_EXTRA::check_flag(success,error,world);