From 3d5897c9263ec2e4ecf39970409e705b2536a6cc Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 18 Apr 2021 01:40:33 -0700 Subject: [PATCH] Fixes issue from Feb 2021 GPU package update for tersoff styles using multiple types. --- lib/gpu/lal_tersoff.cpp | 12 +++--------- lib/gpu/lal_tersoff.cu | 16 ++-------------- lib/gpu/lal_tersoff.h | 2 +- lib/gpu/lal_tersoff_extra.h | 5 ++++- lib/gpu/lal_tersoff_mod.cpp | 12 +++--------- lib/gpu/lal_tersoff_mod.cu | 22 +++++----------------- lib/gpu/lal_tersoff_mod.h | 2 +- lib/gpu/lal_tersoff_zbl.cpp | 12 +++--------- lib/gpu/lal_tersoff_zbl.cu | 22 +++++----------------- lib/gpu/lal_tersoff_zbl.h | 2 +- 10 files changed, 28 insertions(+), 79 deletions(-) diff --git a/lib/gpu/lal_tersoff.cpp b/lib/gpu/lal_tersoff.cpp index e0e87d9148..ac5e5bc600 100644 --- a/lib/gpu/lal_tersoff.cpp +++ b/lib/gpu/lal_tersoff.cpp @@ -108,10 +108,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int _nparams = nparams; _nelements = nelements; - UCL_H_Vec host_write(ntypes*ntypes,*(this->ucl_device), - UCL_READ_WRITE); - host_write.zero(); - cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY); + _cutsq_max=0.0; for (int ii=1; iihost_write[ii*ntypes+jj]) - host_write[ii*ntypes+jj]=host_cutsq[ijkparam]; + if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam]; } } } - ucl_copy(cutsq_pair,host_write,ntypes*ntypes); // -------------------------------------------------------------------- UCL_H_Vec dview(nparams,*(this->ucl_device), @@ -235,7 +230,6 @@ void TersoffT::clear() { ts3.clear(); ts4.clear(); ts5.clear(); - cutsq_pair.clear(); map.clear(); elem2param.clear(); _zetaij.clear(); @@ -286,7 +280,7 @@ int TersoffT::loop(const int eflag, const int vflag, const int evatom, int BX=this->block_pair(); int GX=static_cast(ceil(static_cast(ainum)/BX)); this->k_short_nbor.set_size(GX,BX); - this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes, + this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes, &this->nbor->dev_nbor, &this->nbor->dev_packed, &ainum, &nbor_pitch, &this->_threads_per_atom); diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index 03ce68be77..8baa5ce12a 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -226,17 +226,13 @@ _texture_2d( pos_tex,int4); #endif __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_, - const __global numtyp *restrict cutsq_pair, - const int ntypes, __global int * dev_nbor, + const numtyp cutsq, const int ntypes, + __global int * dev_nbor, const __global int * dev_packed, const int inum, const int nbor_pitch, const int t_per_atom_in) { const int ii=GLOBAL_ID_X; - #ifdef ONETYPE - const numtyp cutsq=cutsq_pair[ONETYPE]; - #endif - if (ii { /// ts5.x = beta, ts5.y = powern, ts5.z = lam2, ts5.w = bigb UCL_D_Vec ts5; - UCL_D_Vec cutsq_pair; + numtyp _cutsq_max; UCL_D_Vec elem2param; UCL_D_Vec map; diff --git a/lib/gpu/lal_tersoff_extra.h b/lib/gpu/lal_tersoff_extra.h index da2568aa1b..9fe2c63176 100644 --- a/lib/gpu/lal_tersoff_extra.h +++ b/lib/gpu/lal_tersoff_extra.h @@ -142,7 +142,10 @@ ucl_inline numtyp ters_fa_d(const numtyp r, numtyp *ans_d) { #ifndef ONETYPE - if (r > param_bigr + param_bigd) return (numtyp)0.0; + if (r > param_bigr + param_bigd) { + *ans_d = (numtyp)0.0; + return (numtyp)0.0; + } #endif numtyp dfc; const numtyp fc=ters_fc_d(r,param_bigr,param_bigd,&dfc); diff --git a/lib/gpu/lal_tersoff_mod.cpp b/lib/gpu/lal_tersoff_mod.cpp index b7b0fff1b9..347feab06f 100644 --- a/lib/gpu/lal_tersoff_mod.cpp +++ b/lib/gpu/lal_tersoff_mod.cpp @@ -105,10 +105,7 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in _nparams = nparams; _nelements = nelements; - UCL_H_Vec host_write(ntypes*ntypes,*(this->ucl_device), - UCL_READ_WRITE); - host_write.zero(); - cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY); + _cutsq_max=0.0; for (int ii=1; iihost_write[ii*ntypes+jj]) - host_write[ii*ntypes+jj]=host_cutsq[ijkparam]; + if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam]; } } } - ucl_copy(cutsq_pair,host_write,ntypes*ntypes); UCL_H_Vec dview(nparams,*(this->ucl_device), UCL_WRITE_ONLY); @@ -229,7 +224,6 @@ void TersoffMT::clear() { ts3.clear(); ts4.clear(); ts5.clear(); - cutsq_pair.clear(); map.clear(); elem2param.clear(); _zetaij.clear(); @@ -275,7 +269,7 @@ int TersoffMT::loop(const int eflag, const int vflag, const int evatom, int BX=this->block_pair(); int GX=static_cast(ceil(static_cast(ainum)/BX)); this->k_short_nbor.set_size(GX,BX); - this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes, + this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes, &this->nbor->dev_nbor, &this->nbor->dev_packed, &ainum, &nbor_pitch, &this->_threads_per_atom); diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu index 44b04c6933..1eb57683d5 100644 --- a/lib/gpu/lal_tersoff_mod.cu +++ b/lib/gpu/lal_tersoff_mod.cu @@ -220,17 +220,13 @@ _texture_2d( pos_tex,int4); #endif __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_, - const __global numtyp *restrict cutsq_pair, - const int ntypes, __global int * dev_nbor, - const __global int * dev_packed, - const int inum, const int nbor_pitch, - const int t_per_atom) { + const numtyp cutsq, const int ntypes, + __global int * dev_nbor, + const __global int * dev_packed, + const int inum, const int nbor_pitch, + const int t_per_atom) { const int ii=GLOBAL_ID_X; - #ifdef ONETYPE - const numtyp cutsq=cutsq_pair[ONETYPE]; - #endif - if (ii { /// ts5.x = c5, ts5.y = h UCL_D_Vec ts5; - UCL_D_Vec cutsq_pair; + numtyp _cutsq_max; UCL_D_Vec elem2param; UCL_D_Vec map; diff --git a/lib/gpu/lal_tersoff_zbl.cpp b/lib/gpu/lal_tersoff_zbl.cpp index 4456712b0a..4fba97606c 100644 --- a/lib/gpu/lal_tersoff_zbl.cpp +++ b/lib/gpu/lal_tersoff_zbl.cpp @@ -112,10 +112,7 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall, _nparams = nparams; _nelements = nelements; - UCL_H_Vec host_write(ntypes*ntypes,*(this->ucl_device), - UCL_READ_WRITE); - host_write.zero(); - cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY); + _cutsq_max=0.0; for (int ii=1; iihost_write[ii*ntypes+jj]) - host_write[ii*ntypes+jj]=host_cutsq[ijkparam]; + if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam]; } } } - ucl_copy(cutsq_pair,host_write,ntypes*ntypes); UCL_H_Vec dview(nparams,*(this->ucl_device), UCL_WRITE_ONLY); @@ -253,7 +248,6 @@ void TersoffZT::clear() { ts4.clear(); ts5.clear(); ts6.clear(); - cutsq_pair.clear(); map.clear(); elem2param.clear(); _zetaij.clear(); @@ -299,7 +293,7 @@ int TersoffZT::loop(const int eflag, const int vflag, const int evatom, int BX=this->block_pair(); int GX=static_cast(ceil(static_cast(ainum)/BX)); this->k_short_nbor.set_size(GX,BX); - this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes, + this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes, &this->nbor->dev_nbor, &this->nbor->dev_packed, &ainum, &nbor_pitch, &this->_threads_per_atom); diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu index fce1ccc406..6250fa55de 100644 --- a/lib/gpu/lal_tersoff_zbl.cu +++ b/lib/gpu/lal_tersoff_zbl.cu @@ -238,17 +238,13 @@ _texture( ts6_tex,int4); #endif __kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_, - const __global numtyp *restrict cutsq_pair, - const int ntypes, __global int * dev_nbor, - const __global int * dev_packed, - const int inum, const int nbor_pitch, - const int t_per_atom) { + const numtyp cutsq, const int ntypes, + __global int * dev_nbor, + const __global int * dev_packed, + const int inum, const int nbor_pitch, + const int t_per_atom) { const int ii=GLOBAL_ID_X; - #ifdef ONETYPE - const numtyp cutsq=cutsq_pair[ONETYPE]; - #endif - if (ii { /// ts6.x = Z_i, ts6.y = Z_j, ts6.z = ZBLcut, ts6.w = ZBLexpscale UCL_D_Vec ts6; - UCL_D_Vec cutsq_pair; + numtyp _cutsq_max; UCL_D_Vec elem2param; UCL_D_Vec map;