Fixes issue from Feb 2021 GPU package update for tersoff styles using multiple types.
This commit is contained in:
@ -108,10 +108,7 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
|
|||||||
_nparams = nparams;
|
_nparams = nparams;
|
||||||
_nelements = nelements;
|
_nelements = nelements;
|
||||||
|
|
||||||
UCL_H_Vec<numtyp> host_write(ntypes*ntypes,*(this->ucl_device),
|
_cutsq_max=0.0;
|
||||||
UCL_READ_WRITE);
|
|
||||||
host_write.zero();
|
|
||||||
cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY);
|
|
||||||
for (int ii=1; ii<ntypes; ii++) {
|
for (int ii=1; ii<ntypes; ii++) {
|
||||||
const int i=host_map[ii];
|
const int i=host_map[ii];
|
||||||
for (int jj=1; jj<ntypes; jj++) {
|
for (int jj=1; jj<ntypes; jj++) {
|
||||||
@ -120,12 +117,10 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
|
|||||||
const int k=host_map[kk];
|
const int k=host_map[kk];
|
||||||
if (i<0 || j<0 || k<0) continue;
|
if (i<0 || j<0 || k<0) continue;
|
||||||
const int ijkparam = host_elem2param[i][j][k];
|
const int ijkparam = host_elem2param[i][j][k];
|
||||||
if (host_cutsq[ijkparam]>host_write[ii*ntypes+jj])
|
if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam];
|
||||||
host_write[ii*ntypes+jj]=host_cutsq[ijkparam];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ucl_copy(cutsq_pair,host_write,ntypes*ntypes);
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------
|
// --------------------------------------------------------------------
|
||||||
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
||||||
@ -235,7 +230,6 @@ void TersoffT::clear() {
|
|||||||
ts3.clear();
|
ts3.clear();
|
||||||
ts4.clear();
|
ts4.clear();
|
||||||
ts5.clear();
|
ts5.clear();
|
||||||
cutsq_pair.clear();
|
|
||||||
map.clear();
|
map.clear();
|
||||||
elem2param.clear();
|
elem2param.clear();
|
||||||
_zetaij.clear();
|
_zetaij.clear();
|
||||||
@ -286,7 +280,7 @@ int TersoffT::loop(const int eflag, const int vflag, const int evatom,
|
|||||||
int BX=this->block_pair();
|
int BX=this->block_pair();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes,
|
this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes,
|
||||||
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
||||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||||
|
|
||||||
|
|||||||
@ -226,17 +226,13 @@ _texture_2d( pos_tex,int4);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
__kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict cutsq_pair,
|
const numtyp cutsq, const int ntypes,
|
||||||
const int ntypes, __global int * dev_nbor,
|
__global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
const int inum, const int nbor_pitch,
|
const int inum, const int nbor_pitch,
|
||||||
const int t_per_atom_in) {
|
const int t_per_atom_in) {
|
||||||
const int ii=GLOBAL_ID_X;
|
const int ii=GLOBAL_ID_X;
|
||||||
|
|
||||||
#ifdef ONETYPE
|
|
||||||
const numtyp cutsq=cutsq_pair[ONETYPE];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
const int i=dev_packed[ii];
|
const int i=dev_packed[ii];
|
||||||
int nbor=ii+nbor_pitch;
|
int nbor=ii+nbor_pitch;
|
||||||
@ -245,9 +241,6 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
||||||
|
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
#ifndef ONETYPE
|
|
||||||
const int itype=ix.w*ntypes;
|
|
||||||
#endif
|
|
||||||
int newj=0;
|
int newj=0;
|
||||||
|
|
||||||
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
||||||
@ -258,11 +251,6 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
int j = sj & NEIGHMASK;
|
int j = sj & NEIGHMASK;
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
|
||||||
#ifndef ONETYPE
|
|
||||||
const int mtype=jx.w+itype;
|
|
||||||
const numtyp cutsq=cutsq_pair[mtype];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Compute r12
|
// Compute r12
|
||||||
numtyp delx = ix.x-jx.x;
|
numtyp delx = ix.x-jx.x;
|
||||||
numtyp dely = ix.y-jx.y;
|
numtyp dely = ix.y-jx.y;
|
||||||
|
|||||||
@ -73,7 +73,7 @@ class Tersoff : public BaseThree<numtyp, acctyp> {
|
|||||||
/// ts5.x = beta, ts5.y = powern, ts5.z = lam2, ts5.w = bigb
|
/// ts5.x = beta, ts5.y = powern, ts5.z = lam2, ts5.w = bigb
|
||||||
UCL_D_Vec<numtyp4> ts5;
|
UCL_D_Vec<numtyp4> ts5;
|
||||||
|
|
||||||
UCL_D_Vec<numtyp> cutsq_pair;
|
numtyp _cutsq_max;
|
||||||
|
|
||||||
UCL_D_Vec<int> elem2param;
|
UCL_D_Vec<int> elem2param;
|
||||||
UCL_D_Vec<int> map;
|
UCL_D_Vec<int> map;
|
||||||
|
|||||||
@ -142,7 +142,10 @@ ucl_inline numtyp ters_fa_d(const numtyp r,
|
|||||||
numtyp *ans_d)
|
numtyp *ans_d)
|
||||||
{
|
{
|
||||||
#ifndef ONETYPE
|
#ifndef ONETYPE
|
||||||
if (r > param_bigr + param_bigd) return (numtyp)0.0;
|
if (r > param_bigr + param_bigd) {
|
||||||
|
*ans_d = (numtyp)0.0;
|
||||||
|
return (numtyp)0.0;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
numtyp dfc;
|
numtyp dfc;
|
||||||
const numtyp fc=ters_fc_d(r,param_bigr,param_bigd,&dfc);
|
const numtyp fc=ters_fc_d(r,param_bigr,param_bigd,&dfc);
|
||||||
|
|||||||
@ -105,10 +105,7 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in
|
|||||||
_nparams = nparams;
|
_nparams = nparams;
|
||||||
_nelements = nelements;
|
_nelements = nelements;
|
||||||
|
|
||||||
UCL_H_Vec<numtyp> host_write(ntypes*ntypes,*(this->ucl_device),
|
_cutsq_max=0.0;
|
||||||
UCL_READ_WRITE);
|
|
||||||
host_write.zero();
|
|
||||||
cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY);
|
|
||||||
for (int ii=1; ii<ntypes; ii++) {
|
for (int ii=1; ii<ntypes; ii++) {
|
||||||
const int i=host_map[ii];
|
const int i=host_map[ii];
|
||||||
for (int jj=1; jj<ntypes; jj++) {
|
for (int jj=1; jj<ntypes; jj++) {
|
||||||
@ -117,12 +114,10 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in
|
|||||||
const int k=host_map[kk];
|
const int k=host_map[kk];
|
||||||
if (i<0 || j<0 || k<0) continue;
|
if (i<0 || j<0 || k<0) continue;
|
||||||
const int ijkparam = host_elem2param[i][j][k];
|
const int ijkparam = host_elem2param[i][j][k];
|
||||||
if (host_cutsq[ijkparam]>host_write[ii*ntypes+jj])
|
if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam];
|
||||||
host_write[ii*ntypes+jj]=host_cutsq[ijkparam];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ucl_copy(cutsq_pair,host_write,ntypes*ntypes);
|
|
||||||
|
|
||||||
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
||||||
UCL_WRITE_ONLY);
|
UCL_WRITE_ONLY);
|
||||||
@ -229,7 +224,6 @@ void TersoffMT::clear() {
|
|||||||
ts3.clear();
|
ts3.clear();
|
||||||
ts4.clear();
|
ts4.clear();
|
||||||
ts5.clear();
|
ts5.clear();
|
||||||
cutsq_pair.clear();
|
|
||||||
map.clear();
|
map.clear();
|
||||||
elem2param.clear();
|
elem2param.clear();
|
||||||
_zetaij.clear();
|
_zetaij.clear();
|
||||||
@ -275,7 +269,7 @@ int TersoffMT::loop(const int eflag, const int vflag, const int evatom,
|
|||||||
int BX=this->block_pair();
|
int BX=this->block_pair();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes,
|
this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes,
|
||||||
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
||||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||||
|
|
||||||
|
|||||||
@ -220,17 +220,13 @@ _texture_2d( pos_tex,int4);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
|
__kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict cutsq_pair,
|
const numtyp cutsq, const int ntypes,
|
||||||
const int ntypes, __global int * dev_nbor,
|
__global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
const int inum, const int nbor_pitch,
|
const int inum, const int nbor_pitch,
|
||||||
const int t_per_atom) {
|
const int t_per_atom) {
|
||||||
const int ii=GLOBAL_ID_X;
|
const int ii=GLOBAL_ID_X;
|
||||||
|
|
||||||
#ifdef ONETYPE
|
|
||||||
const numtyp cutsq=cutsq_pair[ONETYPE];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
const int i=dev_packed[ii];
|
const int i=dev_packed[ii];
|
||||||
int nbor=ii+nbor_pitch;
|
int nbor=ii+nbor_pitch;
|
||||||
@ -239,9 +235,6 @@ __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
||||||
|
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
#ifndef ONETYPE
|
|
||||||
const int itype=ix.w*ntypes;
|
|
||||||
#endif
|
|
||||||
int newj=0;
|
int newj=0;
|
||||||
|
|
||||||
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
||||||
@ -252,11 +245,6 @@ __kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
int j = sj & NEIGHMASK;
|
int j = sj & NEIGHMASK;
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
|
||||||
#ifndef ONETYPE
|
|
||||||
const int mtype=jx.w+itype;
|
|
||||||
const numtyp cutsq=cutsq_pair[mtype];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Compute r12
|
// Compute r12
|
||||||
numtyp delx = ix.x-jx.x;
|
numtyp delx = ix.x-jx.x;
|
||||||
numtyp dely = ix.y-jx.y;
|
numtyp dely = ix.y-jx.y;
|
||||||
|
|||||||
@ -76,7 +76,7 @@ class TersoffMod : public BaseThree<numtyp, acctyp> {
|
|||||||
/// ts5.x = c5, ts5.y = h
|
/// ts5.x = c5, ts5.y = h
|
||||||
UCL_D_Vec<numtyp4> ts5;
|
UCL_D_Vec<numtyp4> ts5;
|
||||||
|
|
||||||
UCL_D_Vec<numtyp> cutsq_pair;
|
numtyp _cutsq_max;
|
||||||
|
|
||||||
UCL_D_Vec<int> elem2param;
|
UCL_D_Vec<int> elem2param;
|
||||||
UCL_D_Vec<int> map;
|
UCL_D_Vec<int> map;
|
||||||
|
|||||||
@ -112,10 +112,7 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall,
|
|||||||
_nparams = nparams;
|
_nparams = nparams;
|
||||||
_nelements = nelements;
|
_nelements = nelements;
|
||||||
|
|
||||||
UCL_H_Vec<numtyp> host_write(ntypes*ntypes,*(this->ucl_device),
|
_cutsq_max=0.0;
|
||||||
UCL_READ_WRITE);
|
|
||||||
host_write.zero();
|
|
||||||
cutsq_pair.alloc(ntypes*ntypes,*(this->ucl_device),UCL_READ_ONLY);
|
|
||||||
for (int ii=1; ii<ntypes; ii++) {
|
for (int ii=1; ii<ntypes; ii++) {
|
||||||
const int i=host_map[ii];
|
const int i=host_map[ii];
|
||||||
for (int jj=1; jj<ntypes; jj++) {
|
for (int jj=1; jj<ntypes; jj++) {
|
||||||
@ -124,12 +121,10 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall,
|
|||||||
const int k=host_map[kk];
|
const int k=host_map[kk];
|
||||||
if (i<0 || j<0 || k<0) continue;
|
if (i<0 || j<0 || k<0) continue;
|
||||||
const int ijkparam = host_elem2param[i][j][k];
|
const int ijkparam = host_elem2param[i][j][k];
|
||||||
if (host_cutsq[ijkparam]>host_write[ii*ntypes+jj])
|
if (host_cutsq[ijkparam]>_cutsq_max) _cutsq_max=host_cutsq[ijkparam];
|
||||||
host_write[ii*ntypes+jj]=host_cutsq[ijkparam];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ucl_copy(cutsq_pair,host_write,ntypes*ntypes);
|
|
||||||
|
|
||||||
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
|
||||||
UCL_WRITE_ONLY);
|
UCL_WRITE_ONLY);
|
||||||
@ -253,7 +248,6 @@ void TersoffZT::clear() {
|
|||||||
ts4.clear();
|
ts4.clear();
|
||||||
ts5.clear();
|
ts5.clear();
|
||||||
ts6.clear();
|
ts6.clear();
|
||||||
cutsq_pair.clear();
|
|
||||||
map.clear();
|
map.clear();
|
||||||
elem2param.clear();
|
elem2param.clear();
|
||||||
_zetaij.clear();
|
_zetaij.clear();
|
||||||
@ -299,7 +293,7 @@ int TersoffZT::loop(const int eflag, const int vflag, const int evatom,
|
|||||||
int BX=this->block_pair();
|
int BX=this->block_pair();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &cutsq_pair, &_ntypes,
|
this->k_short_nbor.run(&this->atom->x, &_cutsq_max, &_ntypes,
|
||||||
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
&this->nbor->dev_nbor, &this->nbor->dev_packed,
|
||||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||||
|
|
||||||
|
|||||||
@ -238,17 +238,13 @@ _texture( ts6_tex,int4);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_,
|
__kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict cutsq_pair,
|
const numtyp cutsq, const int ntypes,
|
||||||
const int ntypes, __global int * dev_nbor,
|
__global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
const int inum, const int nbor_pitch,
|
const int inum, const int nbor_pitch,
|
||||||
const int t_per_atom) {
|
const int t_per_atom) {
|
||||||
const int ii=GLOBAL_ID_X;
|
const int ii=GLOBAL_ID_X;
|
||||||
|
|
||||||
#ifdef ONETYPE
|
|
||||||
const numtyp cutsq=cutsq_pair[ONETYPE];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
const int i=dev_packed[ii];
|
const int i=dev_packed[ii];
|
||||||
int nbor=ii+nbor_pitch;
|
int nbor=ii+nbor_pitch;
|
||||||
@ -257,9 +253,6 @@ __kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
const int nbor_end=nbor+fast_mul(numj,nbor_pitch);
|
||||||
|
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
#ifndef ONETYPE
|
|
||||||
const int itype=ix.w*ntypes;
|
|
||||||
#endif
|
|
||||||
int newj=0;
|
int newj=0;
|
||||||
|
|
||||||
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
__global int *out_list=dev_nbor+2*nbor_pitch+ii*t_per_atom;
|
||||||
@ -270,11 +263,6 @@ __kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_,
|
|||||||
int j = sj & NEIGHMASK;
|
int j = sj & NEIGHMASK;
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
|
||||||
#ifndef ONETYPE
|
|
||||||
const int mtype=jx.w+itype;
|
|
||||||
const numtyp cutsq=cutsq_pair[mtype];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Compute r12
|
// Compute r12
|
||||||
numtyp delx = ix.x-jx.x;
|
numtyp delx = ix.x-jx.x;
|
||||||
numtyp dely = ix.y-jx.y;
|
numtyp dely = ix.y-jx.y;
|
||||||
|
|||||||
@ -80,7 +80,7 @@ class TersoffZBL : public BaseThree<numtyp, acctyp> {
|
|||||||
/// ts6.x = Z_i, ts6.y = Z_j, ts6.z = ZBLcut, ts6.w = ZBLexpscale
|
/// ts6.x = Z_i, ts6.y = Z_j, ts6.z = ZBLcut, ts6.w = ZBLexpscale
|
||||||
UCL_D_Vec<numtyp4> ts6;
|
UCL_D_Vec<numtyp4> ts6;
|
||||||
|
|
||||||
UCL_D_Vec<numtyp> cutsq_pair;
|
numtyp _cutsq_max;
|
||||||
|
|
||||||
UCL_D_Vec<int> elem2param;
|
UCL_D_Vec<int> elem2param;
|
||||||
UCL_D_Vec<int> map;
|
UCL_D_Vec<int> map;
|
||||||
|
|||||||
Reference in New Issue
Block a user