Prepared data structure for the dispersion real-space term
This commit is contained in:
@ -44,12 +44,14 @@ int AmoebaT::bytes_per_atom(const int max_nbors) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pdamp,
|
int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_thole, const double *host_dirdamp,
|
const double *host_pdamp, const double *host_thole,
|
||||||
|
const double *host_dirdamp, const int *host_amtype2class,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
|
const double *host_csix, const double *host_adisp,
|
||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, const double gpu_split, FILE *_screen,
|
const double cell_size, const double gpu_split, FILE *_screen,
|
||||||
@ -80,11 +82,22 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
|
|||||||
host_write[i].x = host_pdamp[i];
|
host_write[i].x = host_pdamp[i];
|
||||||
host_write[i].y = host_thole[i];
|
host_write[i].y = host_thole[i];
|
||||||
host_write[i].z = host_dirdamp[i];
|
host_write[i].z = host_dirdamp[i];
|
||||||
host_write[i].w = (numtyp)0;
|
host_write[i].w = host_amtype2class[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
damping.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
|
coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
|
||||||
ucl_copy(damping,host_write,false);
|
ucl_copy(coeff_amtype,host_write,false);
|
||||||
|
|
||||||
|
UCL_H_Vec<numtyp4> host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||||
|
for (int i = 0; i < max_amclass; i++) {
|
||||||
|
host_write2[i].x = host_csix[i];
|
||||||
|
host_write2[i].y = host_adisp[i];
|
||||||
|
host_write2[i].z = (numtyp)0;
|
||||||
|
host_write2[i].w = (numtyp)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
coeff_amclass.alloc(max_amclass,*(this->ucl_device), UCL_READ_ONLY);
|
||||||
|
ucl_copy(coeff_amclass,host_write2,false);
|
||||||
|
|
||||||
UCL_H_Vec<numtyp4> dview(5, *(this->ucl_device), UCL_WRITE_ONLY);
|
UCL_H_Vec<numtyp4> dview(5, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||||
sp_polar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
|
sp_polar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
@ -100,9 +113,8 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
|
|||||||
_polar_uscale = polar_uscale;
|
_polar_uscale = polar_uscale;
|
||||||
|
|
||||||
_allocated=true;
|
_allocated=true;
|
||||||
this->_max_bytes=damping.row_bytes()
|
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
|
||||||
+ sp_polar.row_bytes()
|
+ sp_polar.row_bytes() + this->_tep.row_bytes();
|
||||||
+ this->_tep.row_bytes();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -112,7 +124,7 @@ void AmoebaT::clear() {
|
|||||||
return;
|
return;
|
||||||
_allocated=false;
|
_allocated=false;
|
||||||
|
|
||||||
damping.clear();
|
coeff_amtype.clear();
|
||||||
sp_polar.clear();
|
sp_polar.clear();
|
||||||
|
|
||||||
this->clear_atomic();
|
this->clear_atomic();
|
||||||
@ -151,7 +163,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
|
|||||||
&nbor_pitch, &this->_threads_per_atom);
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
|
|
||||||
this->k_multipole.set_size(GX,BX);
|
this->k_multipole.set_size(GX,BX);
|
||||||
this->k_multipole.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_multipole.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||||
@ -192,7 +204,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->k_udirect2b.set_size(GX,BX);
|
this->k_udirect2b.set_size(GX,BX);
|
||||||
this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
||||||
@ -232,7 +244,7 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->k_umutual2b.set_size(GX,BX);
|
this->k_umutual2b.set_size(GX,BX);
|
||||||
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
|
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
|
||||||
&nbor_pitch, &this->_threads_per_atom, &this->_aewald,
|
&nbor_pitch, &this->_threads_per_atom, &this->_aewald,
|
||||||
@ -271,7 +283,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this->k_polar.set_size(GX,BX);
|
this->k_polar.set_size(GX,BX);
|
||||||
this->k_polar.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_polar.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||||
|
|||||||
@ -147,7 +147,7 @@ _texture( q_tex,int2);
|
|||||||
fieldp[ii+inum] = fp; \
|
fieldp[ii+inum] = fp; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom \
|
#define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom \
|
||||||
offset, eflag, vflag, ans, engv, ev_stride) \
|
offset, eflag, vflag, ans, engv, ev_stride) \
|
||||||
if (t_per_atom>1) { \
|
if (t_per_atom>1) { \
|
||||||
simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \
|
simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \
|
||||||
@ -210,8 +210,7 @@ _texture( q_tex,int2);
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
// SHUFFLE_AVAIL == 1
|
#else // SHUFFLE_AVAIL == 1
|
||||||
#else
|
|
||||||
|
|
||||||
#define local_allocate_store_ufld()
|
#define local_allocate_store_ufld()
|
||||||
|
|
||||||
@ -280,7 +279,7 @@ _texture( q_tex,int2);
|
|||||||
|
|
||||||
#if (EVFLAG == 1)
|
#if (EVFLAG == 1)
|
||||||
|
|
||||||
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
|
#define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
|
||||||
offset, eflag, vflag, ans, engv, ev_stride) \
|
offset, eflag, vflag, ans, engv, ev_stride) \
|
||||||
if (t_per_atom>1) { \
|
if (t_per_atom>1) { \
|
||||||
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
|
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
|
||||||
@ -376,7 +375,7 @@ _texture( q_tex,int2);
|
|||||||
// EVFLAG == 0
|
// EVFLAG == 0
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
|
#define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
|
||||||
offset, eflag, vflag, ans, engv, ev_stride) \
|
offset, eflag, vflag, ans, engv, ev_stride) \
|
||||||
if (t_per_atom>1) \
|
if (t_per_atom>1) \
|
||||||
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
|
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
|
||||||
@ -394,6 +393,125 @@ _texture( q_tex,int2);
|
|||||||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||||
#define MY_PIS (acctyp)1.77245385090551602729
|
#define MY_PIS (acctyp)1.77245385090551602729
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
dispersion = real-space portion of Ewald dispersion
|
||||||
|
adapted from Tinker edreal1d() routine
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||||
|
const __global numtyp *restrict extra,
|
||||||
|
const __global numtyp4 *restrict coeff,
|
||||||
|
const __global numtyp4 *restrict sp_polar,
|
||||||
|
const __global int *dev_nbor,
|
||||||
|
const __global int *dev_packed,
|
||||||
|
const __global int *dev_short_nbor,
|
||||||
|
__global acctyp4 *restrict ans,
|
||||||
|
__global acctyp *restrict engv,
|
||||||
|
const int eflag, const int vflag, const int inum,
|
||||||
|
const int nall, const int nbor_pitch,
|
||||||
|
const int t_per_atom, const numtyp aewald,
|
||||||
|
const numtyp felec, const numtyp off2,
|
||||||
|
const numtyp polar_dscale, const numtyp polar_uscale)
|
||||||
|
{
|
||||||
|
int tid, ii, offset, i;
|
||||||
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
|
int n_stride;
|
||||||
|
local_allocate_store_charge();
|
||||||
|
|
||||||
|
acctyp4 f;
|
||||||
|
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||||
|
acctyp energy, e_coul, virial[6];
|
||||||
|
if (EVFLAG) {
|
||||||
|
energy=(acctyp)0;
|
||||||
|
e_coul=(acctyp)0;
|
||||||
|
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
||||||
|
}
|
||||||
|
|
||||||
|
acctyp4 tq;
|
||||||
|
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||||
|
|
||||||
|
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||||
|
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||||
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
|
||||||
|
if (ii<inum) {
|
||||||
|
int m,itype,igroup;
|
||||||
|
numtyp bfac;
|
||||||
|
numtyp term1,term2,term3;
|
||||||
|
numtyp term4,term5,term6;
|
||||||
|
numtyp bn[6];
|
||||||
|
numtyp ci,dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||||
|
|
||||||
|
int numj, nbor, nbor_end;
|
||||||
|
const __global int* nbor_mem=dev_packed;
|
||||||
|
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||||
|
n_stride,nbor_end,nbor);
|
||||||
|
|
||||||
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
|
//numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||||
|
//int itype=ix.w;
|
||||||
|
|
||||||
|
// recalculate numj and nbor_end for use of the short nbor list
|
||||||
|
if (dev_packed==dev_nbor) {
|
||||||
|
numj = dev_short_nbor[nbor];
|
||||||
|
nbor += n_stride;
|
||||||
|
nbor_end = nbor+fast_mul(numj,n_stride);
|
||||||
|
nbor_mem = dev_short_nbor;
|
||||||
|
}
|
||||||
|
|
||||||
|
ci = polar1[i].x; // rpole[i][0];
|
||||||
|
dix = polar1[i].y; // rpole[i][1];
|
||||||
|
diy = polar1[i].z; // rpole[i][2];
|
||||||
|
diz = polar1[i].w; // rpole[i][3];
|
||||||
|
qixx = polar2[i].x; // rpole[i][4];
|
||||||
|
qixy = polar2[i].y; // rpole[i][5];
|
||||||
|
qixz = polar2[i].z; // rpole[i][6];
|
||||||
|
qiyy = polar2[i].w; // rpole[i][8];
|
||||||
|
qiyz = polar3[i].x; // rpole[i][9];
|
||||||
|
qizz = polar3[i].y; // rpole[i][12];
|
||||||
|
itype = polar3[i].z; // amtype[i];
|
||||||
|
igroup = polar3[i].w; // amgroup[i];
|
||||||
|
|
||||||
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
|
int jextra=nbor_mem[nbor];
|
||||||
|
int j = jextra & NEIGHMASK15;
|
||||||
|
|
||||||
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
//int jtype=jx.w;
|
||||||
|
|
||||||
|
// Compute r12
|
||||||
|
numtyp xr = jx.x - ix.x;
|
||||||
|
numtyp yr = jx.y - ix.y;
|
||||||
|
numtyp zr = jx.z - ix.z;
|
||||||
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
|
//if (r2>off2) continue;
|
||||||
|
|
||||||
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
numtyp ck = polar1[j].x; // rpole[j][0];
|
||||||
|
numtyp dkx = polar1[j].y; // rpole[j][1];
|
||||||
|
numtyp dky = polar1[j].z; // rpole[j][2];
|
||||||
|
numtyp dkz = polar1[j].w; // rpole[j][3];
|
||||||
|
numtyp qkxx = polar2[j].x; // rpole[j][4];
|
||||||
|
numtyp qkxy = polar2[j].y; // rpole[j][5];
|
||||||
|
numtyp qkxz = polar2[j].z; // rpole[j][6];
|
||||||
|
numtyp qkyy = polar2[j].w; // rpole[j][8];
|
||||||
|
numtyp qkyz = polar3[j].x; // rpole[j][9];
|
||||||
|
numtyp qkzz = polar3[j].y; // rpole[j][12];
|
||||||
|
int jtype = polar3[j].z; // amtype[j];
|
||||||
|
int jgroup = polar3[j].w; // amgroup[j];
|
||||||
|
|
||||||
|
} // nbor
|
||||||
|
|
||||||
|
} // ii<inum
|
||||||
|
|
||||||
|
// accumate force, energy and virial
|
||||||
|
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
|
// offset,eflag,vflag,ans,engv);
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
multipole_real = real-space portion of multipole
|
multipole_real = real-space portion of multipole
|
||||||
adapted from Tinker emreal1d() routine
|
adapted from Tinker emreal1d() routine
|
||||||
@ -401,7 +519,7 @@ _texture( q_tex,int2);
|
|||||||
|
|
||||||
__kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict damping,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -697,7 +815,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict damping,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -759,9 +877,9 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
// debug:
|
// debug:
|
||||||
// xi__ = ix; xi__.w = itype;
|
// xi__ = ix; xi__.w = itype;
|
||||||
|
|
||||||
numtyp pdi = damping[itype].x;
|
numtyp pdi = coeff[itype].x;
|
||||||
numtyp pti = damping[itype].y;
|
numtyp pti = coeff[itype].y;
|
||||||
numtyp ddi = damping[itype].z;
|
numtyp ddi = coeff[itype].z;
|
||||||
|
|
||||||
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
||||||
numtyp aesq2n = (numtyp)0.0;
|
numtyp aesq2n = (numtyp)0.0;
|
||||||
@ -848,9 +966,9 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp scale3 = (numtyp)1.0;
|
numtyp scale3 = (numtyp)1.0;
|
||||||
numtyp scale5 = (numtyp)1.0;
|
numtyp scale5 = (numtyp)1.0;
|
||||||
numtyp scale7 = (numtyp)1.0;
|
numtyp scale7 = (numtyp)1.0;
|
||||||
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
|
||||||
if (damp != (numtyp)0.0) {
|
if (damp != (numtyp)0.0) {
|
||||||
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
|
numtyp pgamma = MIN(ddi,coeff[jtype].z); // dirdamp[jtype]
|
||||||
if (pgamma != (numtyp)0.0) {
|
if (pgamma != (numtyp)0.0) {
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
|
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
|
||||||
if (damp < (numtyp)50.0) {
|
if (damp < (numtyp)50.0) {
|
||||||
@ -860,7 +978,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
|
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
|
pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
|
||||||
damp = pgamma * ucl_powr(r/damp,3.0);
|
damp = pgamma * ucl_powr(r/damp,3.0);
|
||||||
if (damp < (numtyp)50.0) {
|
if (damp < (numtyp)50.0) {
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
numtyp expdamp = ucl_exp(-damp);
|
||||||
@ -911,7 +1029,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict damping,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -962,8 +1080,8 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
itype = polar3[i].z; // amtype[i];
|
itype = polar3[i].z; // amtype[i];
|
||||||
igroup = polar3[i].w; // amgroup[i];
|
igroup = polar3[i].w; // amgroup[i];
|
||||||
|
|
||||||
numtyp pdi = damping[itype].x;
|
numtyp pdi = coeff[itype].x;
|
||||||
numtyp pti = damping[itype].y;
|
numtyp pti = coeff[itype].y;
|
||||||
|
|
||||||
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
||||||
numtyp aesq2n = (numtyp)0.0;
|
numtyp aesq2n = (numtyp)0.0;
|
||||||
@ -1025,9 +1143,9 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
// if (poltyp != DIRECT)
|
// if (poltyp != DIRECT)
|
||||||
numtyp scale3 = (numtyp)1.0;
|
numtyp scale3 = (numtyp)1.0;
|
||||||
numtyp scale5 = (numtyp)1.0;
|
numtyp scale5 = (numtyp)1.0;
|
||||||
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
|
||||||
if (damp != (numtyp)0.0) {
|
if (damp != (numtyp)0.0) {
|
||||||
numtyp pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
|
numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
||||||
if (damp < (numtyp)50.0) {
|
if (damp < (numtyp)50.0) {
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
numtyp expdamp = ucl_exp(-damp);
|
||||||
@ -1131,7 +1249,7 @@ __kernel void k_special15(__global int * dev_nbor,
|
|||||||
|
|
||||||
__kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict damping,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
@ -1233,8 +1351,8 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
// debug:
|
// debug:
|
||||||
// xi__ = ix; xi__.w = itype;
|
// xi__ = ix; xi__.w = itype;
|
||||||
|
|
||||||
numtyp pdi = damping[itype].x;
|
numtyp pdi = coeff[itype].x;
|
||||||
numtyp pti = damping[itype].y;
|
numtyp pti = coeff[itype].y;
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
@ -1344,9 +1462,9 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
// apply Thole polarization damping to scale factors
|
// apply Thole polarization damping to scale factors
|
||||||
|
|
||||||
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
|
||||||
if (damp != (numtyp)0.0) {
|
if (damp != (numtyp)0.0) {
|
||||||
numtyp pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
|
numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
||||||
if (damp < (numtyp)50.0) {
|
if (damp < (numtyp)50.0) {
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
numtyp expdamp = ucl_exp(-damp);
|
||||||
@ -1644,7 +1762,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
// accumate force, energy and virial
|
// accumate force, energy and virial
|
||||||
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
// offset,eflag,vflag,ans,engv);
|
// offset,eflag,vflag,ans,engv);
|
||||||
store_answers_p(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -37,12 +37,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
* - -3 if there is an out of memory error
|
* - -3 if there is an out of memory error
|
||||||
* - -4 if the GPU library was not compiled for GPU
|
* - -4 if the GPU library was not compiled for GPU
|
||||||
* - -5 Double precision is not supported on card **/
|
* - -5 Double precision is not supported on card **/
|
||||||
int init(const int ntypes, const int max_amtype, const double *host_pdamp,
|
int init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_thole, const double *host_dirdamp,
|
const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
|
||||||
const double *host_special_mpole,
|
const int *host_amtype2class, const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
|
const double *host_csix, const double *host_adisp,
|
||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15, const double cell_size,
|
const int maxspecial, const int maxspecial15, const double cell_size,
|
||||||
const double gpu_split, FILE *_screen,
|
const double gpu_split, FILE *_screen,
|
||||||
@ -60,8 +61,11 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
|
|
||||||
// --------------------------- TYPE DATA --------------------------
|
// --------------------------- TYPE DATA --------------------------
|
||||||
|
|
||||||
/// pdamp = damping.x; thole = damping.y
|
/// pdamp = coeff_amtype.x; thole = coeff_amtype.y;
|
||||||
UCL_D_Vec<numtyp4> damping;
|
/// dirdamp = coeff_amtype.z; amtype2class = coeff_amtype.w
|
||||||
|
UCL_D_Vec<numtyp4> coeff_amtype;
|
||||||
|
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
|
||||||
|
UCL_D_Vec<numtyp4> coeff_amclass;
|
||||||
/// Special polar values [0-4]:
|
/// Special polar values [0-4]:
|
||||||
/// sp_polar.x = special_polar_wscale
|
/// sp_polar.x = special_polar_wscale
|
||||||
/// sp_polar.y special_polar_pscale,
|
/// sp_polar.y special_polar_pscale,
|
||||||
|
|||||||
@ -27,13 +27,14 @@ static Amoeba<PRECISION,ACC_PRECISION> AMOEBAMF;
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Allocate memory on host and device and copy constants to device
|
// Allocate memory on host and device and copy constants to device
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp,
|
const double *host_dirdamp, const int *host_amtype2class,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
|
const double *host_csix, const double *host_adisp,
|
||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen,
|
const double cell_size, int &gpu_mode, FILE *screen,
|
||||||
@ -63,11 +64,13 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
|
|
||||||
int init_ok=0;
|
int init_ok=0;
|
||||||
if (world_me==0)
|
if (world_me==0)
|
||||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, host_pdamp, host_thole, host_dirdamp,
|
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
||||||
host_special_mpole, host_special_polar_wscale,
|
host_pdamp, host_thole, host_dirdamp,
|
||||||
|
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
||||||
host_special_polar_piscale, host_special_polar_pscale,
|
host_special_polar_piscale, host_special_polar_pscale,
|
||||||
nlocal, nall, max_nbors, maxspecial, maxspecial15,
|
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||||
cell_size, gpu_split, screen, polar_dscale, polar_uscale);
|
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||||
|
screen, polar_dscale, polar_uscale);
|
||||||
|
|
||||||
AMOEBAMF.device->world_barrier();
|
AMOEBAMF.device->world_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
@ -83,11 +86,12 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
fflush(screen);
|
fflush(screen);
|
||||||
}
|
}
|
||||||
if (gpu_rank==i && world_me!=0)
|
if (gpu_rank==i && world_me!=0)
|
||||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, host_pdamp, host_thole, host_dirdamp,
|
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp,
|
||||||
host_special_mpole, host_special_polar_wscale,
|
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
||||||
host_special_polar_piscale, host_special_polar_pscale,
|
host_special_polar_piscale, host_special_polar_pscale,
|
||||||
nlocal, nall, max_nbors, maxspecial, maxspecial15,
|
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||||
cell_size, gpu_split, screen, polar_dscale, polar_uscale);
|
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||||
|
screen, polar_dscale, polar_uscale);
|
||||||
|
|
||||||
AMOEBAMF.device->gpu_barrier();
|
AMOEBAMF.device->gpu_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
|
|||||||
@ -50,13 +50,14 @@ enum{GORDON1,GORDON2};
|
|||||||
|
|
||||||
// External functions from cuda library for atom decomposition
|
// External functions from cuda library for atom decomposition
|
||||||
|
|
||||||
int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp,
|
const double *host_dirdamp, const int* host_amtype2class,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
|
const double *host_csix, const double *host_adisp,
|
||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen,
|
const double cell_size, int &gpu_mode, FILE *screen,
|
||||||
@ -168,9 +169,10 @@ void PairAmoebaGPU::init_style()
|
|||||||
|
|
||||||
int tq_size;
|
int tq_size;
|
||||||
int mnf = 5e-2 * neighbor->oneatom;
|
int mnf = 5e-2 * neighbor->oneatom;
|
||||||
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, pdamp, thole, dirdamp,
|
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
|
||||||
special_mpole, special_polar_wscale, special_polar_piscale,
|
pdamp, thole, dirdamp, amtype2class, special_mpole,
|
||||||
special_polar_pscale, atom->nlocal,
|
special_polar_wscale, special_polar_piscale,
|
||||||
|
special_polar_pscale, csix, adisp, atom->nlocal,
|
||||||
atom->nlocal+atom->nghost, mnf, maxspecial,
|
atom->nlocal+atom->nghost, mnf, maxspecial,
|
||||||
maxspecial15, cell_size, gpu_mode, screen,
|
maxspecial15, cell_size, gpu_mode, screen,
|
||||||
polar_dscale, polar_uscale, tq_size);
|
polar_dscale, polar_uscale, tq_size);
|
||||||
|
|||||||
Reference in New Issue
Block a user