Prepared data structure for the dispersion real-space term

This commit is contained in:
Trung Nguyen
2021-09-18 10:22:22 -05:00
parent 5d801e985f
commit 1166845fcf
5 changed files with 197 additions and 57 deletions

View File

@ -44,12 +44,14 @@ int AmoebaT::bytes_per_atom(const int max_nbors) const {
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pdamp, int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_thole, const double *host_dirdamp, const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const int *host_amtype2class,
const double *host_special_mpole, const double *host_special_mpole,
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_csix, const double *host_adisp,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, const double gpu_split, FILE *_screen, const double cell_size, const double gpu_split, FILE *_screen,
@ -80,11 +82,22 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
host_write[i].x = host_pdamp[i]; host_write[i].x = host_pdamp[i];
host_write[i].y = host_thole[i]; host_write[i].y = host_thole[i];
host_write[i].z = host_dirdamp[i]; host_write[i].z = host_dirdamp[i];
host_write[i].w = (numtyp)0; host_write[i].w = host_amtype2class[i];
} }
damping.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY); coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
ucl_copy(damping,host_write,false); ucl_copy(coeff_amtype,host_write,false);
UCL_H_Vec<numtyp4> host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY);
for (int i = 0; i < max_amclass; i++) {
host_write2[i].x = host_csix[i];
host_write2[i].y = host_adisp[i];
host_write2[i].z = (numtyp)0;
host_write2[i].w = (numtyp)0;
}
coeff_amclass.alloc(max_amclass,*(this->ucl_device), UCL_READ_ONLY);
ucl_copy(coeff_amclass,host_write2,false);
UCL_H_Vec<numtyp4> dview(5, *(this->ucl_device), UCL_WRITE_ONLY); UCL_H_Vec<numtyp4> dview(5, *(this->ucl_device), UCL_WRITE_ONLY);
sp_polar.alloc(5,*(this->ucl_device),UCL_READ_ONLY); sp_polar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
@ -100,9 +113,8 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
_polar_uscale = polar_uscale; _polar_uscale = polar_uscale;
_allocated=true; _allocated=true;
this->_max_bytes=damping.row_bytes() this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
+ sp_polar.row_bytes() + sp_polar.row_bytes() + this->_tep.row_bytes();
+ this->_tep.row_bytes();
return 0; return 0;
} }
@ -112,7 +124,7 @@ void AmoebaT::clear() {
return; return;
_allocated=false; _allocated=false;
damping.clear(); coeff_amtype.clear();
sp_polar.clear(); sp_polar.clear();
this->clear_atomic(); this->clear_atomic();
@ -151,7 +163,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
this->k_multipole.set_size(GX,BX); this->k_multipole.set_size(GX,BX);
this->k_multipole.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, this->k_multipole.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor,
&this->ans->force, &this->ans->engv, &this->_tep, &this->ans->force, &this->ans->engv, &this->_tep,
@ -192,7 +204,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
} }
this->k_udirect2b.set_size(GX,BX); this->k_udirect2b.set_size(GX,BX);
this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor,
&this->_fieldp, &ainum, &_nall, &nbor_pitch, &this->_fieldp, &ainum, &_nall, &nbor_pitch,
@ -232,7 +244,7 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
} }
this->k_umutual2b.set_size(GX,BX); this->k_umutual2b.set_size(GX,BX);
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
&nbor_pitch, &this->_threads_per_atom, &this->_aewald, &nbor_pitch, &this->_threads_per_atom, &this->_aewald,
@ -271,7 +283,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
} }
this->k_polar.set_size(GX,BX); this->k_polar.set_size(GX,BX);
this->k_polar.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, this->k_polar.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor,
&this->ans->force, &this->ans->engv, &this->_tep, &this->ans->force, &this->ans->engv, &this->_tep,

View File

@ -147,7 +147,7 @@ _texture( q_tex,int2);
fieldp[ii+inum] = fp; \ fieldp[ii+inum] = fp; \
} }
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom \ #define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom \
offset, eflag, vflag, ans, engv, ev_stride) \ offset, eflag, vflag, ans, engv, ev_stride) \
if (t_per_atom>1) { \ if (t_per_atom>1) { \
simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \ simd_reduce_add3(t_per_atom, red_acc, offset, tid, f.x, f.y, f.z); \
@ -210,8 +210,7 @@ _texture( q_tex,int2);
} \ } \
} }
// SHUFFLE_AVAIL == 1 #else // SHUFFLE_AVAIL == 1
#else
#define local_allocate_store_ufld() #define local_allocate_store_ufld()
@ -280,7 +279,7 @@ _texture( q_tex,int2);
#if (EVFLAG == 1) #if (EVFLAG == 1)
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \ #define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
offset, eflag, vflag, ans, engv, ev_stride) \ offset, eflag, vflag, ans, engv, ev_stride) \
if (t_per_atom>1) { \ if (t_per_atom>1) { \
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
@ -376,7 +375,7 @@ _texture( q_tex,int2);
// EVFLAG == 0 // EVFLAG == 0
#else #else
#define store_answers_p(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \ #define store_answers_acc(f,energy,e_coul, virial, ii, inum, tid, t_per_atom, \
offset, eflag, vflag, ans, engv, ev_stride) \ offset, eflag, vflag, ans, engv, ev_stride) \
if (t_per_atom>1) \ if (t_per_atom>1) \
simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \ simd_reduce_add3(t_per_atom, f.x, f.y, f.z); \
@ -394,6 +393,125 @@ _texture( q_tex,int2);
#define MIN(A,B) ((A) < (B) ? (A) : (B)) #define MIN(A,B) ((A) < (B) ? (A) : (B))
#define MY_PIS (acctyp)1.77245385090551602729 #define MY_PIS (acctyp)1.77245385090551602729
/* ----------------------------------------------------------------------
dispersion = real-space portion of Ewald dispersion
adapted from Tinker edreal1d() routine
------------------------------------------------------------------------- */
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra,
const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar,
const __global int *dev_nbor,
const __global int *dev_packed,
const __global int *dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
const int t_per_atom, const numtyp aewald,
const numtyp felec, const numtyp off2,
const numtyp polar_dscale, const numtyp polar_uscale)
{
int tid, ii, offset, i;
atom_info(t_per_atom,ii,tid,offset);
int n_stride;
local_allocate_store_charge();
acctyp4 f;
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
acctyp energy, e_coul, virial[6];
if (EVFLAG) {
energy=(acctyp)0;
e_coul=(acctyp)0;
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
}
acctyp4 tq;
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
numtyp4* polar1 = (numtyp4*)(&extra[0]);
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
if (ii<inum) {
int m,itype,igroup;
numtyp bfac;
numtyp term1,term2,term3;
numtyp term4,term5,term6;
numtyp bn[6];
numtyp ci,dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
int numj, nbor, nbor_end;
const __global int* nbor_mem=dev_packed;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
//numtyp qtmp; fetch(qtmp,i,q_tex);
//int itype=ix.w;
// recalculate numj and nbor_end for use of the short nbor list
if (dev_packed==dev_nbor) {
numj = dev_short_nbor[nbor];
nbor += n_stride;
nbor_end = nbor+fast_mul(numj,n_stride);
nbor_mem = dev_short_nbor;
}
ci = polar1[i].x; // rpole[i][0];
dix = polar1[i].y; // rpole[i][1];
diy = polar1[i].z; // rpole[i][2];
diz = polar1[i].w; // rpole[i][3];
qixx = polar2[i].x; // rpole[i][4];
qixy = polar2[i].y; // rpole[i][5];
qixz = polar2[i].z; // rpole[i][6];
qiyy = polar2[i].w; // rpole[i][8];
qiyz = polar3[i].x; // rpole[i][9];
qizz = polar3[i].y; // rpole[i][12];
itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i];
for ( ; nbor<nbor_end; nbor+=n_stride) {
int jextra=nbor_mem[nbor];
int j = jextra & NEIGHMASK15;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
numtyp zr = jx.z - ix.z;
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp ck = polar1[j].x; // rpole[j][0];
numtyp dkx = polar1[j].y; // rpole[j][1];
numtyp dky = polar1[j].z; // rpole[j][2];
numtyp dkz = polar1[j].w; // rpole[j][3];
numtyp qkxx = polar2[j].x; // rpole[j][4];
numtyp qkxy = polar2[j].y; // rpole[j][5];
numtyp qkxz = polar2[j].z; // rpole[j][6];
numtyp qkyy = polar2[j].w; // rpole[j][8];
numtyp qkyz = polar3[j].x; // rpole[j][9];
numtyp qkzz = polar3[j].y; // rpole[j][12];
int jtype = polar3[j].z; // amtype[j];
int jgroup = polar3[j].w; // amgroup[j];
} // nbor
} // ii<inum
// accumate force, energy and virial
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv);
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
multipole_real = real-space portion of multipole multipole_real = real-space portion of multipole
adapted from Tinker emreal1d() routine adapted from Tinker emreal1d() routine
@ -401,7 +519,7 @@ _texture( q_tex,int2);
__kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_, __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra, const __global numtyp *restrict extra,
const __global numtyp4 *restrict damping, const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar, const __global numtyp4 *restrict sp_polar,
const __global int *dev_nbor, const __global int *dev_nbor,
const __global int *dev_packed, const __global int *dev_packed,
@ -697,7 +815,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_, __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra, const __global numtyp *restrict extra,
const __global numtyp4 *restrict damping, const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar, const __global numtyp4 *restrict sp_polar,
const __global int *dev_nbor, const __global int *dev_nbor,
const __global int *dev_packed, const __global int *dev_packed,
@ -759,9 +877,9 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
// debug: // debug:
// xi__ = ix; xi__.w = itype; // xi__ = ix; xi__.w = itype;
numtyp pdi = damping[itype].x; numtyp pdi = coeff[itype].x;
numtyp pti = damping[itype].y; numtyp pti = coeff[itype].y;
numtyp ddi = damping[itype].z; numtyp ddi = coeff[itype].z;
numtyp aesq2 = (numtyp)2.0 * aewald*aewald; numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
numtyp aesq2n = (numtyp)0.0; numtyp aesq2n = (numtyp)0.0;
@ -848,9 +966,9 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
numtyp scale3 = (numtyp)1.0; numtyp scale3 = (numtyp)1.0;
numtyp scale5 = (numtyp)1.0; numtyp scale5 = (numtyp)1.0;
numtyp scale7 = (numtyp)1.0; numtyp scale7 = (numtyp)1.0;
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype] numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
if (damp != (numtyp)0.0) { if (damp != (numtyp)0.0) {
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype] numtyp pgamma = MIN(ddi,coeff[jtype].z); // dirdamp[jtype]
if (pgamma != (numtyp)0.0) { if (pgamma != (numtyp)0.0) {
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5); damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
if (damp < (numtyp)50.0) { if (damp < (numtyp)50.0) {
@ -860,7 +978,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp); scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
} }
} else { } else {
pgamma = MIN(pti,damping[jtype].y); // thole[jtype] pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
damp = pgamma * ucl_powr(r/damp,3.0); damp = pgamma * ucl_powr(r/damp,3.0);
if (damp < (numtyp)50.0) { if (damp < (numtyp)50.0) {
numtyp expdamp = ucl_exp(-damp); numtyp expdamp = ucl_exp(-damp);
@ -911,7 +1029,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_, __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra, const __global numtyp *restrict extra,
const __global numtyp4 *restrict damping, const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar, const __global numtyp4 *restrict sp_polar,
const __global int *dev_nbor, const __global int *dev_nbor,
const __global int *dev_packed, const __global int *dev_packed,
@ -962,8 +1080,8 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
itype = polar3[i].z; // amtype[i]; itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i]; igroup = polar3[i].w; // amgroup[i];
numtyp pdi = damping[itype].x; numtyp pdi = coeff[itype].x;
numtyp pti = damping[itype].y; numtyp pti = coeff[itype].y;
numtyp aesq2 = (numtyp)2.0 * aewald*aewald; numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
numtyp aesq2n = (numtyp)0.0; numtyp aesq2n = (numtyp)0.0;
@ -1025,9 +1143,9 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
// if (poltyp != DIRECT) // if (poltyp != DIRECT)
numtyp scale3 = (numtyp)1.0; numtyp scale3 = (numtyp)1.0;
numtyp scale5 = (numtyp)1.0; numtyp scale5 = (numtyp)1.0;
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype] numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
if (damp != (numtyp)0.0) { if (damp != (numtyp)0.0) {
numtyp pgamma = MIN(pti,damping[jtype].y); // thole[jtype] numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0); damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
if (damp < (numtyp)50.0) { if (damp < (numtyp)50.0) {
numtyp expdamp = ucl_exp(-damp); numtyp expdamp = ucl_exp(-damp);
@ -1131,7 +1249,7 @@ __kernel void k_special15(__global int * dev_nbor,
__kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra, const __global numtyp *restrict extra,
const __global numtyp4 *restrict damping, const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar, const __global numtyp4 *restrict sp_polar,
const __global int *dev_nbor, const __global int *dev_nbor,
const __global int *dev_packed, const __global int *dev_packed,
@ -1233,8 +1351,8 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
// debug: // debug:
// xi__ = ix; xi__.w = itype; // xi__ = ix; xi__.w = itype;
numtyp pdi = damping[itype].x; numtyp pdi = coeff[itype].x;
numtyp pti = damping[itype].y; numtyp pti = coeff[itype].y;
for ( ; nbor<nbor_end; nbor+=n_stride) { for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -1344,9 +1462,9 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
// apply Thole polarization damping to scale factors // apply Thole polarization damping to scale factors
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype] numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
if (damp != (numtyp)0.0) { if (damp != (numtyp)0.0) {
numtyp pgamma = MIN(pti,damping[jtype].y); // thole[jtype] numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0); damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
if (damp < (numtyp)50.0) { if (damp < (numtyp)50.0) {
numtyp expdamp = ucl_exp(-damp); numtyp expdamp = ucl_exp(-damp);
@ -1644,7 +1762,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
// accumate force, energy and virial // accumate force, energy and virial
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom, //store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv); // offset,eflag,vflag,ans,engv);
store_answers_p(f,energy,e_coul,virial,ii,inum,tid,t_per_atom, store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X); offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
} }

View File

@ -37,12 +37,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
* - -3 if there is an out of memory error * - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU * - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/ * - -5 Double precision is not supported on card **/
int init(const int ntypes, const int max_amtype, const double *host_pdamp, int init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_thole, const double *host_dirdamp, const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
const double *host_special_mpole, const int *host_amtype2class, const double *host_special_mpole,
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_csix, const double *host_adisp,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const double cell_size, const int maxspecial, const int maxspecial15, const double cell_size,
const double gpu_split, FILE *_screen, const double gpu_split, FILE *_screen,
@ -60,8 +61,11 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
// --------------------------- TYPE DATA -------------------------- // --------------------------- TYPE DATA --------------------------
/// pdamp = damping.x; thole = damping.y /// pdamp = coeff_amtype.x; thole = coeff_amtype.y;
UCL_D_Vec<numtyp4> damping; /// dirdamp = coeff_amtype.z; amtype2class = coeff_amtype.w
UCL_D_Vec<numtyp4> coeff_amtype;
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
UCL_D_Vec<numtyp4> coeff_amclass;
/// Special polar values [0-4]: /// Special polar values [0-4]:
/// sp_polar.x = special_polar_wscale /// sp_polar.x = special_polar_wscale
/// sp_polar.y special_polar_pscale, /// sp_polar.y special_polar_pscale,

View File

@ -27,13 +27,14 @@ static Amoeba<PRECISION,ACC_PRECISION> AMOEBAMF;
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device // Allocate memory on host and device and copy constants to device
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
int amoeba_gpu_init(const int ntypes, const int max_amtype, int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole, const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const double *host_dirdamp, const int *host_amtype2class,
const double *host_special_mpole, const double *host_special_mpole,
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_csix, const double *host_adisp,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
@ -63,11 +64,13 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
int init_ok=0; int init_ok=0;
if (world_me==0) if (world_me==0)
init_ok=AMOEBAMF.init(ntypes, max_amtype, host_pdamp, host_thole, host_dirdamp, init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
host_special_mpole, host_special_polar_wscale, host_pdamp, host_thole, host_dirdamp,
host_amtype2class, host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale, host_special_polar_piscale, host_special_polar_pscale,
nlocal, nall, max_nbors, maxspecial, maxspecial15, host_csix, host_adisp, nlocal, nall, max_nbors,
cell_size, gpu_split, screen, polar_dscale, polar_uscale); maxspecial, maxspecial15, cell_size, gpu_split,
screen, polar_dscale, polar_uscale);
AMOEBAMF.device->world_barrier(); AMOEBAMF.device->world_barrier();
if (message) if (message)
@ -83,11 +86,12 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
fflush(screen); fflush(screen);
} }
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
init_ok=AMOEBAMF.init(ntypes, max_amtype, host_pdamp, host_thole, host_dirdamp, init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp,
host_special_mpole, host_special_polar_wscale, host_amtype2class, host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale, host_special_polar_piscale, host_special_polar_pscale,
nlocal, nall, max_nbors, maxspecial, maxspecial15, host_csix, host_adisp, nlocal, nall, max_nbors,
cell_size, gpu_split, screen, polar_dscale, polar_uscale); maxspecial, maxspecial15, cell_size, gpu_split,
screen, polar_dscale, polar_uscale);
AMOEBAMF.device->gpu_barrier(); AMOEBAMF.device->gpu_barrier();
if (message) if (message)

View File

@ -50,13 +50,14 @@ enum{GORDON1,GORDON2};
// External functions from cuda library for atom decomposition // External functions from cuda library for atom decomposition
int amoeba_gpu_init(const int ntypes, const int max_amtype, int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole, const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const double *host_dirdamp, const int* host_amtype2class,
const double *host_special_mpole, const double *host_special_mpole,
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_csix, const double *host_adisp,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
@ -168,9 +169,10 @@ void PairAmoebaGPU::init_style()
int tq_size; int tq_size;
int mnf = 5e-2 * neighbor->oneatom; int mnf = 5e-2 * neighbor->oneatom;
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, pdamp, thole, dirdamp, int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
special_mpole, special_polar_wscale, special_polar_piscale, pdamp, thole, dirdamp, amtype2class, special_mpole,
special_polar_pscale, atom->nlocal, special_polar_wscale, special_polar_piscale,
special_polar_pscale, csix, adisp, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial, atom->nlocal+atom->nghost, mnf, maxspecial,
maxspecial15, cell_size, gpu_mode, screen, maxspecial15, cell_size, gpu_mode, screen,
polar_dscale, polar_uscale, tq_size); polar_dscale, polar_uscale, tq_size);