Added the dispersion real space kernel and transfer special coeffs to the device
This commit is contained in:
@ -47,6 +47,9 @@ template <class numtyp, class acctyp>
|
|||||||
int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp, const int *host_amtype2class,
|
const double *host_dirdamp, const int *host_amtype2class,
|
||||||
|
const double *host_special_hal,
|
||||||
|
const double *host_special_repel,
|
||||||
|
const double *host_special_disp,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
@ -109,12 +112,21 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
|||||||
}
|
}
|
||||||
ucl_copy(sp_polar,dview,5,false);
|
ucl_copy(sp_polar,dview,5,false);
|
||||||
|
|
||||||
|
sp_nonpolar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
for (int i=0; i<5; i++) {
|
||||||
|
dview[i].x=host_special_hal[i];
|
||||||
|
dview[i].y=host_special_repel[i];
|
||||||
|
dview[i].z=host_special_disp[i];
|
||||||
|
dview[i].w=(numtyp)0;
|
||||||
|
}
|
||||||
|
ucl_copy(sp_nonpolar,dview,5,false);
|
||||||
|
|
||||||
_polar_dscale = polar_dscale;
|
_polar_dscale = polar_dscale;
|
||||||
_polar_uscale = polar_uscale;
|
_polar_uscale = polar_uscale;
|
||||||
|
|
||||||
_allocated=true;
|
_allocated=true;
|
||||||
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
|
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
|
||||||
+ sp_polar.row_bytes() + this->_tep.row_bytes();
|
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,7 +137,9 @@ void AmoebaT::clear() {
|
|||||||
_allocated=false;
|
_allocated=false;
|
||||||
|
|
||||||
coeff_amtype.clear();
|
coeff_amtype.clear();
|
||||||
|
coeff_amclass.clear();
|
||||||
sp_polar.clear();
|
sp_polar.clear();
|
||||||
|
sp_nonpolar.clear();
|
||||||
|
|
||||||
this->clear_atomic();
|
this->clear_atomic();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -400,8 +400,9 @@ _texture( q_tex,int2);
|
|||||||
|
|
||||||
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
|
const __global numtyp4 *restrict sp_disp,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
const __global int *dev_packed,
|
const __global int *dev_packed,
|
||||||
const __global int *dev_short_nbor,
|
const __global int *dev_short_nbor,
|
||||||
@ -428,20 +429,11 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
||||||
}
|
}
|
||||||
|
|
||||||
acctyp4 tq;
|
|
||||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int m,itype,igroup;
|
int itype,iclass;
|
||||||
numtyp bfac;
|
numtyp ci,ai;
|
||||||
numtyp term1,term2,term3;
|
|
||||||
numtyp term4,term5,term6;
|
|
||||||
numtyp bn[6];
|
|
||||||
numtyp ci,dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
|
||||||
|
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
const __global int* nbor_mem=dev_packed;
|
const __global int* nbor_mem=dev_packed;
|
||||||
@ -460,18 +452,10 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
nbor_mem = dev_short_nbor;
|
nbor_mem = dev_short_nbor;
|
||||||
}
|
}
|
||||||
|
|
||||||
ci = polar1[i].x; // rpole[i][0];
|
|
||||||
dix = polar1[i].y; // rpole[i][1];
|
|
||||||
diy = polar1[i].z; // rpole[i][2];
|
|
||||||
diz = polar1[i].w; // rpole[i][3];
|
|
||||||
qixx = polar2[i].x; // rpole[i][4];
|
|
||||||
qixy = polar2[i].y; // rpole[i][5];
|
|
||||||
qixz = polar2[i].z; // rpole[i][6];
|
|
||||||
qiyy = polar2[i].w; // rpole[i][8];
|
|
||||||
qiyz = polar3[i].x; // rpole[i][9];
|
|
||||||
qizz = polar3[i].y; // rpole[i][12];
|
|
||||||
itype = polar3[i].z; // amtype[i];
|
itype = polar3[i].z; // amtype[i];
|
||||||
igroup = polar3[i].w; // amgroup[i];
|
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||||
|
ci = coeff_amclass[iclass].x; // csix[iclass];
|
||||||
|
ai = coeff_amclass[iclass].y; // adisp[iclass];
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
@ -482,34 +466,115 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
//int jtype=jx.w;
|
//int jtype=jx.w;
|
||||||
|
|
||||||
// Compute r12
|
// Compute r12
|
||||||
numtyp xr = jx.x - ix.x;
|
numtyp xr = ix.x - jx.x;
|
||||||
numtyp yr = jx.y - ix.y;
|
numtyp yr = ix.y - jx.y;
|
||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = ix.z - jx.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
//if (r2>off2) continue;
|
//if (r2>off2) continue;
|
||||||
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
|
||||||
numtyp ck = polar1[j].x; // rpole[j][0];
|
|
||||||
numtyp dkx = polar1[j].y; // rpole[j][1];
|
|
||||||
numtyp dky = polar1[j].z; // rpole[j][2];
|
|
||||||
numtyp dkz = polar1[j].w; // rpole[j][3];
|
|
||||||
numtyp qkxx = polar2[j].x; // rpole[j][4];
|
|
||||||
numtyp qkxy = polar2[j].y; // rpole[j][5];
|
|
||||||
numtyp qkxz = polar2[j].z; // rpole[j][6];
|
|
||||||
numtyp qkyy = polar2[j].w; // rpole[j][8];
|
|
||||||
numtyp qkyz = polar3[j].x; // rpole[j][9];
|
|
||||||
numtyp qkzz = polar3[j].y; // rpole[j][12];
|
|
||||||
int jtype = polar3[j].z; // amtype[j];
|
int jtype = polar3[j].z; // amtype[j];
|
||||||
int jgroup = polar3[j].w; // amgroup[j];
|
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
|
||||||
|
numtyp ck = coeff_amclass[jclass].x; // csix[jclass];
|
||||||
|
numtyp ak = coeff_amclass[jclass].y; // adisp[jclass];
|
||||||
|
|
||||||
|
numtyp r6 = r2*r2*r2;
|
||||||
|
numtyp ralpha2 = r2 * aewald*aewald;
|
||||||
|
numtyp term = (numtyp)1.0 + ralpha2 + (numtyp)0.5*ralpha2*ralpha2;
|
||||||
|
numtyp expterm = ucl_exp(-ralpha2);
|
||||||
|
numtyp expa = expterm * term;
|
||||||
|
|
||||||
|
// find the damping factor for the dispersion interaction
|
||||||
|
|
||||||
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
numtyp r7 = r6 * r;
|
||||||
|
numtyp di = ai * r;
|
||||||
|
numtyp di2 = di * di;
|
||||||
|
numtyp di3 = di * di2;
|
||||||
|
numtyp dk = ak * r;
|
||||||
|
numtyp expi = ucl_exp(-di);
|
||||||
|
numtyp expk = ucl_exp(-dk);
|
||||||
|
|
||||||
|
numtyp ai2,ak2;
|
||||||
|
numtyp di4,di5;
|
||||||
|
numtyp dk2,dk3;
|
||||||
|
numtyp ti,ti2;
|
||||||
|
numtyp tk,tk2;
|
||||||
|
numtyp damp3,damp5;
|
||||||
|
numtyp ddamp;
|
||||||
|
numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)];
|
||||||
|
|
||||||
|
if (ai != ak) {
|
||||||
|
ai2 = ai * ai;
|
||||||
|
ak2 = ak * ak;
|
||||||
|
dk2 = dk * dk;
|
||||||
|
dk3 = dk * dk2;
|
||||||
|
ti = ak2 / (ak2-ai2);
|
||||||
|
ti2 = ti * ti;
|
||||||
|
tk = ai2 / (ai2-ak2);
|
||||||
|
tk2 = tk * tk;
|
||||||
|
damp3 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2) * expi
|
||||||
|
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2) * expk
|
||||||
|
- (numtyp)2.0*ti2*tk * ((numtyp)1.0 + di)* expi
|
||||||
|
- (numtyp)2.0*tk2*ti * ((numtyp)1.0 + dk) *expk;
|
||||||
|
damp5 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2 + di3/(numtyp)6.0) * expi
|
||||||
|
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk
|
||||||
|
- (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi
|
||||||
|
- (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk;
|
||||||
|
ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
|
||||||
|
(numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
di4 = di2 * di2;
|
||||||
|
di5 = di2 * di3;
|
||||||
|
damp3 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + (numtyp)7.0*di3/(numtyp)48.0+di4/(numtyp)48.0)*expi;
|
||||||
|
damp5 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + di3/(numtyp)6.0+di4/(numtyp)24.0+di5/(numtyp)144.0)*expi;
|
||||||
|
ddamp = ai * expi * (di5-(numtyp)3.0*di3-(numtyp)3.0*di2) / (numtyp)96.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3;
|
||||||
|
|
||||||
|
// apply damping and scaling factors for this interaction
|
||||||
|
|
||||||
|
numtyp scale = factor_disp * damp*damp;
|
||||||
|
scale = scale - (numtyp )1.0;
|
||||||
|
numtyp e = -ci * ck * (expa+scale) / r6;
|
||||||
|
numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r;
|
||||||
|
numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7;
|
||||||
|
|
||||||
|
energy+= e;
|
||||||
|
|
||||||
|
// increment the damped dispersion derivative components
|
||||||
|
|
||||||
|
numtyp dedx = de * xr;
|
||||||
|
numtyp dedy = de * yr;
|
||||||
|
numtyp dedz = de * zr;
|
||||||
|
f.x += dedx;
|
||||||
|
f.y += dedy;
|
||||||
|
f.z += dedz;
|
||||||
|
|
||||||
|
// increment the internal virial tensor components
|
||||||
|
|
||||||
|
numtyp vxx = xr * dedx;
|
||||||
|
numtyp vyx = yr * dedx;
|
||||||
|
numtyp vzx = zr * dedx;
|
||||||
|
numtyp vyy = yr * dedy;
|
||||||
|
numtyp vzy = zr * dedy;
|
||||||
|
numtyp vzz = zr * dedz;
|
||||||
|
|
||||||
|
virial[0] += vxx;
|
||||||
|
virial[1] += vyy;
|
||||||
|
virial[2] += vzz;
|
||||||
|
virial[3] += vyx;
|
||||||
|
virial[4] += vzx;
|
||||||
|
virial[5] += vzy;
|
||||||
} // nbor
|
} // nbor
|
||||||
|
|
||||||
} // ii<inum
|
} // ii<inum
|
||||||
|
|
||||||
// accumate force, energy and virial
|
// accumate force, energy and virial
|
||||||
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
// offset,eflag,vflag,ans,engv);
|
offset,eflag,vflag,ans,engv);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -556,7 +621,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int m,itype,igroup;
|
int m;
|
||||||
numtyp bfac;
|
numtyp bfac;
|
||||||
numtyp term1,term2,term3;
|
numtyp term1,term2,term3;
|
||||||
numtyp term4,term5,term6;
|
numtyp term4,term5,term6;
|
||||||
@ -590,8 +655,6 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
qiyy = polar2[i].w; // rpole[i][8];
|
qiyy = polar2[i].w; // rpole[i][8];
|
||||||
qiyz = polar3[i].x; // rpole[i][9];
|
qiyz = polar3[i].x; // rpole[i][9];
|
||||||
qizz = polar3[i].y; // rpole[i][12];
|
qizz = polar3[i].y; // rpole[i][12];
|
||||||
itype = polar3[i].z; // amtype[i];
|
|
||||||
igroup = polar3[i].w; // amgroup[i];
|
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
@ -1391,9 +1454,8 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
numtyp ukyp = polar5[j].y; // uinp[j][1];
|
numtyp ukyp = polar5[j].y; // uinp[j][1];
|
||||||
numtyp ukzp = polar5[j].z; // uinp[j][2];
|
numtyp ukzp = polar5[j].z; // uinp[j][2];
|
||||||
|
|
||||||
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
numtyp factor_dscale, factor_pscale, factor_uscale;
|
||||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||||
factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
|
|
||||||
if (igroup == jgroup) {
|
if (igroup == jgroup) {
|
||||||
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
||||||
factor_dscale = polar_dscale;
|
factor_dscale = polar_dscale;
|
||||||
|
|||||||
@ -40,6 +40,8 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
int init(const int ntypes, const int max_amtype, const int max_amclass,
|
int init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
|
const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
|
||||||
const int *host_amtype2class, const double *host_special_mpole,
|
const int *host_amtype2class, const double *host_special_mpole,
|
||||||
|
const double *host_special_hal, const double *host_special_repel,
|
||||||
|
const double *host_special_disp,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
@ -70,7 +72,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
/// sp_polar.x = special_polar_wscale
|
/// sp_polar.x = special_polar_wscale
|
||||||
/// sp_polar.y special_polar_pscale,
|
/// sp_polar.y special_polar_pscale,
|
||||||
/// sp_polar.z = special_polar_piscale
|
/// sp_polar.z = special_polar_piscale
|
||||||
|
/// sp_polar.w = special_mpole
|
||||||
UCL_D_Vec<numtyp4> sp_polar;
|
UCL_D_Vec<numtyp4> sp_polar;
|
||||||
|
/// Special nonpolar values [0-4]:
|
||||||
|
/// sp_nonpolar.x = special_hal
|
||||||
|
/// sp_nonpolar.y special_repel
|
||||||
|
/// sp_nonpolar.z = special_disp
|
||||||
|
UCL_D_Vec<numtyp4> sp_nonpolar;
|
||||||
|
|
||||||
/// If atom type constants fit in shared memory, use fast kernels
|
/// If atom type constants fit in shared memory, use fast kernels
|
||||||
bool shared_types;
|
bool shared_types;
|
||||||
|
|||||||
@ -30,6 +30,9 @@ static Amoeba<PRECISION,ACC_PRECISION> AMOEBAMF;
|
|||||||
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp, const int *host_amtype2class,
|
const double *host_dirdamp, const int *host_amtype2class,
|
||||||
|
const double *host_special_hal,
|
||||||
|
const double *host_special_repel,
|
||||||
|
const double *host_special_disp,
|
||||||
const double *host_special_mpole,
|
const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
@ -66,7 +69,9 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
|
|||||||
if (world_me==0)
|
if (world_me==0)
|
||||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
||||||
host_pdamp, host_thole, host_dirdamp,
|
host_pdamp, host_thole, host_dirdamp,
|
||||||
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
host_amtype2class, host_special_hal,
|
||||||
|
host_special_repel, host_special_disp,
|
||||||
|
host_special_mpole, host_special_polar_wscale,
|
||||||
host_special_polar_piscale, host_special_polar_pscale,
|
host_special_polar_piscale, host_special_polar_pscale,
|
||||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||||
@ -86,8 +91,11 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
|
|||||||
fflush(screen);
|
fflush(screen);
|
||||||
}
|
}
|
||||||
if (gpu_rank==i && world_me!=0)
|
if (gpu_rank==i && world_me!=0)
|
||||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp,
|
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
||||||
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
host_pdamp, host_thole, host_dirdamp,
|
||||||
|
host_amtype2class, host_special_hal,
|
||||||
|
host_special_repel, host_special_disp,
|
||||||
|
host_special_mpole, host_special_polar_wscale,
|
||||||
host_special_polar_piscale, host_special_polar_pscale,
|
host_special_polar_piscale, host_special_polar_pscale,
|
||||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||||
|
|||||||
@ -53,7 +53,8 @@ enum{GORDON1,GORDON2};
|
|||||||
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||||
const double *host_pdamp, const double *host_thole,
|
const double *host_pdamp, const double *host_thole,
|
||||||
const double *host_dirdamp, const int* host_amtype2class,
|
const double *host_dirdamp, const int* host_amtype2class,
|
||||||
const double *host_special_mpole,
|
const double *host_special_hal, const double *host_special_repel,
|
||||||
|
const double *host_special_disp, const double *host_special_mpole,
|
||||||
const double *host_special_polar_wscale,
|
const double *host_special_polar_wscale,
|
||||||
const double *host_special_polar_piscale,
|
const double *host_special_polar_piscale,
|
||||||
const double *host_special_polar_pscale,
|
const double *host_special_polar_pscale,
|
||||||
@ -116,6 +117,9 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
|||||||
fieldp_pinned = nullptr;
|
fieldp_pinned = nullptr;
|
||||||
tq_pinned = nullptr;
|
tq_pinned = nullptr;
|
||||||
|
|
||||||
|
gpu_hal_ready = false;
|
||||||
|
gpu_repulsion_ready = false;
|
||||||
|
gpu_dispersion_real_ready = false;
|
||||||
gpu_multipole_real_ready = true;
|
gpu_multipole_real_ready = true;
|
||||||
gpu_udirect2b_ready = true;
|
gpu_udirect2b_ready = true;
|
||||||
gpu_umutual2b_ready = true;
|
gpu_umutual2b_ready = true;
|
||||||
@ -170,7 +174,8 @@ void PairAmoebaGPU::init_style()
|
|||||||
int tq_size;
|
int tq_size;
|
||||||
int mnf = 5e-2 * neighbor->oneatom;
|
int mnf = 5e-2 * neighbor->oneatom;
|
||||||
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
|
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
|
||||||
pdamp, thole, dirdamp, amtype2class, special_mpole,
|
pdamp, thole, dirdamp, amtype2class, special_hal,
|
||||||
|
special_repel, special_disp, special_mpole,
|
||||||
special_polar_wscale, special_polar_piscale,
|
special_polar_wscale, special_polar_piscale,
|
||||||
special_polar_pscale, csix, adisp, atom->nlocal,
|
special_polar_pscale, csix, adisp, atom->nlocal,
|
||||||
atom->nlocal+atom->nghost, mnf, maxspecial,
|
atom->nlocal+atom->nghost, mnf, maxspecial,
|
||||||
|
|||||||
@ -47,6 +47,9 @@ class PairAmoebaGPU : public PairAmoeba {
|
|||||||
void *fieldp_pinned;
|
void *fieldp_pinned;
|
||||||
bool tq_single;
|
bool tq_single;
|
||||||
|
|
||||||
|
bool gpu_hal_ready;
|
||||||
|
bool gpu_repulsion_ready;
|
||||||
|
bool gpu_dispersion_real_ready;
|
||||||
bool gpu_multipole_real_ready;
|
bool gpu_multipole_real_ready;
|
||||||
bool gpu_udirect2b_ready;
|
bool gpu_udirect2b_ready;
|
||||||
bool gpu_umutual2b_ready;
|
bool gpu_umutual2b_ready;
|
||||||
|
|||||||
Reference in New Issue
Block a user