Added the dispersion real space kernel and transfer special coeffs to the device
This commit is contained in:
@ -47,6 +47,9 @@ template <class numtyp, class acctyp>
|
||||
int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_pdamp, const double *host_thole,
|
||||
const double *host_dirdamp, const int *host_amtype2class,
|
||||
const double *host_special_hal,
|
||||
const double *host_special_repel,
|
||||
const double *host_special_disp,
|
||||
const double *host_special_mpole,
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
@ -109,12 +112,21 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
}
|
||||
ucl_copy(sp_polar,dview,5,false);
|
||||
|
||||
sp_nonpolar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<5; i++) {
|
||||
dview[i].x=host_special_hal[i];
|
||||
dview[i].y=host_special_repel[i];
|
||||
dview[i].z=host_special_disp[i];
|
||||
dview[i].w=(numtyp)0;
|
||||
}
|
||||
ucl_copy(sp_nonpolar,dview,5,false);
|
||||
|
||||
_polar_dscale = polar_dscale;
|
||||
_polar_uscale = polar_uscale;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
|
||||
+ sp_polar.row_bytes() + this->_tep.row_bytes();
|
||||
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -125,7 +137,9 @@ void AmoebaT::clear() {
|
||||
_allocated=false;
|
||||
|
||||
coeff_amtype.clear();
|
||||
coeff_amclass.clear();
|
||||
sp_polar.clear();
|
||||
sp_nonpolar.clear();
|
||||
|
||||
this->clear_atomic();
|
||||
}
|
||||
|
||||
@ -400,8 +400,9 @@ _texture( q_tex,int2);
|
||||
|
||||
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict extra,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const __global numtyp4 *restrict sp_polar,
|
||||
const __global numtyp4 *restrict coeff_amtype,
|
||||
const __global numtyp4 *restrict coeff_amclass,
|
||||
const __global numtyp4 *restrict sp_disp,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const __global int *dev_short_nbor,
|
||||
@ -428,20 +429,11 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
||||
}
|
||||
|
||||
acctyp4 tq;
|
||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||
|
||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||
|
||||
if (ii<inum) {
|
||||
int m,itype,igroup;
|
||||
numtyp bfac;
|
||||
numtyp term1,term2,term3;
|
||||
numtyp term4,term5,term6;
|
||||
numtyp bn[6];
|
||||
numtyp ci,dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||
int itype,iclass;
|
||||
numtyp ci,ai;
|
||||
|
||||
int numj, nbor, nbor_end;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
@ -460,18 +452,10 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||
nbor_mem = dev_short_nbor;
|
||||
}
|
||||
|
||||
ci = polar1[i].x; // rpole[i][0];
|
||||
dix = polar1[i].y; // rpole[i][1];
|
||||
diy = polar1[i].z; // rpole[i][2];
|
||||
diz = polar1[i].w; // rpole[i][3];
|
||||
qixx = polar2[i].x; // rpole[i][4];
|
||||
qixy = polar2[i].y; // rpole[i][5];
|
||||
qixz = polar2[i].z; // rpole[i][6];
|
||||
qiyy = polar2[i].w; // rpole[i][8];
|
||||
qiyz = polar3[i].x; // rpole[i][9];
|
||||
qizz = polar3[i].y; // rpole[i][12];
|
||||
itype = polar3[i].z; // amtype[i];
|
||||
igroup = polar3[i].w; // amgroup[i];
|
||||
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||
ci = coeff_amclass[iclass].x; // csix[iclass];
|
||||
ai = coeff_amclass[iclass].y; // adisp[iclass];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
@ -482,34 +466,115 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
|
||||
//int jtype=jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp xr = jx.x - ix.x;
|
||||
numtyp yr = jx.y - ix.y;
|
||||
numtyp zr = jx.z - ix.z;
|
||||
numtyp xr = ix.x - jx.x;
|
||||
numtyp yr = ix.y - jx.y;
|
||||
numtyp zr = ix.z - jx.z;
|
||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||
|
||||
//if (r2>off2) continue;
|
||||
|
||||
numtyp r = ucl_sqrt(r2);
|
||||
numtyp ck = polar1[j].x; // rpole[j][0];
|
||||
numtyp dkx = polar1[j].y; // rpole[j][1];
|
||||
numtyp dky = polar1[j].z; // rpole[j][2];
|
||||
numtyp dkz = polar1[j].w; // rpole[j][3];
|
||||
numtyp qkxx = polar2[j].x; // rpole[j][4];
|
||||
numtyp qkxy = polar2[j].y; // rpole[j][5];
|
||||
numtyp qkxz = polar2[j].z; // rpole[j][6];
|
||||
numtyp qkyy = polar2[j].w; // rpole[j][8];
|
||||
numtyp qkyz = polar3[j].x; // rpole[j][9];
|
||||
numtyp qkzz = polar3[j].y; // rpole[j][12];
|
||||
int jtype = polar3[j].z; // amtype[j];
|
||||
int jgroup = polar3[j].w; // amgroup[j];
|
||||
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
|
||||
numtyp ck = coeff_amclass[jclass].x; // csix[jclass];
|
||||
numtyp ak = coeff_amclass[jclass].y; // adisp[jclass];
|
||||
|
||||
numtyp r6 = r2*r2*r2;
|
||||
numtyp ralpha2 = r2 * aewald*aewald;
|
||||
numtyp term = (numtyp)1.0 + ralpha2 + (numtyp)0.5*ralpha2*ralpha2;
|
||||
numtyp expterm = ucl_exp(-ralpha2);
|
||||
numtyp expa = expterm * term;
|
||||
|
||||
// find the damping factor for the dispersion interaction
|
||||
|
||||
numtyp r = ucl_sqrt(r2);
|
||||
numtyp r7 = r6 * r;
|
||||
numtyp di = ai * r;
|
||||
numtyp di2 = di * di;
|
||||
numtyp di3 = di * di2;
|
||||
numtyp dk = ak * r;
|
||||
numtyp expi = ucl_exp(-di);
|
||||
numtyp expk = ucl_exp(-dk);
|
||||
|
||||
numtyp ai2,ak2;
|
||||
numtyp di4,di5;
|
||||
numtyp dk2,dk3;
|
||||
numtyp ti,ti2;
|
||||
numtyp tk,tk2;
|
||||
numtyp damp3,damp5;
|
||||
numtyp ddamp;
|
||||
numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)];
|
||||
|
||||
if (ai != ak) {
|
||||
ai2 = ai * ai;
|
||||
ak2 = ak * ak;
|
||||
dk2 = dk * dk;
|
||||
dk3 = dk * dk2;
|
||||
ti = ak2 / (ak2-ai2);
|
||||
ti2 = ti * ti;
|
||||
tk = ai2 / (ai2-ak2);
|
||||
tk2 = tk * tk;
|
||||
damp3 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2) * expi
|
||||
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2) * expk
|
||||
- (numtyp)2.0*ti2*tk * ((numtyp)1.0 + di)* expi
|
||||
- (numtyp)2.0*tk2*ti * ((numtyp)1.0 + dk) *expk;
|
||||
damp5 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2 + di3/(numtyp)6.0) * expi
|
||||
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk
|
||||
- (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi
|
||||
- (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk;
|
||||
ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
|
||||
(numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0);
|
||||
|
||||
} else {
|
||||
di4 = di2 * di2;
|
||||
di5 = di2 * di3;
|
||||
damp3 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + (numtyp)7.0*di3/(numtyp)48.0+di4/(numtyp)48.0)*expi;
|
||||
damp5 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + di3/(numtyp)6.0+di4/(numtyp)24.0+di5/(numtyp)144.0)*expi;
|
||||
ddamp = ai * expi * (di5-(numtyp)3.0*di3-(numtyp)3.0*di2) / (numtyp)96.0;
|
||||
}
|
||||
|
||||
numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3;
|
||||
|
||||
// apply damping and scaling factors for this interaction
|
||||
|
||||
numtyp scale = factor_disp * damp*damp;
|
||||
scale = scale - (numtyp )1.0;
|
||||
numtyp e = -ci * ck * (expa+scale) / r6;
|
||||
numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r;
|
||||
numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7;
|
||||
|
||||
energy+= e;
|
||||
|
||||
// increment the damped dispersion derivative components
|
||||
|
||||
numtyp dedx = de * xr;
|
||||
numtyp dedy = de * yr;
|
||||
numtyp dedz = de * zr;
|
||||
f.x += dedx;
|
||||
f.y += dedy;
|
||||
f.z += dedz;
|
||||
|
||||
// increment the internal virial tensor components
|
||||
|
||||
numtyp vxx = xr * dedx;
|
||||
numtyp vyx = yr * dedx;
|
||||
numtyp vzx = zr * dedx;
|
||||
numtyp vyy = yr * dedy;
|
||||
numtyp vzy = zr * dedy;
|
||||
numtyp vzz = zr * dedz;
|
||||
|
||||
virial[0] += vxx;
|
||||
virial[1] += vyy;
|
||||
virial[2] += vzz;
|
||||
virial[3] += vyx;
|
||||
virial[4] += vzx;
|
||||
virial[5] += vzy;
|
||||
} // nbor
|
||||
|
||||
} // ii<inum
|
||||
|
||||
// accumate force, energy and virial
|
||||
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
// offset,eflag,vflag,ans,engv);
|
||||
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
offset,eflag,vflag,ans,engv);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -556,7 +621,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||
|
||||
if (ii<inum) {
|
||||
int m,itype,igroup;
|
||||
int m;
|
||||
numtyp bfac;
|
||||
numtyp term1,term2,term3;
|
||||
numtyp term4,term5,term6;
|
||||
@ -590,8 +655,6 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
||||
qiyy = polar2[i].w; // rpole[i][8];
|
||||
qiyz = polar3[i].x; // rpole[i][9];
|
||||
qizz = polar3[i].y; // rpole[i][12];
|
||||
itype = polar3[i].z; // amtype[i];
|
||||
igroup = polar3[i].w; // amgroup[i];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
@ -1391,9 +1454,8 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
||||
numtyp ukyp = polar5[j].y; // uinp[j][1];
|
||||
numtyp ukzp = polar5[j].z; // uinp[j][2];
|
||||
|
||||
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
||||
numtyp factor_dscale, factor_pscale, factor_uscale;
|
||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||
factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
|
||||
if (igroup == jgroup) {
|
||||
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
||||
factor_dscale = polar_dscale;
|
||||
|
||||
@ -40,6 +40,8 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
||||
int init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
|
||||
const int *host_amtype2class, const double *host_special_mpole,
|
||||
const double *host_special_hal, const double *host_special_repel,
|
||||
const double *host_special_disp,
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
@ -70,7 +72,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
||||
/// sp_polar.x = special_polar_wscale
|
||||
/// sp_polar.y special_polar_pscale,
|
||||
/// sp_polar.z = special_polar_piscale
|
||||
/// sp_polar.w = special_mpole
|
||||
UCL_D_Vec<numtyp4> sp_polar;
|
||||
/// Special nonpolar values [0-4]:
|
||||
/// sp_nonpolar.x = special_hal
|
||||
/// sp_nonpolar.y special_repel
|
||||
/// sp_nonpolar.z = special_disp
|
||||
UCL_D_Vec<numtyp4> sp_nonpolar;
|
||||
|
||||
/// If atom type constants fit in shared memory, use fast kernels
|
||||
bool shared_types;
|
||||
|
||||
@ -30,6 +30,9 @@ static Amoeba<PRECISION,ACC_PRECISION> AMOEBAMF;
|
||||
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_pdamp, const double *host_thole,
|
||||
const double *host_dirdamp, const int *host_amtype2class,
|
||||
const double *host_special_hal,
|
||||
const double *host_special_repel,
|
||||
const double *host_special_disp,
|
||||
const double *host_special_mpole,
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
@ -66,7 +69,9 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
|
||||
if (world_me==0)
|
||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
||||
host_pdamp, host_thole, host_dirdamp,
|
||||
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
||||
host_amtype2class, host_special_hal,
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
@ -86,8 +91,11 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
|
||||
fflush(screen);
|
||||
}
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp,
|
||||
host_amtype2class, host_special_mpole, host_special_polar_wscale,
|
||||
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
|
||||
host_pdamp, host_thole, host_dirdamp,
|
||||
host_amtype2class, host_special_hal,
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
|
||||
@ -53,7 +53,8 @@ enum{GORDON1,GORDON2};
|
||||
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_pdamp, const double *host_thole,
|
||||
const double *host_dirdamp, const int* host_amtype2class,
|
||||
const double *host_special_mpole,
|
||||
const double *host_special_hal, const double *host_special_repel,
|
||||
const double *host_special_disp, const double *host_special_mpole,
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
@ -116,6 +117,9 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
||||
fieldp_pinned = nullptr;
|
||||
tq_pinned = nullptr;
|
||||
|
||||
gpu_hal_ready = false;
|
||||
gpu_repulsion_ready = false;
|
||||
gpu_dispersion_real_ready = false;
|
||||
gpu_multipole_real_ready = true;
|
||||
gpu_udirect2b_ready = true;
|
||||
gpu_umutual2b_ready = true;
|
||||
@ -170,7 +174,8 @@ void PairAmoebaGPU::init_style()
|
||||
int tq_size;
|
||||
int mnf = 5e-2 * neighbor->oneatom;
|
||||
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
|
||||
pdamp, thole, dirdamp, amtype2class, special_mpole,
|
||||
pdamp, thole, dirdamp, amtype2class, special_hal,
|
||||
special_repel, special_disp, special_mpole,
|
||||
special_polar_wscale, special_polar_piscale,
|
||||
special_polar_pscale, csix, adisp, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, mnf, maxspecial,
|
||||
|
||||
@ -47,6 +47,9 @@ class PairAmoebaGPU : public PairAmoeba {
|
||||
void *fieldp_pinned;
|
||||
bool tq_single;
|
||||
|
||||
bool gpu_hal_ready;
|
||||
bool gpu_repulsion_ready;
|
||||
bool gpu_dispersion_real_ready;
|
||||
bool gpu_multipole_real_ready;
|
||||
bool gpu_udirect2b_ready;
|
||||
bool gpu_umutual2b_ready;
|
||||
|
||||
Reference in New Issue
Block a user