Added the dispersion real space kernel and transfer special coeffs to the device

This commit is contained in:
Trung Nguyen
2021-09-19 23:40:43 -05:00
parent 1166845fcf
commit 0228867d8e
6 changed files with 153 additions and 53 deletions

View File

@ -47,6 +47,9 @@ template <class numtyp, class acctyp>
int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const int *host_amtype2class,
const double *host_special_hal,
const double *host_special_repel,
const double *host_special_disp,
const double *host_special_mpole,
const double *host_special_polar_wscale,
const double *host_special_polar_piscale,
@ -109,12 +112,21 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass,
}
ucl_copy(sp_polar,dview,5,false);
sp_nonpolar.alloc(5,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<5; i++) {
dview[i].x=host_special_hal[i];
dview[i].y=host_special_repel[i];
dview[i].z=host_special_disp[i];
dview[i].w=(numtyp)0;
}
ucl_copy(sp_nonpolar,dview,5,false);
_polar_dscale = polar_dscale;
_polar_uscale = polar_uscale;
_allocated=true;
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
+ sp_polar.row_bytes() + this->_tep.row_bytes();
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
return 0;
}
@ -125,7 +137,9 @@ void AmoebaT::clear() {
_allocated=false;
coeff_amtype.clear();
coeff_amclass.clear();
sp_polar.clear();
sp_nonpolar.clear();
this->clear_atomic();
}

View File

@ -400,8 +400,9 @@ _texture( q_tex,int2);
__kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra,
const __global numtyp4 *restrict coeff,
const __global numtyp4 *restrict sp_polar,
const __global numtyp4 *restrict coeff_amtype,
const __global numtyp4 *restrict coeff_amclass,
const __global numtyp4 *restrict sp_disp,
const __global int *dev_nbor,
const __global int *dev_packed,
const __global int *dev_short_nbor,
@ -428,20 +429,11 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
}
acctyp4 tq;
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
numtyp4* polar1 = (numtyp4*)(&extra[0]);
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
if (ii<inum) {
int m,itype,igroup;
numtyp bfac;
numtyp term1,term2,term3;
numtyp term4,term5,term6;
numtyp bn[6];
numtyp ci,dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
int itype,iclass;
numtyp ci,ai;
int numj, nbor, nbor_end;
const __global int* nbor_mem=dev_packed;
@ -460,18 +452,10 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
nbor_mem = dev_short_nbor;
}
ci = polar1[i].x; // rpole[i][0];
dix = polar1[i].y; // rpole[i][1];
diy = polar1[i].z; // rpole[i][2];
diz = polar1[i].w; // rpole[i][3];
qixx = polar2[i].x; // rpole[i][4];
qixy = polar2[i].y; // rpole[i][5];
qixz = polar2[i].z; // rpole[i][6];
qiyy = polar2[i].w; // rpole[i][8];
qiyz = polar3[i].x; // rpole[i][9];
qizz = polar3[i].y; // rpole[i][12];
itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i];
itype = polar3[i].z; // amtype[i];
iclass = coeff_amtype[itype].w; // amtype2class[itype];
ci = coeff_amclass[iclass].x; // csix[iclass];
ai = coeff_amclass[iclass].y; // adisp[iclass];
for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -482,34 +466,115 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_,
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
numtyp zr = jx.z - ix.z;
numtyp xr = ix.x - jx.x;
numtyp yr = ix.y - jx.y;
numtyp zr = ix.z - jx.z;
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp ck = polar1[j].x; // rpole[j][0];
numtyp dkx = polar1[j].y; // rpole[j][1];
numtyp dky = polar1[j].z; // rpole[j][2];
numtyp dkz = polar1[j].w; // rpole[j][3];
numtyp qkxx = polar2[j].x; // rpole[j][4];
numtyp qkxy = polar2[j].y; // rpole[j][5];
numtyp qkxz = polar2[j].z; // rpole[j][6];
numtyp qkyy = polar2[j].w; // rpole[j][8];
numtyp qkyz = polar3[j].x; // rpole[j][9];
numtyp qkzz = polar3[j].y; // rpole[j][12];
int jtype = polar3[j].z; // amtype[j];
int jgroup = polar3[j].w; // amgroup[j];
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
numtyp ck = coeff_amclass[jclass].x; // csix[jclass];
numtyp ak = coeff_amclass[jclass].y; // adisp[jclass];
numtyp r6 = r2*r2*r2;
numtyp ralpha2 = r2 * aewald*aewald;
numtyp term = (numtyp)1.0 + ralpha2 + (numtyp)0.5*ralpha2*ralpha2;
numtyp expterm = ucl_exp(-ralpha2);
numtyp expa = expterm * term;
// find the damping factor for the dispersion interaction
numtyp r = ucl_sqrt(r2);
numtyp r7 = r6 * r;
numtyp di = ai * r;
numtyp di2 = di * di;
numtyp di3 = di * di2;
numtyp dk = ak * r;
numtyp expi = ucl_exp(-di);
numtyp expk = ucl_exp(-dk);
numtyp ai2,ak2;
numtyp di4,di5;
numtyp dk2,dk3;
numtyp ti,ti2;
numtyp tk,tk2;
numtyp damp3,damp5;
numtyp ddamp;
numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)];
if (ai != ak) {
ai2 = ai * ai;
ak2 = ak * ak;
dk2 = dk * dk;
dk3 = dk * dk2;
ti = ak2 / (ak2-ai2);
ti2 = ti * ti;
tk = ai2 / (ai2-ak2);
tk2 = tk * tk;
damp3 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2) * expi
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2) * expk
- (numtyp)2.0*ti2*tk * ((numtyp)1.0 + di)* expi
- (numtyp)2.0*tk2*ti * ((numtyp)1.0 + dk) *expk;
damp5 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2 + di3/(numtyp)6.0) * expi
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk
- (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi
- (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk;
ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
(numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0);
} else {
di4 = di2 * di2;
di5 = di2 * di3;
damp3 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + (numtyp)7.0*di3/(numtyp)48.0+di4/(numtyp)48.0)*expi;
damp5 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + di3/(numtyp)6.0+di4/(numtyp)24.0+di5/(numtyp)144.0)*expi;
ddamp = ai * expi * (di5-(numtyp)3.0*di3-(numtyp)3.0*di2) / (numtyp)96.0;
}
numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3;
// apply damping and scaling factors for this interaction
numtyp scale = factor_disp * damp*damp;
scale = scale - (numtyp )1.0;
numtyp e = -ci * ck * (expa+scale) / r6;
numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r;
numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7;
energy+= e;
// increment the damped dispersion derivative components
numtyp dedx = de * xr;
numtyp dedy = de * yr;
numtyp dedz = de * zr;
f.x += dedx;
f.y += dedy;
f.z += dedz;
// increment the internal virial tensor components
numtyp vxx = xr * dedx;
numtyp vyx = yr * dedx;
numtyp vzx = zr * dedx;
numtyp vyy = yr * dedy;
numtyp vzy = zr * dedy;
numtyp vzz = zr * dedz;
virial[0] += vxx;
virial[1] += vyy;
virial[2] += vzz;
virial[3] += vyx;
virial[4] += vzx;
virial[5] += vzy;
} // nbor
} // ii<inum
// accumate force, energy and virial
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
// offset,eflag,vflag,ans,engv);
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv);
}
/* ----------------------------------------------------------------------
@ -556,7 +621,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
if (ii<inum) {
int m,itype,igroup;
int m;
numtyp bfac;
numtyp term1,term2,term3;
numtyp term4,term5,term6;
@ -590,8 +655,6 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
qiyy = polar2[i].w; // rpole[i][8];
qiyz = polar3[i].x; // rpole[i][9];
qizz = polar3[i].y; // rpole[i][12];
itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i];
for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -1391,9 +1454,8 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp ukyp = polar5[j].y; // uinp[j][1];
numtyp ukzp = polar5[j].z; // uinp[j][2];
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
numtyp factor_dscale, factor_pscale, factor_uscale;
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
if (igroup == jgroup) {
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
factor_dscale = polar_dscale;

View File

@ -40,6 +40,8 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
int init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole, const double *host_dirdamp,
const int *host_amtype2class, const double *host_special_mpole,
const double *host_special_hal, const double *host_special_repel,
const double *host_special_disp,
const double *host_special_polar_wscale,
const double *host_special_polar_piscale,
const double *host_special_polar_pscale,
@ -70,7 +72,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
/// sp_polar.x = special_polar_wscale
/// sp_polar.y special_polar_pscale,
/// sp_polar.z = special_polar_piscale
/// sp_polar.w = special_mpole
UCL_D_Vec<numtyp4> sp_polar;
/// Special nonpolar values [0-4]:
/// sp_nonpolar.x = special_hal
/// sp_nonpolar.y special_repel
/// sp_nonpolar.z = special_disp
UCL_D_Vec<numtyp4> sp_nonpolar;
/// If atom type constants fit in shared memory, use fast kernels
bool shared_types;

View File

@ -30,6 +30,9 @@ static Amoeba<PRECISION,ACC_PRECISION> AMOEBAMF;
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const int *host_amtype2class,
const double *host_special_hal,
const double *host_special_repel,
const double *host_special_disp,
const double *host_special_mpole,
const double *host_special_polar_wscale,
const double *host_special_polar_piscale,
@ -66,7 +69,9 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
if (world_me==0)
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
host_pdamp, host_thole, host_dirdamp,
host_amtype2class, host_special_mpole, host_special_polar_wscale,
host_amtype2class, host_special_hal,
host_special_repel, host_special_disp,
host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale,
host_csix, host_adisp, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split,
@ -86,8 +91,11 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas
fflush(screen);
}
if (gpu_rank==i && world_me!=0)
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp,
host_amtype2class, host_special_mpole, host_special_polar_wscale,
init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass,
host_pdamp, host_thole, host_dirdamp,
host_amtype2class, host_special_hal,
host_special_repel, host_special_disp,
host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale,
host_csix, host_adisp, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split,

View File

@ -53,7 +53,8 @@ enum{GORDON1,GORDON2};
int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_pdamp, const double *host_thole,
const double *host_dirdamp, const int* host_amtype2class,
const double *host_special_mpole,
const double *host_special_hal, const double *host_special_repel,
const double *host_special_disp, const double *host_special_mpole,
const double *host_special_polar_wscale,
const double *host_special_polar_piscale,
const double *host_special_polar_pscale,
@ -116,6 +117,9 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
fieldp_pinned = nullptr;
tq_pinned = nullptr;
gpu_hal_ready = false;
gpu_repulsion_ready = false;
gpu_dispersion_real_ready = false;
gpu_multipole_real_ready = true;
gpu_udirect2b_ready = true;
gpu_umutual2b_ready = true;
@ -170,7 +174,8 @@ void PairAmoebaGPU::init_style()
int tq_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
pdamp, thole, dirdamp, amtype2class, special_mpole,
pdamp, thole, dirdamp, amtype2class, special_hal,
special_repel, special_disp, special_mpole,
special_polar_wscale, special_polar_piscale,
special_polar_pscale, csix, adisp, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,

View File

@ -47,6 +47,9 @@ class PairAmoebaGPU : public PairAmoeba {
void *fieldp_pinned;
bool tq_single;
bool gpu_hal_ready;
bool gpu_repulsion_ready;
bool gpu_dispersion_real_ready;
bool gpu_multipole_real_ready;
bool gpu_udirect2b_ready;
bool gpu_umutual2b_ready;