diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index 28ed02b480..1d62e483d8 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -47,6 +47,9 @@ template int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass, const double *host_pdamp, const double *host_thole, const double *host_dirdamp, const int *host_amtype2class, + const double *host_special_hal, + const double *host_special_repel, + const double *host_special_disp, const double *host_special_mpole, const double *host_special_polar_wscale, const double *host_special_polar_piscale, @@ -109,12 +112,21 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass, } ucl_copy(sp_polar,dview,5,false); + sp_nonpolar.alloc(5,*(this->ucl_device),UCL_READ_ONLY); + for (int i=0; i<5; i++) { + dview[i].x=host_special_hal[i]; + dview[i].y=host_special_repel[i]; + dview[i].z=host_special_disp[i]; + dview[i].w=(numtyp)0; + } + ucl_copy(sp_nonpolar,dview,5,false); + _polar_dscale = polar_dscale; _polar_uscale = polar_uscale; _allocated=true; this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes() - + sp_polar.row_bytes() + this->_tep.row_bytes(); + + sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes(); return 0; } @@ -125,7 +137,9 @@ void AmoebaT::clear() { _allocated=false; coeff_amtype.clear(); + coeff_amclass.clear(); sp_polar.clear(); + sp_nonpolar.clear(); this->clear_atomic(); } diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index 5a1151f610..8915ef0146 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -400,8 +400,9 @@ _texture( q_tex,int2); __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, - const __global numtyp4 *restrict coeff, - const __global numtyp4 *restrict sp_polar, + const __global numtyp4 *restrict coeff_amtype, + const __global numtyp4 *restrict coeff_amclass, + const __global numtyp4 *restrict sp_disp, const __global int *dev_nbor, const __global int *dev_packed, const __global int *dev_short_nbor, @@ -428,20 +429,11 @@ __kernel void k_amoeba_dispersion(const __global numtyp4 *restrict x_, for (int l=0; l<6; l++) virial[l]=(acctyp)0; } - acctyp4 tq; - tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0; - - numtyp4* polar1 = (numtyp4*)(&extra[0]); - numtyp4* polar2 = (numtyp4*)(&extra[4*nall]); numtyp4* polar3 = (numtyp4*)(&extra[8*nall]); if (iioff2) continue; - numtyp r = ucl_sqrt(r2); - numtyp ck = polar1[j].x; // rpole[j][0]; - numtyp dkx = polar1[j].y; // rpole[j][1]; - numtyp dky = polar1[j].z; // rpole[j][2]; - numtyp dkz = polar1[j].w; // rpole[j][3]; - numtyp qkxx = polar2[j].x; // rpole[j][4]; - numtyp qkxy = polar2[j].y; // rpole[j][5]; - numtyp qkxz = polar2[j].z; // rpole[j][6]; - numtyp qkyy = polar2[j].w; // rpole[j][8]; - numtyp qkyz = polar3[j].x; // rpole[j][9]; - numtyp qkzz = polar3[j].y; // rpole[j][12]; int jtype = polar3[j].z; // amtype[j]; - int jgroup = polar3[j].w; // amgroup[j]; + int jclass = coeff_amtype[jtype].w; // amtype2class[jtype]; + numtyp ck = coeff_amclass[jclass].x; // csix[jclass]; + numtyp ak = coeff_amclass[jclass].y; // adisp[jclass]; + numtyp r6 = r2*r2*r2; + numtyp ralpha2 = r2 * aewald*aewald; + numtyp term = (numtyp)1.0 + ralpha2 + (numtyp)0.5*ralpha2*ralpha2; + numtyp expterm = ucl_exp(-ralpha2); + numtyp expa = expterm * term; + + // find the damping factor for the dispersion interaction + + numtyp r = ucl_sqrt(r2); + numtyp r7 = r6 * r; + numtyp di = ai * r; + numtyp di2 = di * di; + numtyp di3 = di * di2; + numtyp dk = ak * r; + numtyp expi = ucl_exp(-di); + numtyp expk = ucl_exp(-dk); + + numtyp ai2,ak2; + numtyp di4,di5; + numtyp dk2,dk3; + numtyp ti,ti2; + numtyp tk,tk2; + numtyp damp3,damp5; + numtyp ddamp; + numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)]; + + if (ai != ak) { + ai2 = ai * ai; + ak2 = ak * ak; + dk2 = dk * dk; + dk3 = dk * dk2; + ti = ak2 / (ak2-ai2); + ti2 = ti * ti; + tk = ai2 / (ai2-ak2); + tk2 = tk * tk; + damp3 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2) * expi + - tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2) * expk + - (numtyp)2.0*ti2*tk * ((numtyp)1.0 + di)* expi + - (numtyp)2.0*tk2*ti * ((numtyp)1.0 + dk) *expk; + damp5 = (numtyp)1.0 - ti2*((numtyp)1.0 + di + (numtyp)0.5*di2 + di3/(numtyp)6.0) * expi + - tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk + - (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi + - (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk; + ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) + + (numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0); + + } else { + di4 = di2 * di2; + di5 = di2 * di3; + damp3 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + (numtyp)7.0*di3/(numtyp)48.0+di4/(numtyp)48.0)*expi; + damp5 = (numtyp)1.0 - ((numtyp)1.0+di+(numtyp)0.5*di2 + di3/(numtyp)6.0+di4/(numtyp)24.0+di5/(numtyp)144.0)*expi; + ddamp = ai * expi * (di5-(numtyp)3.0*di3-(numtyp)3.0*di2) / (numtyp)96.0; + } + + numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3; + + // apply damping and scaling factors for this interaction + + numtyp scale = factor_disp * damp*damp; + scale = scale - (numtyp )1.0; + numtyp e = -ci * ck * (expa+scale) / r6; + numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r; + numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7; + + energy+= e; + + // increment the damped dispersion derivative components + + numtyp dedx = de * xr; + numtyp dedy = de * yr; + numtyp dedz = de * zr; + f.x += dedx; + f.y += dedy; + f.z += dedz; + + // increment the internal virial tensor components + + numtyp vxx = xr * dedx; + numtyp vyx = yr * dedx; + numtyp vzx = zr * dedx; + numtyp vyy = yr * dedy; + numtyp vzy = zr * dedy; + numtyp vzz = zr * dedz; + + virial[0] += vxx; + virial[1] += vyy; + virial[2] += vzz; + virial[3] += vyx; + virial[4] += vzx; + virial[5] += vzy; } // nbor } // ii { int init(const int ntypes, const int max_amtype, const int max_amclass, const double *host_pdamp, const double *host_thole, const double *host_dirdamp, const int *host_amtype2class, const double *host_special_mpole, + const double *host_special_hal, const double *host_special_repel, + const double *host_special_disp, const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, @@ -70,7 +72,13 @@ class Amoeba : public BaseAmoeba { /// sp_polar.x = special_polar_wscale /// sp_polar.y special_polar_pscale, /// sp_polar.z = special_polar_piscale + /// sp_polar.w = special_mpole UCL_D_Vec sp_polar; + /// Special nonpolar values [0-4]: + /// sp_nonpolar.x = special_hal + /// sp_nonpolar.y special_repel + /// sp_nonpolar.z = special_disp + UCL_D_Vec sp_nonpolar; /// If atom type constants fit in shared memory, use fast kernels bool shared_types; diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index 804bf10f32..86cf6f4c54 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -30,6 +30,9 @@ static Amoeba AMOEBAMF; int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass, const double *host_pdamp, const double *host_thole, const double *host_dirdamp, const int *host_amtype2class, + const double *host_special_hal, + const double *host_special_repel, + const double *host_special_disp, const double *host_special_mpole, const double *host_special_polar_wscale, const double *host_special_polar_piscale, @@ -66,7 +69,9 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas if (world_me==0) init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp, - host_amtype2class, host_special_mpole, host_special_polar_wscale, + host_amtype2class, host_special_hal, + host_special_repel, host_special_disp, + host_special_mpole, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, host_csix, host_adisp, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, @@ -86,8 +91,11 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas fflush(screen); } if (gpu_rank==i && world_me!=0) - init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, host_pdamp, host_thole, host_dirdamp, - host_amtype2class, host_special_mpole, host_special_polar_wscale, + init_ok=AMOEBAMF.init(ntypes, max_amtype, max_amclass, + host_pdamp, host_thole, host_dirdamp, + host_amtype2class, host_special_hal, + host_special_repel, host_special_disp, + host_special_mpole, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, host_csix, host_adisp, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 25f4718163..35bba58a14 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -53,7 +53,8 @@ enum{GORDON1,GORDON2}; int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclass, const double *host_pdamp, const double *host_thole, const double *host_dirdamp, const int* host_amtype2class, - const double *host_special_mpole, + const double *host_special_hal, const double *host_special_repel, + const double *host_special_disp, const double *host_special_mpole, const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, @@ -116,6 +117,9 @@ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) fieldp_pinned = nullptr; tq_pinned = nullptr; + gpu_hal_ready = false; + gpu_repulsion_ready = false; + gpu_dispersion_real_ready = false; gpu_multipole_real_ready = true; gpu_udirect2b_ready = true; gpu_umutual2b_ready = true; @@ -170,7 +174,8 @@ void PairAmoebaGPU::init_style() int tq_size; int mnf = 5e-2 * neighbor->oneatom; int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass, - pdamp, thole, dirdamp, amtype2class, special_mpole, + pdamp, thole, dirdamp, amtype2class, special_hal, + special_repel, special_disp, special_mpole, special_polar_wscale, special_polar_piscale, special_polar_pscale, csix, adisp, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h index d9a3fc5904..710f997e4c 100644 --- a/src/GPU/pair_amoeba_gpu.h +++ b/src/GPU/pair_amoeba_gpu.h @@ -47,6 +47,9 @@ class PairAmoebaGPU : public PairAmoeba { void *fieldp_pinned; bool tq_single; + bool gpu_hal_ready; + bool gpu_repulsion_ready; + bool gpu_dispersion_real_ready; bool gpu_multipole_real_ready; bool gpu_udirect2b_ready; bool gpu_umutual2b_ready;