Updated pair coul/long/cs and born/coul/long/cs; updated gpu neighbor builds to support core-shell styles where r2 can be tiny.
This commit is contained in:
@ -9,7 +9,7 @@
|
|||||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||||
// __________________________________________________________________________
|
// __________________________________________________________________________
|
||||||
//
|
//
|
||||||
// begin :
|
// begin : June 2018
|
||||||
// email : trung.nguyen@northwestern.edu
|
// email : trung.nguyen@northwestern.edu
|
||||||
// ***************************************************************************/
|
// ***************************************************************************/
|
||||||
|
|
||||||
@ -29,15 +29,19 @@ texture<int2> q_tex;
|
|||||||
#define q_tex q_
|
#define q_tex q_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define B0 (numtyp)-0.1335096380159268
|
// Note: EWALD_P is different from that in lal_preprocessor.h
|
||||||
#define B1 (numtyp)-2.57839507e-1
|
// acctyp is needed for these parameters
|
||||||
#define B2 (numtyp)-1.37203639e-1
|
#define CS_EWALD_P (acctyp)9.95473818e-1
|
||||||
#define B3 (numtyp)-8.88822059e-3
|
#define B0 (acctyp)-0.1335096380159268
|
||||||
#define B4 (numtyp)-5.80844129e-3
|
#define B1 (acctyp)-2.57839507e-1
|
||||||
#define B5 (numtyp)1.14652755e-1
|
#define B2 (acctyp)-1.37203639e-1
|
||||||
#define EPSILON (numtyp)1.0e-20
|
#define B3 (acctyp)-8.88822059e-3
|
||||||
#define EPS_EWALD (numtyp)1.0e-6
|
#define B4 (acctyp)-5.80844129e-3
|
||||||
#define EPS_EWALD_SQR (numtyp)1.0e-12
|
#define B5 (acctyp)1.14652755e-1
|
||||||
|
|
||||||
|
#define EPSILON (acctyp)(1.0e-20)
|
||||||
|
#define EPS_EWALD (acctyp)(1.0e-6)
|
||||||
|
#define EPS_EWALD_SQR (acctyp)(1.0e-12)
|
||||||
|
|
||||||
__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp4 *restrict coeff1,
|
const __global numtyp4 *restrict coeff1,
|
||||||
@ -105,39 +109,37 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
int mtype=itype*lj_types+jtype;
|
int mtype=itype*lj_types+jtype;
|
||||||
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
||||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
|
||||||
numtyp rexp = (numtyp)0.0;
|
|
||||||
|
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||||
|
numtyp r2inv = ucl_recip(rsq);
|
||||||
|
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
rsq += EPSILON;
|
numtyp r = ucl_sqrt(rsq);
|
||||||
|
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
|
||||||
numtyp r = ucl_rsqrt(r2inv);
|
|
||||||
fetch(prefactor,j,q_tex);
|
fetch(prefactor,j,q_tex);
|
||||||
prefactor *= qqrd2e * qtmp;
|
prefactor *= qqrd2e * qtmp;
|
||||||
if (factor_coul<(numtyp)1.0) {
|
if (factor_coul<(numtyp)1.0) {
|
||||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= (r+EPS_EWALD);
|
prefactor /= (r+EPS_EWALD);
|
||||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||||
|
// Additionally r2inv needs to be accordingly modified since the later
|
||||||
|
// scaling of the overall force shall be consistent
|
||||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||||
forcecoul *= r2inv;
|
|
||||||
} else {
|
} else {
|
||||||
numtyp grij = g_ewald * r;
|
numtyp grij = g_ewald * r;
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= r;
|
prefactor /= r;
|
||||||
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
||||||
forcecoul *= r2inv;
|
|
||||||
}
|
}
|
||||||
} else forcecoul = (numtyp)0.0;
|
} else forcecoul = (numtyp)0.0;
|
||||||
|
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
|
||||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||||
numtyp r = ucl_sqrt(rsq);
|
numtyp r = ucl_sqrt(rsq);
|
||||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||||
@ -146,7 +148,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
||||||
} else forceborn = (numtyp)0.0;
|
} else forceborn = (numtyp)0.0;
|
||||||
|
|
||||||
force = forceborn*r2inv + forcecoul;
|
force = (forcecoul + forceborn) * r2inv;
|
||||||
|
|
||||||
f.x+=delx*force;
|
f.x+=delx*force;
|
||||||
f.y+=dely*force;
|
f.y+=dely*force;
|
||||||
@ -154,9 +156,9 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
if (eflag>0) {
|
if (eflag>0) {
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
e_coul += prefactor*_erfc;
|
numtyp e = prefactor*_erfc;
|
||||||
if (factor_coul<(numtyp)1.0)
|
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
e_coul += e;
|
||||||
}
|
}
|
||||||
if (rsq < cutsq_sigma[mtype].y) {
|
if (rsq < cutsq_sigma[mtype].y) {
|
||||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||||
@ -248,39 +250,38 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||||
|
|
||||||
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
||||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
|
||||||
numtyp rexp = (numtyp)0.0;
|
|
||||||
|
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||||
|
numtyp r2inv = ucl_recip(rsq);
|
||||||
|
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
rsq += EPSILON;
|
numtyp r = ucl_sqrt(rsq);
|
||||||
|
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
|
||||||
numtyp r = ucl_rsqrt(r2inv);
|
|
||||||
fetch(prefactor,j,q_tex);
|
fetch(prefactor,j,q_tex);
|
||||||
prefactor *= qqrd2e * qtmp;
|
prefactor *= qqrd2e * qtmp;
|
||||||
if (factor_coul<(numtyp)1.0) {
|
if (factor_coul<(numtyp)1.0) {
|
||||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= (r+EPS_EWALD);
|
prefactor /= (r+EPS_EWALD);
|
||||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||||
|
// Additionally r2inv needs to be accordingly modified since the later
|
||||||
|
// scaling of the overall force shall be consistent
|
||||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||||
forcecoul *= r2inv;
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
numtyp grij = g_ewald * r;
|
numtyp grij = g_ewald * r;
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= r;
|
prefactor /= r;
|
||||||
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
||||||
forcecoul *= r2inv;
|
|
||||||
}
|
}
|
||||||
} else forcecoul = (numtyp)0.0;
|
} else forcecoul = (numtyp)0.0;
|
||||||
|
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
|
||||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||||
numtyp r = ucl_sqrt(rsq);
|
numtyp r = ucl_sqrt(rsq);
|
||||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||||
@ -289,7 +290,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||||||
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
||||||
} else forceborn = (numtyp)0.0;
|
} else forceborn = (numtyp)0.0;
|
||||||
|
|
||||||
force = forceborn*r2inv + forcecoul;
|
force = (forcecoul + forceborn) * r2inv;
|
||||||
|
|
||||||
f.x+=delx*force;
|
f.x+=delx*force;
|
||||||
f.y+=dely*force;
|
f.y+=dely*force;
|
||||||
@ -297,9 +298,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
if (eflag>0) {
|
if (eflag>0) {
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
e_coul += prefactor*_erfc;
|
numtyp e = prefactor*_erfc;
|
||||||
if (factor_coul<(numtyp)1.0)
|
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
e_coul += e;
|
||||||
}
|
}
|
||||||
if (rsq < cutsq_sigma[mtype].y) {
|
if (rsq < cutsq_sigma[mtype].y) {
|
||||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||||
|
|||||||
@ -29,6 +29,20 @@ texture<int2> q_tex;
|
|||||||
#define q_tex q_
|
#define q_tex q_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Note: EWALD_P is different from that in lal_preprocessor.h
|
||||||
|
// acctyp is needed for these parameters
|
||||||
|
#define CS_EWALD_P (acctyp)9.95473818e-1
|
||||||
|
#define B0 (acctyp)-0.1335096380159268
|
||||||
|
#define B1 (acctyp)-2.57839507e-1
|
||||||
|
#define B2 (acctyp)-1.37203639e-1
|
||||||
|
#define B3 (acctyp)-8.88822059e-3
|
||||||
|
#define B4 (acctyp)-5.80844129e-3
|
||||||
|
#define B5 (acctyp)1.14652755e-1
|
||||||
|
|
||||||
|
#define EPSILON (acctyp)(1.0e-20)
|
||||||
|
#define EPS_EWALD (acctyp)(1.0e-6)
|
||||||
|
#define EPS_EWALD_SQR (acctyp)(1.0e-12)
|
||||||
|
|
||||||
#if (ARCH < 300)
|
#if (ARCH < 300)
|
||||||
|
|
||||||
#define store_answers_lq(f, e_coul, virial, ii, inum, tid, \
|
#define store_answers_lq(f, e_coul, virial, ii, inum, tid, \
|
||||||
@ -123,16 +137,6 @@ texture<int2> q_tex;
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define B0 (numtyp)-0.1335096380159268
|
|
||||||
#define B1 (numtyp)-2.57839507e-1
|
|
||||||
#define B2 (numtyp)-1.37203639e-1
|
|
||||||
#define B3 (numtyp)-8.88822059e-3
|
|
||||||
#define B4 (numtyp)-5.80844129e-3
|
|
||||||
#define B5 (numtyp)1.14652755e-1
|
|
||||||
#define EPSILON (numtyp)1.0e-20
|
|
||||||
#define EPS_EWALD (numtyp)1.0e-6
|
|
||||||
#define EPS_EWALD_SQR (numtyp)1.0e-12
|
|
||||||
|
|
||||||
__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict scale,
|
const __global numtyp *restrict scale,
|
||||||
const int lj_types,
|
const int lj_types,
|
||||||
@ -191,7 +195,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
int mtype=itype*lj_types+jtype;
|
int mtype=itype*lj_types+jtype;
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
rsq += EPSILON;
|
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||||
|
|
||||||
numtyp force,prefactor,_erfc;
|
numtyp force,prefactor,_erfc;
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
numtyp r2inv = ucl_recip(rsq);
|
||||||
@ -201,17 +205,19 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
if (factor_coul<(numtyp)1.0) {
|
if (factor_coul<(numtyp)1.0) {
|
||||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= (r+EPS_EWALD);
|
prefactor /= (r+EPS_EWALD);
|
||||||
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||||
|
// Additionally r2inv needs to be accordingly modified since the later
|
||||||
|
// scaling of the overall force shall be consistent
|
||||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||||
force *= r2inv;
|
force *= r2inv;
|
||||||
} else {
|
} else {
|
||||||
numtyp grij = g_ewald * r;
|
numtyp grij = g_ewald * r;
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= r;
|
prefactor /= r;
|
||||||
@ -224,9 +230,9 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||||||
f.z+=delz*force;
|
f.z+=delz*force;
|
||||||
|
|
||||||
if (eflag>0) {
|
if (eflag>0) {
|
||||||
e_coul += prefactor*_erfc;
|
numtyp e = prefactor*_erfc;
|
||||||
if (factor_coul<(numtyp)1.0)
|
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
e_coul += e;
|
||||||
}
|
}
|
||||||
if (vflag>0) {
|
if (vflag>0) {
|
||||||
virial[0] += delx*delx*force;
|
virial[0] += delx*delx*force;
|
||||||
@ -304,7 +310,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||||
|
|
||||||
if (rsq < cut_coulsq) {
|
if (rsq < cut_coulsq) {
|
||||||
rsq += EPSILON;
|
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||||
|
|
||||||
numtyp force,prefactor,_erfc;
|
numtyp force,prefactor,_erfc;
|
||||||
numtyp r2inv = ucl_recip(rsq);
|
numtyp r2inv = ucl_recip(rsq);
|
||||||
@ -314,32 +320,34 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||||||
if (factor_coul<(numtyp)1.0) {
|
if (factor_coul<(numtyp)1.0) {
|
||||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= (r+EPS_EWALD);
|
prefactor /= (r+EPS_EWALD);
|
||||||
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||||
|
// Additionally r2inv needs to be accordingly modified since the later
|
||||||
|
// scaling of the overall force shall be consistent
|
||||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||||
force *= r2inv;
|
|
||||||
} else {
|
} else {
|
||||||
numtyp grij = g_ewald * r;
|
numtyp grij = g_ewald * r;
|
||||||
numtyp expm2 = ucl_exp(-grij*grij);
|
numtyp expm2 = ucl_exp(-grij*grij);
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||||
numtyp u = (numtyp)1.0 - t;
|
numtyp u = (numtyp)1.0 - t;
|
||||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||||
prefactor /= r;
|
prefactor /= r;
|
||||||
force = prefactor * (_erfc + EWALD_F*grij*expm2);
|
force = prefactor * (_erfc + EWALD_F*grij*expm2);
|
||||||
force *= r2inv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
force *= r2inv;
|
||||||
|
|
||||||
f.x+=delx*force;
|
f.x+=delx*force;
|
||||||
f.y+=dely*force;
|
f.y+=dely*force;
|
||||||
f.z+=delz*force;
|
f.z+=delz*force;
|
||||||
|
|
||||||
if (eflag>0) {
|
if (eflag>0) {
|
||||||
e_coul += prefactor*_erfc;
|
numtyp e = prefactor*_erfc;
|
||||||
if (factor_coul<(numtyp)1.0)
|
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
e_coul += e;
|
||||||
}
|
}
|
||||||
if (vflag>0) {
|
if (vflag>0) {
|
||||||
virial[0] += delx*delx*force;
|
virial[0] += delx*delx*force;
|
||||||
|
|||||||
@ -232,7 +232,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
|||||||
diff.z = atom_i.z - pos_sh[j].z;
|
diff.z = atom_i.z - pos_sh[j].z;
|
||||||
|
|
||||||
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
||||||
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
if (r2 < cell_size*cell_size && pid_j != pid_i) { // && r2 > 1e-5
|
||||||
cnt++;
|
cnt++;
|
||||||
if (cnt <= neigh_bin_size) {
|
if (cnt <= neigh_bin_size) {
|
||||||
*neigh_list = pid_j;
|
*neigh_list = pid_j;
|
||||||
|
|||||||
Reference in New Issue
Block a user