git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14867 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -71,9 +71,6 @@ int CoulLongT::init(const int ntypes, double **host_scale,
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
|
||||
scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
|
||||
|
||||
@ -88,8 +85,7 @@ int CoulLongT::init(const int ntypes, double **host_scale,
|
||||
_g_ewald=g_ewald;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+scale.row_bytes()+
|
||||
sp_cl.row_bytes();
|
||||
this->_max_bytes=scale.row_bytes()+sp_cl.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -106,8 +102,6 @@ void CoulLongT::clear() {
|
||||
return;
|
||||
_allocated=false;
|
||||
|
||||
lj1.clear();
|
||||
lj3.clear();
|
||||
scale.clear();
|
||||
sp_cl.clear();
|
||||
this->clear_atomic();
|
||||
|
||||
@ -124,8 +124,7 @@ texture<int2> q_tex;
|
||||
#endif
|
||||
|
||||
__kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const __global numtyp *restrict scale,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
@ -161,6 +160,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
int itype=ix.w;
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
@ -171,6 +171,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
int jtype=jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp delx = ix.x-jx.x;
|
||||
@ -178,6 +179,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
numtyp delz = ix.z-jx.z;
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq < cut_coulsq) {
|
||||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp force, prefactor, _erfc;
|
||||
@ -188,7 +190,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
prefactor *= qqrd2e * scale[mtype] * qtmp/r;
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
@ -215,8 +217,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
|
||||
__kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict scale_in,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
@ -230,9 +231,12 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
__local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
__local numtyp sp_cl[4];
|
||||
if (tid<4)
|
||||
sp_cl[tid]=sp_cl_in[tid];
|
||||
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES)
|
||||
scale[tid]=scale_in[tid];
|
||||
|
||||
acctyp e_coul=(acctyp)0;
|
||||
acctyp4 f;
|
||||
@ -252,6 +256,8 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
int iw=ix.w;
|
||||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
int j=dev_packed[nbor];
|
||||
@ -261,6 +267,7 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
int mtype=itype+jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp delx = ix.x-jx.x;
|
||||
@ -278,7 +285,7 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
prefactor *= qqrd2e * scale[mtype] * qtmp/r;
|
||||
force = prefactor*(_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
|
||||
@ -59,10 +59,6 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
|
||||
|
||||
// --------------------------- TYPE DATA --------------------------
|
||||
|
||||
/// lj1 dummy
|
||||
UCL_D_Vec<numtyp4> lj1;
|
||||
/// lj3 dummy
|
||||
UCL_D_Vec<numtyp4> lj3;
|
||||
/// scale
|
||||
UCL_D_Vec<numtyp> scale;
|
||||
/// Special Coul values [0-3]
|
||||
|
||||
@ -23,7 +23,7 @@ FILES = $(SRC) Makefile
|
||||
|
||||
DIR = Obj_mingw32/
|
||||
LIB = $(DIR)libmeam.a
|
||||
OBJ = $(SRC:%.F=$(DIR)%.o)
|
||||
OBJ = $(SRC:%.F=$(DIR)%.o) $(DIR)fm_exp.o
|
||||
|
||||
# ------ SETTINGS ------
|
||||
|
||||
|
||||
@ -23,7 +23,7 @@ FILES = $(SRC) Makefile
|
||||
|
||||
DIR = Obj_mingw64/
|
||||
LIB = $(DIR)libmeam.a
|
||||
OBJ = $(SRC:%.F=$(DIR)%.o)
|
||||
OBJ = $(SRC:%.F=$(DIR)%.o) $(DIR)fm_exp.o
|
||||
|
||||
# ------ SETTINGS ------
|
||||
|
||||
|
||||
Reference in New Issue
Block a user