cuda forces + init var
This commit is contained in:
@ -46,14 +46,15 @@ template <class numtyp, class acctyp>
|
||||
int DPDChargedT::init(const int ntypes,
|
||||
double **host_cutsq, double **host_a0,
|
||||
double **host_gamma, double **host_sigma,
|
||||
double **host_cut,
|
||||
double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
|
||||
double **host_cut_dpd, double **host_cut_dpdsq,
|
||||
double **host_cut_slatersq, **host_scale,
|
||||
double *host_special_lj,
|
||||
const bool tstat_only,
|
||||
const int nlocal, const int nall,
|
||||
const int max_nbors, const int maxspecial,
|
||||
const double cell_size,
|
||||
const double gpu_split, FILE *_screen) {
|
||||
const double gpu_split, FILE *_screen, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald, double lamda) {
|
||||
const int max_shared_types=this->device->max_shared_types();
|
||||
|
||||
int onetype=0;
|
||||
@ -83,7 +84,28 @@ int DPDChargedT::init(const int ntypes,
|
||||
lj_types=max_shared_types;
|
||||
shared_types=true;
|
||||
}
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write_coul(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write_coul[i]=0.0;
|
||||
|
||||
scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack1(ntypes,lj_types,scale,host_write_coul,host_scale);
|
||||
|
||||
sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write_coul[i]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(sp_cl,host_write_coul,4,false);
|
||||
|
||||
_lj_types=lj_types;
|
||||
_cut_coulsq=host_cut_coulsq;
|
||||
_qqrd2e=qqrd2e;
|
||||
_g_ewald=g_ewald;
|
||||
_lamda=lamda;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
@ -103,7 +125,7 @@ int DPDChargedT::init(const int ntypes,
|
||||
host_rsq[i]=0.0;
|
||||
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq,
|
||||
host_cut_dpdsq, host_cut_dpd, host_cut_slatersq);
|
||||
host_cut_dpdsq, host_scale, host_cut_slatersq);
|
||||
|
||||
double special_sqrt[4];
|
||||
special_sqrt[0] = sqrt(host_special_lj[0]);
|
||||
@ -181,20 +203,23 @@ int DPDChargedT::loop(const int eflag, const int vflag) {
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
this->k_pair_sel->set_size(GX,BX);
|
||||
this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, &sp_sqrt,
|
||||
this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, &sp_cl, &sp_sqrt,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag,
|
||||
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
|
||||
&this->_dtinvsqrt, &this->_seed, &this->_timestep,
|
||||
&this->_tstat_only, &this->_threads_per_atom);
|
||||
&_qqrd2e, &_g_ewald, &_lamda,
|
||||
&this->_tstat_only, &this->_threads_per_atom,);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &_lj_types, &sp_lj, &sp_sqrt,
|
||||
this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &_lj_types, &sp_lj, &sp_cl, &sp_sqrt,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,
|
||||
&this->_seed, &this->_timestep, &this->_tstat_only,
|
||||
&this->_threads_per_atom);
|
||||
&_qqrd2e, &_g_ewald, &_lamda,
|
||||
&this->_seed, &this->_timestep,
|
||||
&_qqrd2e, &_g_ewald, &_lamda,
|
||||
&this->_tstat_only, &this->_threads_per_atom,);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
return GX;
|
||||
|
||||
@ -166,6 +166,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global numtyp *restrict sp_sqrt,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
@ -176,19 +177,31 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict v_,
|
||||
const __global numtyp4 *restrict cutsq,
|
||||
const numtyp dtinvsqrt, const int seed,
|
||||
const int timestep, const int tstat_only,
|
||||
const int timestep, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp lamda,
|
||||
const int tstat_only,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
__local numtyp sp_cl[4];
|
||||
int n_stride;
|
||||
local_allocate_store_charge();
|
||||
|
||||
sp_cl[0]=sp_cl_in[0];
|
||||
sp_cl[1]=sp_cl_in[1];
|
||||
sp_cl[2]=sp_cl_in[2];
|
||||
sp_cl[3]=sp_cl_in[3];
|
||||
|
||||
int n_stride;
|
||||
local_allocate_store_pair();
|
||||
|
||||
acctyp3 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
acctyp energy, virial[6];
|
||||
acctyp e_coul, energy, virial[6];
|
||||
if (EVFLAG) {
|
||||
energy=(acctyp)0;
|
||||
e_coul=(acctyp)0
|
||||
for (int i=0; i<6; i++) virial[i]=(acctyp)0;
|
||||
}
|
||||
|
||||
@ -202,7 +215,8 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
int itag=iv.w;
|
||||
|
||||
const numtyp qtmp = extra[i].x; // q[i]
|
||||
numtyp qtmp = extra[i].x; // q[i]
|
||||
numtyp lamdainv = ucl_recip(lamda);
|
||||
|
||||
numtyp factor_dpd, factor_sqrt;
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
@ -211,6 +225,8 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
int j=dev_packed[nbor];
|
||||
factor_dpd = sp_lj[sbmask(j)];
|
||||
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||
numtyp factor_coul;
|
||||
factor_coul = (numtyp)1.0-sp_cl[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
@ -225,9 +241,16 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq<cutsq.x[mtype]) {
|
||||
|
||||
// cutsq[mtype].x -> global squared cutoff
|
||||
if (rsq<cutsq[mtype].x) {
|
||||
numtyp r=ucl_sqrt(rsq);
|
||||
if (r < EPSILON) continue;
|
||||
numtyp force_dpd = (numtyp)0.0;
|
||||
numtyp force_coul = (numtyp)0.0;
|
||||
|
||||
// apply DPD force if distance below DPD cutoff
|
||||
// cutsq[mtype].y -> DPD squared cutoff
|
||||
if (rsq < cutsq[mtype].y && r < EPSILON) {
|
||||
|
||||
numtyp rinv=ucl_recip(r);
|
||||
numtyp delvx = iv.x - jv.x;
|
||||
@ -250,24 +273,57 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
// drag force = -gamma * wd^2 * (delx dot delv) / r
|
||||
// random force = sigma * wd * rnd * dtinvsqrt;
|
||||
|
||||
numtyp force = (numtyp)0.0;
|
||||
if (!tstat_only) force = coeff[mtype].x*wd;
|
||||
force -= coeff[mtype].y*wd*wd*dot*rinv;
|
||||
force *= factor_dpd;
|
||||
force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||
force*=rinv;
|
||||
|
||||
if (!tstat_only) force_dpd = coeff[mtype].x*wd;
|
||||
force_dpd -= coeff[mtype].y*wd*wd*dot*rinv;
|
||||
force_dpd *= factor_dpd;
|
||||
force_dpd += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||
force_dpd *=rinv;
|
||||
}
|
||||
|
||||
// apply Slater electrostatic force if distance below Slater cutoff
|
||||
// and the two species have a slater coeff
|
||||
// cutsq[mtype].w -> Coulombic squared cutoff
|
||||
if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){
|
||||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp _erfc;
|
||||
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
numtyp prefactor = extra[j].x;
|
||||
prefactor *= qqrd2e * cutsq[mtype].z * qtmp/r;
|
||||
numtyp rlamdainv = r * lamdainv;
|
||||
numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv);
|
||||
numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv)));
|
||||
force_coul = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term);
|
||||
if (factor_coul > (numtyp)0) force_coul -= factor_coul*prefactor*((numtyp)1.0-slater_term);
|
||||
force_coul *= r2inv;
|
||||
}
|
||||
|
||||
numtyp force = force_coul + force_dpd;
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
f.z+=delz*force;
|
||||
|
||||
if (EVFLAG && eflag) {
|
||||
|
||||
if (rsq < cutsq[mtype].y && r < EPSILON) {
|
||||
// unshifted eng of conservative term:
|
||||
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
|
||||
// eng shifted to 0.0 at cutoff
|
||||
numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd;
|
||||
energy+=factor_dpd*e;
|
||||
}
|
||||
if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){
|
||||
numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv;
|
||||
numtyp e = prefactor*(_erfc-e_slater);
|
||||
if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater);
|
||||
e_coul += e;
|
||||
}
|
||||
}
|
||||
if (EVFLAG && vflag) {
|
||||
virial[0] += delx*delx*force;
|
||||
virial[1] += dely*dely*force;
|
||||
@ -276,18 +332,22 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||
virial[4] += delx*delz*force;
|
||||
virial[5] += dely*delz*force;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
} // for nbor
|
||||
} // if ii
|
||||
store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
|
||||
store_answers_q(f,energy, e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
|
||||
ans,engv);
|
||||
|
||||
}
|
||||
|
||||
__kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict extra,
|
||||
const __global numtyp4 *restrict coeff_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global numtyp *restrict sp_sqrt_in,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
@ -298,39 +358,41 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict v_,
|
||||
const __global numtyp4 *restrict cutsq,
|
||||
const numtyp dtinvsqrt, const int seed,
|
||||
const int timestep, const int tstat_only,
|
||||
const int timestep, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp lamda,
|
||||
const int tstat_only,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
#ifndef ONETYPE
|
||||
__local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
__local numtyp sp_lj[4];
|
||||
__local numtyp sp_sqrt[4];
|
||||
/// COUL Init
|
||||
__local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
__local numtyp sp_cl[4];
|
||||
if (tid<4) {
|
||||
sp_lj[tid]=sp_lj_in[tid];
|
||||
sp_sqrt[tid]=sp_sqrt_in[tid];
|
||||
sp_cl[tid]=sp_cl_in[tid];
|
||||
}
|
||||
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
|
||||
coeff[tid]=coeff_in[tid];
|
||||
scale[tid]=scale_in[tid];
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
#else
|
||||
const numtyp coeffx=coeff_in[ONETYPE].x;
|
||||
const numtyp coeffy=coeff_in[ONETYPE].y;
|
||||
const numtyp coeffz=coeff_in[ONETYPE].z;
|
||||
const numtyp coeffw=coeff_in[ONETYPE].w;
|
||||
const numtyp cutsq_p=cutsq[ONETYPE];
|
||||
#endif
|
||||
|
||||
|
||||
int n_stride;
|
||||
local_allocate_store_pair();
|
||||
|
||||
acctyp3 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
acctyp energy, virial[6];
|
||||
acctyp e_coul, energy, virial[6];
|
||||
if (EVFLAG) {
|
||||
energy=(acctyp)0;
|
||||
e_coul=(acctyp)0;
|
||||
for (int i=0; i<6; i++) virial[i]=(acctyp)0;
|
||||
}
|
||||
|
||||
@ -340,33 +402,26 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
#ifndef ONETYPE
|
||||
int iw=ix.w;
|
||||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||
#endif
|
||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||
int itag=iv.w;
|
||||
|
||||
const numtyp qi = extra[i].x;
|
||||
numtyp qtmp = extra[i].x; // q[i]
|
||||
numtyp lamdainv = ucl_recip(lamda);
|
||||
|
||||
#ifndef ONETYPE
|
||||
numtyp factor_dpd, factor_sqrt;
|
||||
#endif
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||
|
||||
int j=dev_packed[nbor];
|
||||
#ifndef ONETYPE
|
||||
factor_dpd = sp_lj[sbmask(j)];
|
||||
factor_sqrt = sp_sqrt[sbmask(j)];
|
||||
numtyp factor_coul;
|
||||
factor_coul = (numtyp)1.0-sp_cl[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
#endif
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
#ifndef ONETYPE
|
||||
int mtype=itype+jx.w;
|
||||
const numtyp cutsq_p=cutsq[mtype];
|
||||
#endif
|
||||
numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
|
||||
int jtag=jv.w;
|
||||
|
||||
@ -376,19 +431,24 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
numtyp delz = ix.z-jx.z;
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
if (rsq<cutsq_p) {
|
||||
int mtype=itype+jx.w;
|
||||
|
||||
// cutsq[mtype].x -> global squared cutoff
|
||||
if (rsq<cutsq[mtype].x) {
|
||||
numtyp r=ucl_sqrt(rsq);
|
||||
if (r < EPSILON) continue;
|
||||
numtyp force_dpd = (numtyp)0.0;
|
||||
numtyp force_coul = (numtyp)0.0;
|
||||
|
||||
// apply DPD force if distance below DPD cutoff
|
||||
// cutsq[mtype].y -> DPD squared cutoff
|
||||
if (rsq < cutsq[mtype].y && r < EPSILON) {
|
||||
|
||||
numtyp rinv=ucl_recip(r);
|
||||
numtyp delvx = iv.x - jv.x;
|
||||
numtyp delvy = iv.y - jv.y;
|
||||
numtyp delvz = iv.z - jv.z;
|
||||
numtyp dot = delx*delvx + dely*delvy + delz*delvz;
|
||||
#ifndef ONETYPE
|
||||
const numtyp coeffw=coeff[mtype].w;
|
||||
#endif
|
||||
numtyp wd = (numtyp)1.0 - r/coeffw;
|
||||
numtyp wd = (numtyp)1.0 - r/coeff[mtype].w;
|
||||
|
||||
const numtyp qj = extra[j].x;
|
||||
|
||||
@ -404,36 +464,56 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
// drag force = -gamma * wd^2 * (delx dot delv) / r
|
||||
// random force = sigma * wd * rnd * dtinvsqrt;
|
||||
|
||||
#ifndef ONETYPE
|
||||
const numtyp coeffx=coeff[mtype].x;
|
||||
const numtyp coeffy=coeff[mtype].y;
|
||||
const numtyp coeffz=coeff[mtype].z;
|
||||
#endif
|
||||
numtyp force = (numtyp)0.0;
|
||||
if (!tstat_only) force = coeffx*wd;
|
||||
force -= coeffy*wd*wd*dot*rinv;
|
||||
#ifndef ONETYPE
|
||||
force *= factor_dpd;
|
||||
force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt;
|
||||
#else
|
||||
force += coeffz*wd*randnum*dtinvsqrt;
|
||||
#endif
|
||||
force*=rinv;
|
||||
|
||||
if (!tstat_only) force_dpd = coeff[mtype].x*wd;
|
||||
force_dpd -= coeff[mtype].y*wd*wd*dot*rinv;
|
||||
force_dpd *= factor_dpd;
|
||||
force_dpd += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt;
|
||||
force_dpd *=rinv;
|
||||
}
|
||||
|
||||
// apply Slater electrostatic force if distance below Slater cutoff
|
||||
// and the two species have a slater coeff
|
||||
// cutsq[mtype].w -> Coulombic squared cutoff
|
||||
if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){
|
||||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp _erfc;
|
||||
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
numtyp prefactor = extra[j].x;
|
||||
prefactor *= qqrd2e * cutsq[mtype].z * qtmp/r;
|
||||
numtyp rlamdainv = r * lamdainv;
|
||||
numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv);
|
||||
numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv)));
|
||||
force_coul = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term);
|
||||
if (factor_coul > (numtyp)0) force_coul -= factor_coul*prefactor*((numtyp)1.0-slater_term);
|
||||
force_coul *= r2inv;
|
||||
}
|
||||
|
||||
numtyp force = force_coul + force_dpd;
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
f.z+=delz*force;
|
||||
|
||||
if (EVFLAG && eflag) {
|
||||
|
||||
if (rsq < cutsq[mtype].y && r < EPSILON) {
|
||||
// unshifted eng of conservative term:
|
||||
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
|
||||
// eng shifted to 0.0 at cutoff
|
||||
numtyp e = (numtyp)0.5*coeffx*coeffw * wd*wd;
|
||||
#ifndef ONETYPE
|
||||
numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd;
|
||||
energy+=factor_dpd*e;
|
||||
#else
|
||||
energy+=e;
|
||||
#endif
|
||||
}
|
||||
if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){
|
||||
numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv;
|
||||
numtyp e = prefactor*(_erfc-e_slater);
|
||||
if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater);
|
||||
e_coul += e;
|
||||
}
|
||||
}
|
||||
if (EVFLAG && vflag) {
|
||||
virial[0] += delx*delx*force;
|
||||
@ -443,6 +523,8 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||
virial[4] += delx*delz*force;
|
||||
virial[5] += dely*delz*force;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
} // for nbor
|
||||
|
||||
@ -38,11 +38,12 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **host_cutsq, double **host_a0,
|
||||
double **host_gamma, double **host_sigma, double **host_cut,
|
||||
double **host_gamma, double **host_sigma,
|
||||
double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
|
||||
double *host_special_lj, bool tstat_only, const int nlocal,
|
||||
double **host_scale, double *host_special_lj, bool tstat_only, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, const double gpu_split, FILE *screen);
|
||||
const double cell_size, const double gpu_split, FILE *screen,
|
||||
const double qqrd2e, const double g_ewald, const double lamda);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
@ -63,12 +64,15 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
|
||||
/// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd
|
||||
UCL_D_Vec<numtyp4> coeff;
|
||||
|
||||
/// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = cut_dpd, cutsq.w = cut_slatersq
|
||||
/// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = scale, cutsq.w = cut_slatersq
|
||||
UCL_D_Vec<numtyp4> cutsq;
|
||||
|
||||
/// Special LJ values
|
||||
UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
|
||||
|
||||
/// Special Coul values [0-3]
|
||||
UCL_D_Vec<numtyp> sp_cl;
|
||||
|
||||
/// If atom type constants fit in shared memory, use fast kernels
|
||||
bool shared_types;
|
||||
|
||||
@ -78,6 +82,9 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
|
||||
/// Only used for thermostat
|
||||
int _tstat_only;
|
||||
|
||||
/// Coulombic terms
|
||||
numtyp _cut_coulsq, _qqrd2e, _g_ewald, _lamda;
|
||||
|
||||
/// pointer to host data for atom charge
|
||||
double *q;
|
||||
|
||||
|
||||
@ -28,11 +28,12 @@ static DPDCharged<PRECISION,ACC_PRECISION> DPDCMF;
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||
double **host_gamma, double **host_sigma, double **host_cut,
|
||||
double **host_cut_dpd, double **host_cut_slater,
|
||||
double *special_lj, const int inum,
|
||||
double **host_gamma, double **host_sigma,
|
||||
double **host_cut_dpd, double **host_cut_dpd_sq, double **host_cut_slatersq,
|
||||
double **host_scale, double *special_lj, const int inum,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen) {
|
||||
const double cell_size, int &gpu_mode, FILE *screen, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald, const double lamda) {
|
||||
DPDCMF.clear();
|
||||
gpu_mode=DPDCMF.device->gpu_mode();
|
||||
double gpu_split=DPDCMF.device->particle_split();
|
||||
@ -56,8 +57,10 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||
int init_ok=0;
|
||||
if (world_me==0)
|
||||
init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||
host_cut, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, special_lj, false, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
host_cut_dpd, host_cut_dpdsq, host_cut_slatersq,
|
||||
host_scale, special_lj, false, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen,
|
||||
force->special_coul,->qqrd2e, g_ewald, lamda);
|
||||
|
||||
DPDCMF.device->world_barrier();
|
||||
if (message)
|
||||
@ -74,8 +77,10 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||
}
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
|
||||
host_cut, host_cut_dpd, host_cut_slater, special_lj, false, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
host_cut_dpd, host_cut_dpdsq, host_cut_slatersq,
|
||||
host_scale, special_lj, false, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen,
|
||||
force->special_coul,force->qqrd2e, g_ewald, lamda);
|
||||
|
||||
DPDCMF.device->serialize_init();
|
||||
if (message)
|
||||
|
||||
@ -39,10 +39,11 @@ using namespace EwaldConst;
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
|
||||
double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum,
|
||||
double **host_sigma, double **host_cut_dpd, double **host_cut_dpd_sq, double **host_cut_slatersq,
|
||||
double **host_scale, double *special_lj, const int inum,
|
||||
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
|
||||
int &gpu_mode, FILE *screen,
|
||||
double *host_special_coul, const double qqrd2e, const double g_ewald, const double lamda);
|
||||
int &gpu_mode, FILE *screen, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald, const double lamda);
|
||||
void dpd_charged_gpu_clear();
|
||||
int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
|
||||
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
@ -313,8 +314,9 @@ void PairDPDChargedGPU::init_style()
|
||||
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
|
||||
int mnf = 5e-2 * neighbor->oneatom;
|
||||
int success =
|
||||
dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut,
|
||||
cut_dpd, cut_dpdsq, cut_slatersq, force->special_lj, atom->nlocal,
|
||||
dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma,
|
||||
cut_dpd, cut_dpdsq, cut_slatersq, scale,
|
||||
force->special_lj, atom->nlocal,
|
||||
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
|
||||
force->special_coul, force->qqrd2e, g_ewald, lamda);
|
||||
GPU_EXTRA::check_flag(success, error, world);
|
||||
|
||||
Reference in New Issue
Block a user