diff --git a/lib/gpu/lal_dpd_charged.cpp b/lib/gpu/lal_dpd_charged.cpp index f07d870884..f7956ee69b 100644 --- a/lib/gpu/lal_dpd_charged.cpp +++ b/lib/gpu/lal_dpd_charged.cpp @@ -46,14 +46,15 @@ template int DPDChargedT::init(const int ntypes, double **host_cutsq, double **host_a0, double **host_gamma, double **host_sigma, - double **host_cut, - double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq, + double **host_cut_dpd, double **host_cut_dpdsq, + double **host_cut_slatersq, **host_scale, double *host_special_lj, const bool tstat_only, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, - const double gpu_split, FILE *_screen) { + const double gpu_split, FILE *_screen, double *host_special_coul, + const double qqrd2e, const double g_ewald, double lamda) { const int max_shared_types=this->device->max_shared_types(); int onetype=0; @@ -83,7 +84,28 @@ int DPDChargedT::init(const int ntypes, lj_types=max_shared_types; shared_types=true; } + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write_coul(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,scale,host_write_coul,host_scale); + + sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + for (int i=0; i<4; i++) { + host_write_coul[i]=host_special_coul[i]; + } + ucl_copy(sp_cl,host_write_coul,4,false); + _lj_types=lj_types; + _cut_coulsq=host_cut_coulsq; + _qqrd2e=qqrd2e; + _g_ewald=g_ewald; + _lamda=lamda; // Allocate a host write buffer for data initialization UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), @@ -103,7 +125,7 @@ int DPDChargedT::init(const int ntypes, host_rsq[i]=0.0; cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq, - host_cut_dpdsq, host_cut_dpd, host_cut_slatersq); + host_cut_dpdsq, host_scale, host_cut_slatersq); double special_sqrt[4]; special_sqrt[0] = sqrt(host_special_lj[0]); @@ -181,20 +203,23 @@ int DPDChargedT::loop(const int eflag, const int vflag) { this->time_pair.start(); if (shared_types) { this->k_pair_sel->set_size(GX,BX); - this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, &sp_sqrt, + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, &sp_cl, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed, &this->_timestep, - &this->_tstat_only, &this->_threads_per_atom); + &_qqrd2e, &_g_ewald, &_lamda, + &this->_tstat_only, &this->_threads_per_atom,); } else { this->k_pair.set_size(GX,BX); - this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &_lj_types, &sp_lj, &sp_sqrt, + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &_lj_types, &sp_lj, &sp_cl, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, - &this->_seed, &this->_timestep, &this->_tstat_only, - &this->_threads_per_atom); + &_qqrd2e, &_g_ewald, &_lamda, + &this->_seed, &this->_timestep, + &_qqrd2e, &_g_ewald, &_lamda, + &this->_tstat_only, &this->_threads_per_atom,); } this->time_pair.stop(); return GX; diff --git a/lib/gpu/lal_dpd_charged.cu b/lib/gpu/lal_dpd_charged.cu index 1fe2bfcc17..2ffd2b9990 100644 --- a/lib/gpu/lal_dpd_charged.cu +++ b/lib/gpu/lal_dpd_charged.cu @@ -166,6 +166,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict coeff, const int lj_types, const __global numtyp *restrict sp_lj, + const __global numtyp *restrict sp_cl_in, const __global numtyp *restrict sp_sqrt, const __global int * dev_nbor, const __global int * dev_packed, @@ -176,19 +177,31 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict v_, const __global numtyp4 *restrict cutsq, const numtyp dtinvsqrt, const int seed, - const int timestep, const int tstat_only, + const int timestep, const numtyp qqrd2e, + const numtyp g_ewald, const numtyp lamda, + const int tstat_only, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); + __local numtyp sp_cl[4]; + int n_stride; + local_allocate_store_charge(); + + sp_cl[0]=sp_cl_in[0]; + sp_cl[1]=sp_cl_in[1]; + sp_cl[2]=sp_cl_in[2]; + sp_cl[3]=sp_cl_in[3]; + int n_stride; local_allocate_store_pair(); acctyp3 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; - acctyp energy, virial[6]; + acctyp e_coul, energy, virial[6]; if (EVFLAG) { energy=(acctyp)0; + e_coul=(acctyp)0 for (int i=0; i<6; i++) virial[i]=(acctyp)0; } @@ -202,7 +215,8 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i]; int itag=iv.w; - const numtyp qtmp = extra[i].x; // q[i] + numtyp qtmp = extra[i].x; // q[i] + numtyp lamdainv = ucl_recip(lamda); numtyp factor_dpd, factor_sqrt; for ( ; nbor global squared cutoff + if (rsq DPD squared cutoff + if (rsq < cutsq[mtype].y && r < EPSILON) { - const numtyp qj = extra[j].x; + numtyp rinv=ucl_recip(r); + numtyp delvx = iv.x - jv.x; + numtyp delvy = iv.y - jv.y; + numtyp delvz = iv.z - jv.z; + numtyp dot = delx*delvx + dely*delvy + delz*delvz; + numtyp wd = (numtyp)1.0 - r/coeff[mtype].w; - unsigned int tag1=itag, tag2=jtag; - if (tag1 > tag2) { - tag1 = jtag; tag2 = itag; + const numtyp qj = extra[j].x; + + unsigned int tag1=itag, tag2=jtag; + if (tag1 > tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = a0 * wd, or 0 if tstat only + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + + if (!tstat_only) force_dpd = coeff[mtype].x*wd; + force_dpd -= coeff[mtype].y*wd*wd*dot*rinv; + force_dpd *= factor_dpd; + force_dpd += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt; + force_dpd *=rinv; + } + + // apply Slater electrostatic force if distance below Slater cutoff + // and the two species have a slater coeff + // cutsq[mtype].w -> Coulombic squared cutoff + if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){ + numtyp r2inv=ucl_recip(rsq); + numtyp _erfc; + + numtyp r = ucl_rsqrt(r2inv); + numtyp grij = g_ewald * r; + numtyp expm2 = ucl_exp(-grij*grij); + numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij); + _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + numtyp prefactor = extra[j].x; + prefactor *= qqrd2e * cutsq[mtype].z * qtmp/r; + numtyp rlamdainv = r * lamdainv; + numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv); + numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv))); + force_coul = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term); + if (factor_coul > (numtyp)0) force_coul -= factor_coul*prefactor*((numtyp)1.0-slater_term); + force_coul *= r2inv; } - numtyp randnum = (numtyp)0.0; - saru(tag1, tag2, seed, timestep, randnum); - - // conservative force = a0 * wd, or 0 if tstat only - // drag force = -gamma * wd^2 * (delx dot delv) / r - // random force = sigma * wd * rnd * dtinvsqrt; - - numtyp force = (numtyp)0.0; - if (!tstat_only) force = coeff[mtype].x*wd; - force -= coeff[mtype].y*wd*wd*dot*rinv; - force *= factor_dpd; - force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt; - force*=rinv; - + numtyp force = force_coul + force_dpd; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (EVFLAG && eflag) { - // unshifted eng of conservative term: - // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); - // eng shifted to 0.0 at cutoff - numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd; - energy+=factor_dpd*e; + + if (rsq < cutsq[mtype].y && r < EPSILON) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd; + energy+=factor_dpd*e; + } + if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){ + numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; + numtyp e = prefactor*(_erfc-e_slater); + if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); + e_coul += e; + } } if (EVFLAG && vflag) { virial[0] += delx*delx*force; @@ -276,18 +332,22 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, virial[4] += delx*delz*force; virial[5] += dely*delz*force; } + + } } // for nbor } // if ii - store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + store_answers_q(f,energy, e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); + } __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict extra, const __global numtyp4 *restrict coeff_in, const __global numtyp *restrict sp_lj_in, + const __global numtyp *restrict sp_cl_in, const __global numtyp *restrict sp_sqrt_in, const __global int * dev_nbor, const __global int * dev_packed, @@ -298,39 +358,41 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict v_, const __global numtyp4 *restrict cutsq, const numtyp dtinvsqrt, const int seed, - const int timestep, const int tstat_only, + const int timestep, const numtyp qqrd2e, + const numtyp g_ewald, const numtyp lamda, + const int tstat_only, const int t_per_atom) { int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); - #ifndef ONETYPE __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; __local numtyp sp_sqrt[4]; + /// COUL Init + __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_cl[4]; if (tid<4) { sp_lj[tid]=sp_lj_in[tid]; sp_sqrt[tid]=sp_sqrt_in[tid]; + sp_cl[tid]=sp_cl_in[tid]; } if (tid global squared cutoff + if (rsq DPD squared cutoff + if (rsq < cutsq[mtype].y && r < EPSILON) { - const numtyp qj = extra[j].x; - - unsigned int tag1=itag, tag2=jtag; - if (tag1 > tag2) { - tag1 = jtag; tag2 = itag; + numtyp rinv=ucl_recip(r); + numtyp delvx = iv.x - jv.x; + numtyp delvy = iv.y - jv.y; + numtyp delvz = iv.z - jv.z; + numtyp dot = delx*delvx + dely*delvy + delz*delvz; + numtyp wd = (numtyp)1.0 - r/coeff[mtype].w; + + const numtyp qj = extra[j].x; + + unsigned int tag1=itag, tag2=jtag; + if (tag1 > tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = a0 * wd, or 0 if tstat only + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + + if (!tstat_only) force_dpd = coeff[mtype].x*wd; + force_dpd -= coeff[mtype].y*wd*wd*dot*rinv; + force_dpd *= factor_dpd; + force_dpd += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt; + force_dpd *=rinv; + } + + // apply Slater electrostatic force if distance below Slater cutoff + // and the two species have a slater coeff + // cutsq[mtype].w -> Coulombic squared cutoff + if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){ + numtyp r2inv=ucl_recip(rsq); + numtyp _erfc; + + numtyp r = ucl_rsqrt(r2inv); + numtyp grij = g_ewald * r; + numtyp expm2 = ucl_exp(-grij*grij); + numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij); + _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + numtyp prefactor = extra[j].x; + prefactor *= qqrd2e * cutsq[mtype].z * qtmp/r; + numtyp rlamdainv = r * lamdainv; + numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv); + numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv))); + force_coul = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term); + if (factor_coul > (numtyp)0) force_coul -= factor_coul*prefactor*((numtyp)1.0-slater_term); + force_coul *= r2inv; } - numtyp randnum = (numtyp)0.0; - saru(tag1, tag2, seed, timestep, randnum); - - // conservative force = a0 * wd, or 0 if tstat only - // drag force = -gamma * wd^2 * (delx dot delv) / r - // random force = sigma * wd * rnd * dtinvsqrt; - - #ifndef ONETYPE - const numtyp coeffx=coeff[mtype].x; - const numtyp coeffy=coeff[mtype].y; - const numtyp coeffz=coeff[mtype].z; - #endif - numtyp force = (numtyp)0.0; - if (!tstat_only) force = coeffx*wd; - force -= coeffy*wd*wd*dot*rinv; - #ifndef ONETYPE - force *= factor_dpd; - force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt; - #else - force += coeffz*wd*randnum*dtinvsqrt; - #endif - force*=rinv; - + numtyp force = force_coul + force_dpd; f.x+=delx*force; f.y+=dely*force; f.z+=delz*force; if (EVFLAG && eflag) { - // unshifted eng of conservative term: - // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); - // eng shifted to 0.0 at cutoff - numtyp e = (numtyp)0.5*coeffx*coeffw * wd*wd; - #ifndef ONETYPE - energy+=factor_dpd*e; - #else - energy+=e; - #endif + + if (rsq < cutsq[mtype].y && r < EPSILON) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd; + energy+=factor_dpd*e; + } + if ( cutsq[mtype].w != 0.0 && rsq < cutsq[mtype].w){ + numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; + numtyp e = prefactor*(_erfc-e_slater); + if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); + e_coul += e; + } } if (EVFLAG && vflag) { virial[0] += delx*delx*force; @@ -443,6 +523,8 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, virial[4] += delx*delz*force; virial[5] += dely*delz*force; } + + } } // for nbor diff --git a/lib/gpu/lal_dpd_charged.h b/lib/gpu/lal_dpd_charged.h index fe64bb56c9..3f2da9f047 100644 --- a/lib/gpu/lal_dpd_charged.h +++ b/lib/gpu/lal_dpd_charged.h @@ -38,11 +38,12 @@ class DPDCharged : public BaseDPD { * - -4 if the GPU library was not compiled for GPU * - -5 Double precision is not supported on card **/ int init(const int ntypes, double **host_cutsq, double **host_a0, - double **host_gamma, double **host_sigma, double **host_cut, + double **host_gamma, double **host_sigma, double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq, - double *host_special_lj, bool tstat_only, const int nlocal, + double **host_scale, double *host_special_lj, bool tstat_only, const int nlocal, const int nall, const int max_nbors, const int maxspecial, - const double cell_size, const double gpu_split, FILE *screen); + const double cell_size, const double gpu_split, FILE *screen, + const double qqrd2e, const double g_ewald, const double lamda); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ @@ -63,12 +64,15 @@ class DPDCharged : public BaseDPD { /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd UCL_D_Vec coeff; - /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = cut_dpd, cutsq.w = cut_slatersq + /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = scale, cutsq.w = cut_slatersq UCL_D_Vec cutsq; /// Special LJ values UCL_D_Vec sp_lj, sp_sqrt; + /// Special Coul values [0-3] + UCL_D_Vec sp_cl; + /// If atom type constants fit in shared memory, use fast kernels bool shared_types; @@ -78,6 +82,9 @@ class DPDCharged : public BaseDPD { /// Only used for thermostat int _tstat_only; + /// Coulombic terms + numtyp _cut_coulsq, _qqrd2e, _g_ewald, _lamda; + /// pointer to host data for atom charge double *q; diff --git a/lib/gpu/lal_dpd_charged_ext.cpp b/lib/gpu/lal_dpd_charged_ext.cpp index 5a94aa6bf6..f05c093a7b 100644 --- a/lib/gpu/lal_dpd_charged_ext.cpp +++ b/lib/gpu/lal_dpd_charged_ext.cpp @@ -28,11 +28,12 @@ static DPDCharged DPDCMF; // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, - double **host_gamma, double **host_sigma, double **host_cut, - double **host_cut_dpd, double **host_cut_slater, - double *special_lj, const int inum, + double **host_gamma, double **host_sigma, + double **host_cut_dpd, double **host_cut_dpd_sq, double **host_cut_slatersq, + double **host_scale, double *special_lj, const int inum, const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen) { + const double cell_size, int &gpu_mode, FILE *screen, double *host_special_coul, + const double qqrd2e, const double g_ewald, const double lamda) { DPDCMF.clear(); gpu_mode=DPDCMF.device->gpu_mode(); double gpu_split=DPDCMF.device->particle_split(); @@ -56,8 +57,10 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, int init_ok=0; if (world_me==0) init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, - host_cut, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, special_lj, false, inum, nall, max_nbors, - maxspecial, cell_size, gpu_split, screen); + host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, + host_scale, special_lj, false, inum, nall, max_nbors, + maxspecial, cell_size, gpu_split, screen, + force->special_coul,->qqrd2e, g_ewald, lamda); DPDCMF.device->world_barrier(); if (message) @@ -74,8 +77,10 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, } if (gpu_rank==i && world_me!=0) init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, - host_cut, host_cut_dpd, host_cut_slater, special_lj, false, inum, nall, max_nbors, - maxspecial, cell_size, gpu_split, screen); + host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, + host_scale, special_lj, false, inum, nall, max_nbors, + maxspecial, cell_size, gpu_split, screen, + force->special_coul,force->qqrd2e, g_ewald, lamda); DPDCMF.device->serialize_init(); if (message) diff --git a/src/GPU/pair_dpd_charged_gpu.cpp b/src/GPU/pair_dpd_charged_gpu.cpp index d92be2e7d2..2d05a4a33e 100644 --- a/src/GPU/pair_dpd_charged_gpu.cpp +++ b/src/GPU/pair_dpd_charged_gpu.cpp @@ -39,10 +39,11 @@ using namespace EwaldConst; // External functions from cuda library for atom decomposition int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma, - double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum, + double **host_sigma, double **host_cut_dpd, double **host_cut_dpd_sq, double **host_cut_slatersq, + double **host_scale, double *special_lj, const int inum, const int nall, const int max_nbors, const int maxspecial, const double cell_size, - int &gpu_mode, FILE *screen, - double *host_special_coul, const double qqrd2e, const double g_ewald, const double lamda); + int &gpu_mode, FILE *screen, double *host_special_coul, + const double qqrd2e, const double g_ewald, const double lamda); void dpd_charged_gpu_clear(); int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ -313,8 +314,9 @@ void PairDPDChargedGPU::init_style() if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial; int mnf = 5e-2 * neighbor->oneatom; int success = - dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, - cut_dpd, cut_dpdsq, cut_slatersq, force->special_lj, atom->nlocal, + dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, + cut_dpd, cut_dpdsq, cut_slatersq, scale, + force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, force->special_coul, force->qqrd2e, g_ewald, lamda); GPU_EXTRA::check_flag(success, error, world);