diff --git a/lib/gpu/lal_dpd_charged.cpp b/lib/gpu/lal_dpd_charged.cpp index 96cdeffbc6..8a86ea9cd1 100644 --- a/lib/gpu/lal_dpd_charged.cpp +++ b/lib/gpu/lal_dpd_charged.cpp @@ -112,13 +112,8 @@ int DPDChargedT::init(const int ntypes, this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma, host_sigma,host_cut_dpd); - // Allocate a host write buffer for data initialization - UCL_H_Vec host_rsq(lj_types*lj_types*32,*(this->ucl_device), - UCL_WRITE_ONLY); - for (int i=0; iucl_device),UCL_READ_ONLY); - this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq, + this->atom->type_pack4(ntypes,lj_types,cutsq,host_write,host_cutsq, host_cut_dpdsq, host_scale, host_cut_slatersq); double special_sqrt[4]; diff --git a/lib/gpu/lal_dpd_charged.cu b/lib/gpu/lal_dpd_charged.cu index 1093377122..668a950f23 100644 --- a/lib/gpu/lal_dpd_charged.cu +++ b/lib/gpu/lal_dpd_charged.cu @@ -249,7 +249,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, // apply DPD force if distance below DPD cutoff // cutsq[mtype].y -> DPD squared cutoff - if (rsq < cutsq[mtype].y && r < EPSILON) { + if (rsq < cutsq[mtype].y && r > EPSILON) { numtyp rinv=ucl_recip(r); numtyp delvx = iv.x - jv.x; @@ -350,7 +350,7 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, const int eflag, const int vflag, const int inum, const int nbor_pitch, const __global numtyp4 *restrict v_, - const __global numtyp4 *restrict cutsq, + const __global numtyp4 *restrict cutsq_in, const numtyp dtinvsqrt, const int seed, const int timestep, const numtyp qqrd2e, const numtyp g_ewald, const numtyp lamda, @@ -360,10 +360,10 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, atom_info(t_per_atom,ii,tid,offset); __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; __local numtyp sp_sqrt[4]; /// COUL Init - __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_cl[4]; if (tid<4) { sp_lj[tid]=sp_lj_in[tid]; @@ -372,7 +372,7 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_, } if (tid DPD squared cutoff - if (rsq < cutsq[mtype].y && r < EPSILON) { + if (rsq < cutsq[mtype].y && r > EPSILON) { numtyp rinv=ucl_recip(r); numtyp delvx = iv.x - jv.x; diff --git a/lib/gpu/lal_dpd_charged.h b/lib/gpu/lal_dpd_charged.h index 2b24cd1b88..60d041f5aa 100644 --- a/lib/gpu/lal_dpd_charged.h +++ b/lib/gpu/lal_dpd_charged.h @@ -66,7 +66,7 @@ class DPDCharged : public BaseDPD { /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd UCL_D_Vec coeff; - /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = scale, cutsq.w = cut_slatersq + /// cutsq.x = cutsq, cutsq.y = cut_dpdsq, cutsq.z = scale, cutsq.w = cut_slatersq UCL_D_Vec cutsq; /// Special LJ values diff --git a/lib/gpu/lal_dpd_charged_ext.cpp b/lib/gpu/lal_dpd_charged_ext.cpp index 6899bb4e82..a8e153300a 100644 --- a/lib/gpu/lal_dpd_charged_ext.cpp +++ b/lib/gpu/lal_dpd_charged_ext.cpp @@ -27,7 +27,7 @@ static DPDCharged DPDCMF; // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma, +int dpd_charged_gpu_init(const int ntypes, double **host_cutsq, double **host_a0, double **host_gamma, double **host_sigma, double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq, double **host_scale, double *special_lj, const int inum, const int nall, const int max_nbors, const int maxspecial, const double cell_size, @@ -55,7 +55,7 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, dou int init_ok=0; if (world_me==0) - init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, + init_ok=DPDCMF.init(ntypes, host_cutsq, host_a0, host_gamma, host_sigma, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, host_scale, special_lj, false, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, @@ -75,7 +75,7 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, dou fflush(screen); } if (gpu_rank==i && world_me!=0) - init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, + init_ok=DPDCMF.init(ntypes, host_cutsq, host_a0, host_gamma, host_sigma, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, host_scale, special_lj, false, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen,