diff --git a/lib/gpu/lal_dpd_charged.cpp b/lib/gpu/lal_dpd_charged.cpp index 9f7570403a..f07d870884 100644 --- a/lib/gpu/lal_dpd_charged.cpp +++ b/lib/gpu/lal_dpd_charged.cpp @@ -47,7 +47,7 @@ int DPDChargedT::init(const int ntypes, double **host_cutsq, double **host_a0, double **host_gamma, double **host_sigma, double **host_cut, - double **host_cut_dpd, **host_cut_slater, + double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq, double *host_special_lj, const bool tstat_only, const int nlocal, const int nall, @@ -94,21 +94,16 @@ int DPDChargedT::init(const int ntypes, coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma, - host_sigma,host_cut); + host_sigma,host_cut_dpd); - UCL_H_Vec host_rsq(lj_types*lj_types,*(this->ucl_device), + // Allocate a host write buffer for data initialization + UCL_H_Vec host_rsq(lj_types*lj_types*32,*(this->ucl_device), UCL_WRITE_ONLY); + for (int i=0; iucl_device),UCL_READ_ONLY); - this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); - - cut_dpd.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack1(ntypes,lj_types,cut_dpdsq,host_rsq,host_cut_dpd); - - cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); - - scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale); + this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq, + host_cut_dpdsq, host_cut_dpd, host_cut_slatersq); double special_sqrt[4]; special_sqrt[0] = sqrt(host_special_lj[0]); @@ -207,12 +202,12 @@ int DPDChargedT::loop(const int eflag, const int vflag) { template void DPDChargedT::update_coeff(int ntypes, double **host_a0, double **host_gamma, - double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater) + double **host_sigma, double **host_cut_dpd) { UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), UCL_WRITE_ONLY); this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_a0,host_gamma, - host_sigma,host_cut,host_cut_dpd, host_cut_slater); + host_sigma,host_cut_dpd); } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_dpd_charged.cu b/lib/gpu/lal_dpd_charged.cu index e4caa49dd1..1fe2bfcc17 100644 --- a/lib/gpu/lal_dpd_charged.cu +++ b/lib/gpu/lal_dpd_charged.cu @@ -174,7 +174,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, const int eflag, const int vflag, const int inum, const int nbor_pitch, const __global numtyp4 *restrict v_, - const __global numtyp *restrict cutsq, + const __global numtyp4 *restrict cutsq, const numtyp dtinvsqrt, const int seed, const int timestep, const int tstat_only, const int t_per_atom) { @@ -202,7 +202,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_, numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i]; int itag=iv.w; - const numtyp qi = extra[i].x; + const numtyp qtmp = extra[i].x; // q[i] numtyp factor_dpd, factor_sqrt; for ( ; nbor { * - -5 Double precision is not supported on card **/ int init(const int ntypes, double **host_cutsq, double **host_a0, double **host_gamma, double **host_sigma, double **host_cut, + double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq, double *host_special_lj, bool tstat_only, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *screen); @@ -55,18 +56,15 @@ class DPDCharged : public BaseDPD { /// Update coeff if needed (tstat only) void update_coeff(int ntypes, double **host_a0, double **host_gamma, - double **host_sigma, double **host_cut); + double **host_sigma, double **host_cut_dpd ); // --------------------------- TYPE DATA -------------------------- - /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut + /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd UCL_D_Vec coeff; - UCL_D_Vec cutsq; - UCL_D_Vec cut_dpd; - UCL_D_Vec cut_dpdsq; - UCL_D_Vec cut_slater; - UCL_D_Vec cut_slatersq; + /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = cut_dpd, cutsq.w = cut_slatersq + UCL_D_Vec cutsq; /// Special LJ values UCL_D_Vec sp_lj, sp_sqrt; diff --git a/lib/gpu/lal_dpd_charged_ext.cpp b/lib/gpu/lal_dpd_charged_ext.cpp index cc7642fcf2..5a94aa6bf6 100644 --- a/lib/gpu/lal_dpd_charged_ext.cpp +++ b/lib/gpu/lal_dpd_charged_ext.cpp @@ -56,7 +56,7 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, int init_ok=0; if (world_me==0) init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, - host_cut, host_cut_dpd, host_cut_slater, special_lj, false, inum, nall, max_nbors, + host_cut, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, special_lj, false, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); DPDCMF.device->world_barrier(); @@ -122,9 +122,9 @@ void dpd_charged_gpu_compute(const int ago, const int inum_full, const int nall, } void dpd_charged_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma, - double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater) + double **host_sigma, double **host_cut, double **host_cut_dpd) { - DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd, host_cut_slater,); + DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd); } void dpd_charged_gpu_get_extra_data(double *host_q) { diff --git a/src/GPU/pair_dpd_charged_gpu.cpp b/src/GPU/pair_dpd_charged_gpu.cpp index c7a1fa5902..d92be2e7d2 100644 --- a/src/GPU/pair_dpd_charged_gpu.cpp +++ b/src/GPU/pair_dpd_charged_gpu.cpp @@ -41,7 +41,8 @@ using namespace EwaldConst; int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma, double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum, const int nall, const int max_nbors, const int maxspecial, const double cell_size, - int &gpu_mode, FILE *screen); + int &gpu_mode, FILE *screen, + double *host_special_coul, const double qqrd2e, const double g_ewald, const double lamda); void dpd_charged_gpu_clear(); int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ -312,8 +313,10 @@ void PairDPDChargedGPU::init_style() if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial; int mnf = 5e-2 * neighbor->oneatom; int success = - dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, cut_dpd, cut_slater, force->special_lj, atom->nlocal, - atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen); + dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, + cut_dpd, cut_dpdsq, cut_slatersq, force->special_lj, atom->nlocal, + atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, + force->special_coul, force->qqrd2e, g_ewald, lamda); GPU_EXTRA::check_flag(success, error, world); if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);