diff --git a/lib/gpu/lal_dpd_charged.cpp b/lib/gpu/lal_dpd_charged.cpp
index 9f7570403a..f07d870884 100644
--- a/lib/gpu/lal_dpd_charged.cpp
+++ b/lib/gpu/lal_dpd_charged.cpp
@@ -47,7 +47,7 @@ int DPDChargedT::init(const int ntypes,
                double **host_cutsq, double **host_a0,
                double **host_gamma, double **host_sigma,
                double **host_cut,
-               double **host_cut_dpd, **host_cut_slater,
+               double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
                double *host_special_lj,
                const bool tstat_only,
                const int nlocal, const int nall,
@@ -94,21 +94,16 @@ int DPDChargedT::init(const int ntypes,
 
   coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
   this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma,
-                         host_sigma,host_cut);
+                         host_sigma,host_cut_dpd);
 
-  UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types,*(this->ucl_device),
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types*32,*(this->ucl_device),
                              UCL_WRITE_ONLY);
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_rsq[i]=0.0;
   cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
-  this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
-
-  cut_dpd.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
-  this->atom->type_pack1(ntypes,lj_types,cut_dpdsq,host_rsq,host_cut_dpd);
-  
-  cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
-  this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
-
-  scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
-  this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
+  this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq, 
+                          host_cut_dpdsq, host_cut_dpd, host_cut_slatersq);
 
   double special_sqrt[4];
   special_sqrt[0] = sqrt(host_special_lj[0]);
@@ -207,12 +202,12 @@ int DPDChargedT::loop(const int eflag, const int vflag) {
 
 template <class numtyp, class acctyp>
 void DPDChargedT::update_coeff(int ntypes, double **host_a0, double **host_gamma,
-                        double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater)
+                        double **host_sigma, double **host_cut_dpd)
 {
   UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
                                UCL_WRITE_ONLY);
   this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_a0,host_gamma,
-                         host_sigma,host_cut,host_cut_dpd, host_cut_slater);
+                         host_sigma,host_cut_dpd);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/lib/gpu/lal_dpd_charged.cu b/lib/gpu/lal_dpd_charged.cu
index e4caa49dd1..1fe2bfcc17 100644
--- a/lib/gpu/lal_dpd_charged.cu
+++ b/lib/gpu/lal_dpd_charged.cu
@@ -174,7 +174,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
                     const int eflag, const int vflag, const int inum,
                     const int nbor_pitch,
                     const __global numtyp4 *restrict v_,
-                    const __global numtyp *restrict cutsq,
+                    const __global numtyp4 *restrict cutsq,
                     const numtyp dtinvsqrt, const int seed,
                     const int timestep, const int tstat_only,
                     const int t_per_atom) {
@@ -202,7 +202,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
     numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
     int itag=iv.w;
 
-    const numtyp qi = extra[i].x;
+    const numtyp qtmp = extra[i].x; // q[i]
 
     numtyp factor_dpd, factor_sqrt;
     for ( ; nbor<nbor_end; nbor+=n_stride) {
@@ -225,7 +225,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
       numtyp rsq = delx*delx+dely*dely+delz*delz;
 
       int mtype=itype*lj_types+jtype;
-      if (rsq<cutsq[mtype]) {
+      if (rsq<cutsq.x[mtype]) {
         numtyp r=ucl_sqrt(rsq);
         if (r < EPSILON) continue;
 
@@ -296,7 +296,7 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
                          const int eflag, const int vflag, const int inum,
                          const int nbor_pitch,
                          const __global numtyp4 *restrict v_,
-                         const __global numtyp *restrict cutsq,
+                         const __global numtyp4 *restrict cutsq,
                          const numtyp dtinvsqrt, const int seed,
                          const int timestep, const int tstat_only,
                          const int t_per_atom) {
diff --git a/lib/gpu/lal_dpd_charged.h b/lib/gpu/lal_dpd_charged.h
index ca955b24f2..fe64bb56c9 100644
--- a/lib/gpu/lal_dpd_charged.h
+++ b/lib/gpu/lal_dpd_charged.h
@@ -39,6 +39,7 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
     * - -5 Double precision is not supported on card **/
   int init(const int ntypes, double **host_cutsq, double **host_a0,
            double **host_gamma, double **host_sigma, double **host_cut,
+           double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
            double *host_special_lj, bool tstat_only, const int nlocal,
            const int nall, const int max_nbors, const int maxspecial,
            const double cell_size, const double gpu_split, FILE *screen);
@@ -55,18 +56,15 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
 
   /// Update coeff if needed (tstat only)
   void update_coeff(int ntypes, double **host_a0, double **host_gamma,
-                    double **host_sigma, double **host_cut);
+                    double **host_sigma, double **host_cut_dpd );
 
   // --------------------------- TYPE DATA --------------------------
 
-  /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut
+  /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd
   UCL_D_Vec<numtyp4> coeff;
 
-  UCL_D_Vec<numtyp> cutsq;
-  UCL_D_Vec<numtyp> cut_dpd;
-  UCL_D_Vec<numtyp> cut_dpdsq;
-  UCL_D_Vec<numtyp> cut_slater;
-  UCL_D_Vec<numtyp> cut_slatersq;
+  /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = cut_dpd, cutsq.w = cut_slatersq
+  UCL_D_Vec<numtyp4> cutsq;
 
   /// Special LJ values
   UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
diff --git a/lib/gpu/lal_dpd_charged_ext.cpp b/lib/gpu/lal_dpd_charged_ext.cpp
index cc7642fcf2..5a94aa6bf6 100644
--- a/lib/gpu/lal_dpd_charged_ext.cpp
+++ b/lib/gpu/lal_dpd_charged_ext.cpp
@@ -56,7 +56,7 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0,
   int init_ok=0;
   if (world_me==0)
     init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
-                       host_cut, host_cut_dpd, host_cut_slater, special_lj, false, inum, nall, max_nbors,
+                       host_cut, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, special_lj, false, inum, nall, max_nbors,
                        maxspecial, cell_size, gpu_split, screen);
 
   DPDCMF.device->world_barrier();
@@ -122,9 +122,9 @@ void dpd_charged_gpu_compute(const int ago, const int inum_full, const int nall,
 }
 
 void dpd_charged_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
-                          double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater)
+                          double **host_sigma, double **host_cut, double **host_cut_dpd)
 {
-   DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd, host_cut_slater,);
+   DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd);
 }
 
 void dpd_charged_gpu_get_extra_data(double *host_q) {
diff --git a/src/GPU/pair_dpd_charged_gpu.cpp b/src/GPU/pair_dpd_charged_gpu.cpp
index c7a1fa5902..d92be2e7d2 100644
--- a/src/GPU/pair_dpd_charged_gpu.cpp
+++ b/src/GPU/pair_dpd_charged_gpu.cpp
@@ -41,7 +41,8 @@ using namespace EwaldConst;
 int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
                  double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum,
                  const int nall, const int max_nbors, const int maxspecial, const double cell_size,
-                 int &gpu_mode, FILE *screen);
+                 int &gpu_mode, FILE *screen, 
+                 double *host_special_coul, const double qqrd2e, const double g_ewald, const double lamda);
 void dpd_charged_gpu_clear();
 int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
@@ -312,8 +313,10 @@ void PairDPDChargedGPU::init_style()
   if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
   int mnf = 5e-2 * neighbor->oneatom;
   int success =
-      dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, cut_dpd, cut_slater, force->special_lj, atom->nlocal,
-                   atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
+      dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, 
+                   cut_dpd, cut_dpdsq, cut_slatersq, force->special_lj, atom->nlocal,
+                   atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
+                   force->special_coul, force->qqrd2e, g_ewald, lamda);
   GPU_EXTRA::check_flag(success, error, world);
 
   if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);