diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp
index 12baec456c..99f61a7fc9 100644
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -49,7 +49,7 @@ using namespace MathConst;
 #define LARGE 10000.0
 #define EPS_HOC 1.0e-7
 
-enum{REVERSE_RHO};
+enum{REVERSE_RHO_GPU,REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
 #ifdef FFT_SINGLE
@@ -254,8 +254,8 @@ void PPPMGPU::compute(int eflag, int vflag)
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
-  cg->reverse_comm(this,REVERSE_RHO);
-  brick2fft();
+  cg->reverse_comm(this,REVERSE_RHO_GPU);
+  brick2fft_gpu();
 
   // compute potential gradient on my FFT grid and
   //   portion of e_long on this proc's FFT grid
@@ -355,7 +355,7 @@ void PPPMGPU::compute(int eflag, int vflag)
    remap density from 3d brick decomposition to FFT decomposition
 ------------------------------------------------------------------------- */
 
-void PPPMGPU::brick2fft()
+void PPPMGPU::brick2fft_gpu()
 {
   int n,ix,iy,iz;
 
@@ -619,10 +619,14 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 
 void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
-  if (flag == REVERSE_RHO) {
+  if (flag == REVERSE_RHO_GPU) {
     FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
+  } else if (flag == REVERSE_RHO) {
+    FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++)
+      buf[i] = src[list[i]];
   }
 }
 
@@ -632,10 +636,14 @@ void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 
 void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
-  if (flag == REVERSE_RHO) {
+  if (flag == REVERSE_RHO_GPU) {
     FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
+  } else if (flag == REVERSE_RHO) {
+    FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++)
+      dest[list[i]] += buf[i];
   }
 }
 
@@ -736,3 +744,117 @@ void PPPMGPU::setup()
   if (im_real_space) return;
   PPPM::setup();
 }
+
+/* ----------------------------------------------------------------------
+   group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   compute the PPPM total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  if (slabflag && triclinic)
+    error->all(FLERR,"Cannot (yet) use K-space slab "
+               "correction with compute group/group for triclinic systems");
+
+  if (differentiation_flag)
+    error->all(FLERR,"Cannot (yet) use kspace_modify "
+               "diff ad with compute group/group");
+
+  if (!group_allocate_flag) allocate_groups();
+
+  // convert atoms from box to lamda coords
+
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  // extend size of per-atom arrays if necessary
+  // part2grid needs to be allocated
+ 
+  if (atom->nmax > nmax || part2grid == NULL) {
+    memory->destroy(part2grid);
+    nmax = atom->nmax;
+    memory->create(part2grid,nmax,3,"pppm:part2grid");
+  }
+
+  particle_map();
+
+  e2group = 0.0; //energy
+  f2group[0] = 0.0; //force in x-direction
+  f2group[1] = 0.0; //force in y-direction
+  f2group[2] = 0.0; //force in z-direction
+
+  // map my particle charge onto my local 3d density grid
+
+  make_rho_groups(groupbit_A,groupbit_B,AA_flag);
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  // temporarily store and switch pointers so we can
+  //  use brick2fft() for groups A and B (without
+  //  writing an additional function)
+
+  FFT_SCALAR ***density_brick_real = density_brick;
+  FFT_SCALAR *density_fft_real = density_fft;
+
+  // group A
+
+  density_brick = density_A_brick;
+  density_fft = density_A_fft;
+
+  cg->reverse_comm(this,REVERSE_RHO);
+  brick2fft();
+
+  // group B
+
+  density_brick = density_B_brick;
+  density_fft = density_B_fft;
+
+  cg->reverse_comm(this,REVERSE_RHO);
+  brick2fft();
+
+  // switch back pointers
+
+  density_brick = density_brick_real;
+  density_fft = density_fft_real;
+
+  // compute potential gradient on my FFT grid and
+  //   portion of group-group energy/force on this proc's FFT grid
+
+  poisson_groups(AA_flag);
+
+  const double qscale = qqrd2e * scale;
+
+  // total group A <--> group B energy
+  // self and boundary correction terms are in compute_group_group.cpp
+
+  double e2group_all;
+  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
+  e2group = e2group_all;
+
+  e2group *= qscale*0.5*volume;
+
+  // total group A <--> group B force
+
+  double f2group_all[3];
+  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
+
+  f2group[0] = qscale*volume*f2group_all[0];
+  f2group[1] = qscale*volume*f2group_all[1];
+  if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+
+  if (slabflag == 1)
+    slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
+}
+
diff --git a/src/GPU/pppm_gpu.h b/src/GPU/pppm_gpu.h
index 3a9b537487..b37358e091 100644
--- a/src/GPU/pppm_gpu.h
+++ b/src/GPU/pppm_gpu.h
@@ -35,13 +35,15 @@ class PPPMGPU : public PPPM {
   int timing_3d(int, double &);
   double memory_usage();
 
+  virtual void compute_group_group(int, int, int);
+
  protected:
   FFT_SCALAR ***density_brick_gpu, ***vd_brick;
   bool kspace_split, im_real_space;
   int old_nlocal;
   double poisson_time;
 
-  void brick2fft();
+  void brick2fft_gpu();
   virtual void poisson_ik();
 
   void pack_forward(int, FFT_SCALAR *, int, int *);