diff --git a/src/EXTRA-FIX/fix_ttm.cpp b/src/EXTRA-FIX/fix_ttm.cpp index 9184983bdb..be0df513ca 100644 --- a/src/EXTRA-FIX/fix_ttm.cpp +++ b/src/EXTRA-FIX/fix_ttm.cpp @@ -227,7 +227,7 @@ void FixTTM::init() // to allow this, would have to reset grid bounds dynamically // for RCB balancing would have to reassign grid pts to procs - // and create a new GridComm, and pass old GC data to new GC + // and create a new Grid3d, and pass old GC data to new GC if (domain->box_change) error->all(FLERR,"Cannot use fix ttm with changing box shape, size, or sub-domains"); diff --git a/src/EXTRA-FIX/fix_ttm_grid.cpp b/src/EXTRA-FIX/fix_ttm_grid.cpp index 5e6022af58..dd254cd892 100644 --- a/src/EXTRA-FIX/fix_ttm_grid.cpp +++ b/src/EXTRA-FIX/fix_ttm_grid.cpp @@ -23,7 +23,7 @@ #include "comm.h" #include "domain.h" #include "error.h" -#include "gridcomm.h" +#include "grid3d.h" #include "memory.h" #include "neighbor.h" #include "random_mars.h" @@ -86,7 +86,7 @@ void FixTTMGrid::post_constructor() if (infile) { read_electron_temperatures(infile); - gc->forward_comm(GridComm::FIX,this,1,sizeof(double),0,gc_buf1,gc_buf2,MPI_DOUBLE); + gc->forward_comm(Grid3d::FIX,this,1,sizeof(double),0,gc_buf1,gc_buf2,MPI_DOUBLE); } } @@ -193,7 +193,7 @@ void FixTTMGrid::end_of_step() flangevin[i][2]*v[i][2]); } - gc->reverse_comm(GridComm::FIX,this,1,sizeof(double),0, + gc->reverse_comm(Grid3d::FIX,this,1,sizeof(double),0, gc_buf1,gc_buf2,MPI_DOUBLE); // clang-format off @@ -246,7 +246,7 @@ void FixTTMGrid::end_of_step() // communicate new T_electron values to ghost grid points - gc->forward_comm(GridComm::FIX,this,1,sizeof(double),0,gc_buf1,gc_buf2,MPI_DOUBLE); + gc->forward_comm(Grid3d::FIX,this,1,sizeof(double),0,gc_buf1,gc_buf2,MPI_DOUBLE); } // clang-format on @@ -363,7 +363,7 @@ void FixTTMGrid::write_electron_temperatures(const std::string &filename) style); } - gc->gather(GridComm::FIX, this, 1, sizeof(double), 1, nullptr, MPI_DOUBLE); + gc->gather(Grid3d::FIX, this, 1, sizeof(double), 1, nullptr, MPI_DOUBLE); if (comm->me == 0) fclose(FPout); } @@ -468,8 +468,8 @@ void FixTTMGrid::allocate_grid() totalmine = (bigint) (nxhi_in - nxlo_in + 1) * (nyhi_in - nylo_in + 1) * (nzhi_in - nzlo_in + 1); ngridmine = totalmine; - gc = new GridComm(lmp, world, nxgrid, nygrid, nzgrid, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, - nzhi_in, nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out); + gc = new Grid3d(lmp, world, nxgrid, nygrid, nzgrid, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, + nzhi_in, nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out); gc->setup(ngc_buf1, ngc_buf2); @@ -517,7 +517,7 @@ void FixTTMGrid::write_restart(FILE *fp) // gather rest of rlist on proc 0 as global grid values - gc->gather(GridComm::FIX, this, 1, sizeof(double), 0, &rlist[4], MPI_DOUBLE); + gc->gather(Grid3d::FIX, this, 1, sizeof(double), 0, &rlist[4], MPI_DOUBLE); if (comm->me == 0) { int size = rsize * sizeof(double); @@ -568,7 +568,7 @@ void FixTTMGrid::restart(char *buf) // communicate new T_electron values to ghost grid points - gc->forward_comm(GridComm::FIX, this, 1, sizeof(double), 0, gc_buf1, gc_buf2, MPI_DOUBLE); + gc->forward_comm(Grid3d::FIX, this, 1, sizeof(double), 0, gc_buf1, gc_buf2, MPI_DOUBLE); } /* ---------------------------------------------------------------------- diff --git a/src/EXTRA-FIX/fix_ttm_grid.h b/src/EXTRA-FIX/fix_ttm_grid.h index f6f467fce0..e42e602be9 100644 --- a/src/EXTRA-FIX/fix_ttm_grid.h +++ b/src/EXTRA-FIX/fix_ttm_grid.h @@ -55,7 +55,7 @@ class FixTTMGrid : public FixTTM { double 
skin_original; FILE *FPout; - class GridComm *gc; + class Grid3d *gc; int ngc_buf1, ngc_buf2; double *gc_buf1, *gc_buf2; diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index b518d8efe1..c1392fb1bd 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -23,7 +23,7 @@ #include "domain.h" #include "error.h" #include "force.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "memory.h" #include "neighbor.h" @@ -444,7 +444,7 @@ void MSM::compute(int eflag, int vflag) // to fully sum contribution in their 3d grid current_level = 0; - gcall->reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + gcall->reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_RHO,gcall_buf1,gcall_buf2,MPI_DOUBLE); // forward communicate charge density values to fill ghost grid points @@ -453,7 +453,7 @@ void MSM::compute(int eflag, int vflag) for (int n=0; n<=levels-2; n++) { if (!active_flag[n]) continue; current_level = n; - gc[n]->forward_comm(GridComm::KSPACE,this,1,sizeof(double), + gc[n]->forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_RHO,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); direct(n); restriction(n); @@ -466,15 +466,15 @@ void MSM::compute(int eflag, int vflag) if (domain->nonperiodic) { current_level = levels-1; gc[levels-1]-> - forward_comm(GridComm::KSPACE,this,1,sizeof(double), + forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_RHO,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); direct_top(levels-1); gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_AD,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); if (vflag_atom) gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); } else { @@ -485,7 +485,7 @@ void MSM::compute(int eflag, int vflag) current_level = levels-1; if (vflag_atom) gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); } } @@ -498,13 +498,13 @@ void MSM::compute(int eflag, int vflag) prolongation(n); current_level = n; - gc[n]->reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + gc[n]->reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_AD,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); // extra per-atom virial communication if (vflag_atom) - gc[n]->reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + gc[n]->reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); } @@ -512,13 +512,13 @@ void MSM::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks current_level = 0; - gcall->forward_comm(GridComm::KSPACE,this,1,sizeof(double), + gcall->forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_AD,gcall_buf1,gcall_buf2,MPI_DOUBLE); // extra per-atom energy/virial communication if (vflag_atom) - gcall->forward_comm(GridComm::KSPACE,this,6,sizeof(double), + gcall->forward_comm(Grid3d::KSPACE,this,6,sizeof(double), FORWARD_AD_PERATOM,gcall_buf1,gcall_buf2,MPI_DOUBLE); // calculate the force on my particles (interpolation) @@ -595,7 +595,7 @@ void MSM::allocate() // commgrid using all processors for finest grid level - gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0], + gcall = new Grid3d(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0], nxlo_in[0],nxhi_in[0],nylo_in[0], nyhi_in[0],nzlo_in[0],nzhi_in[0], 
nxlo_out_all,nxhi_out_all,nylo_out_all, @@ -627,7 +627,7 @@ void MSM::allocate() delete gc[n]; int **procneigh = procneigh_levels[n]; - gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n], + gc[n] = new Grid3d(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n], nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n], nzlo_in[n],nzhi_in[n], nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n], @@ -743,7 +743,7 @@ void MSM::allocate_levels() { ngrid = new int[levels]; - gc = new GridComm*[levels]; + gc = new Grid3d*[levels]; gc_buf1 = new double*[levels]; gc_buf2 = new double*[levels]; ngc_buf1 = new int[levels]; @@ -3394,7 +3394,7 @@ double MSM::memory_usage() // NOTE: Stan, fill in other memory allocations here - // all GridComm bufs + // all Grid3d bufs bytes += (double)(ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double); diff --git a/src/KSPACE/msm.h b/src/KSPACE/msm.h index c39988b13b..c5f9f1bc0a 100644 --- a/src/KSPACE/msm.h +++ b/src/KSPACE/msm.h @@ -81,8 +81,8 @@ class MSM : public KSpace { int myloc[3]; // which proc I am in each dim int ***procneigh_levels; // my 6 neighboring procs, 0/1 = left/right - class GridComm *gcall; // GridComm class for finest level grid - class GridComm **gc; // GridComm classes for each hierarchical level + class Grid3d *gcall; // GridComm class for finest level grid + class Grid3d **gc; // GridComm classes for each hierarchical level double *gcall_buf1, *gcall_buf2; double **gc_buf1, **gc_buf2; diff --git a/src/KSPACE/msm_cg.cpp b/src/KSPACE/msm_cg.cpp index 3073e9c9dc..a04cf4b764 100644 --- a/src/KSPACE/msm_cg.cpp +++ b/src/KSPACE/msm_cg.cpp @@ -19,7 +19,7 @@ #include "msm_cg.h" #include "atom.h" -#include "gridcomm.h" +#include "grid3d.h" #include "domain.h" #include "error.h" #include "force.h" @@ -160,7 +160,7 @@ void MSMCG::compute(int eflag, int vflag) // to fully sum contribution in their 3d grid current_level = 0; - gcall->reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + gcall->reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_RHO,gcall_buf1,gcall_buf2,MPI_DOUBLE); // forward communicate charge density values to fill ghost grid points @@ -169,7 +169,7 @@ void MSMCG::compute(int eflag, int vflag) for (n=0; n<=levels-2; n++) { if (!active_flag[n]) continue; current_level = n; - gc[n]->forward_comm(GridComm::KSPACE,this,1,sizeof(double), + gc[n]->forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_RHO,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); direct(n); restriction(n); @@ -182,15 +182,15 @@ void MSMCG::compute(int eflag, int vflag) if (domain->nonperiodic) { current_level = levels-1; gc[levels-1]-> - forward_comm(GridComm::KSPACE,this,1,sizeof(double), + forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_RHO,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); direct_top(levels-1); gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_AD,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); if (vflag_atom) gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); } else { @@ -201,7 +201,7 @@ void MSMCG::compute(int eflag, int vflag) current_level = levels-1; if (vflag_atom) gc[levels-1]-> - reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE); } } @@ -214,13 +214,13 @@ void MSMCG::compute(int eflag, 
int vflag) prolongation(n); current_level = n; - gc[n]->reverse_comm(GridComm::KSPACE,this,1,sizeof(double), + gc[n]->reverse_comm(Grid3d::KSPACE,this,1,sizeof(double), REVERSE_AD,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); // extra per-atom virial communication if (vflag_atom) - gc[n]->reverse_comm(GridComm::KSPACE,this,6,sizeof(double), + gc[n]->reverse_comm(Grid3d::KSPACE,this,6,sizeof(double), REVERSE_AD_PERATOM,gc_buf1[n],gc_buf2[n],MPI_DOUBLE); } @@ -228,13 +228,13 @@ void MSMCG::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks current_level = 0; - gcall->forward_comm(GridComm::KSPACE,this,1,sizeof(double), + gcall->forward_comm(Grid3d::KSPACE,this,1,sizeof(double), FORWARD_AD,gcall_buf1,gcall_buf2,MPI_DOUBLE); // extra per-atom energy/virial communication if (vflag_atom) - gcall->forward_comm(GridComm::KSPACE,this,6,sizeof(double), + gcall->forward_comm(Grid3d::KSPACE,this,6,sizeof(double), FORWARD_AD_PERATOM,gcall_buf1,gcall_buf2,MPI_DOUBLE); // calculate the force on my particles (interpolation) diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index e94193759f..bdcadaf4b8 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -29,7 +29,7 @@ #include "error.h" #include "fft3d_wrap.h" #include "force.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "math_special.h" #include "memory.h" @@ -291,7 +291,7 @@ void PPPM::init() // or overlap is allowed, then done // else reduce order and try again - GridComm *gctmp = nullptr; + Grid3d *gctmp = nullptr; int iteration = 0; while (order >= minorder) { @@ -304,7 +304,7 @@ void PPPM::init() set_grid_local(); if (overlap_allowed) break; - gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gctmp = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -634,7 +634,7 @@ void PPPM::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(); @@ -649,20 +649,20 @@ void PPPM::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) - gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), FORWARD_AD,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else - gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), FORWARD_IK,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) { if (differentiation_flag == 1 && vflag_atom) - gc->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,6,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else if (differentiation_flag == 0) - gc->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); } @@ -810,9 +810,9 @@ void PPPM::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - gc = new 
GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -3055,7 +3055,7 @@ double PPPM::memory_usage() bytes += (double)2 * nfft_both * sizeof(FFT_SCALAR);; } - // two GridComm bufs + // two Grid3d bufs bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); @@ -3115,7 +3115,7 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) density_brick = density_A_brick; density_fft = density_A_fft; - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(); @@ -3124,7 +3124,7 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) density_brick = density_B_brick; density_fft = density_B_fft; - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(); diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index cac5338ed7..ec19023bb5 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -80,7 +80,7 @@ class PPPM : public KSpace { class FFT3d *fft1, *fft2; class Remap *remap; - class GridComm *gc; + class Grid3d *gc; FFT_SCALAR *gc_buf1, *gc_buf2; int ngc_buf1, ngc_buf2, npergrid; diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp index 077eb9f3f4..f62da83cd5 100644 --- a/src/KSPACE/pppm_cg.cpp +++ b/src/KSPACE/pppm_cg.cpp @@ -21,7 +21,7 @@ #include "atom.h" #include "domain.h" #include "error.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "memory.h" #include "neighbor.h" @@ -177,7 +177,7 @@ void PPPMCG::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(); @@ -192,20 +192,20 @@ void PPPMCG::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) - gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), FORWARD_AD,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else - gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), FORWARD_IK,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) { if (differentiation_flag == 1 && vflag_atom) - gc->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,6,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else if (differentiation_flag == 0) - gc->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); } diff --git a/src/KSPACE/pppm_dipole.cpp b/src/KSPACE/pppm_dipole.cpp index aef14b0189..0206119e37 100644 --- a/src/KSPACE/pppm_dipole.cpp +++ b/src/KSPACE/pppm_dipole.cpp @@ -24,7 +24,7 @@ #include "error.h" #include "fft3d_wrap.h" #include "force.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "math_special.h" #include "memory.h" @@ -188,7 +188,7 @@ void PPPMDipole::init() // or overlap is allowed, 
then done // else reduce order and try again - GridComm *gctmp = nullptr; + Grid3d *gctmp = nullptr; int iteration = 0; while (order >= minorder) { @@ -201,7 +201,7 @@ void PPPMDipole::init() set_grid_local(); if (overlap_allowed) break; - gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gctmp = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -441,7 +441,7 @@ void PPPMDipole::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc_dipole->reverse_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc_dipole->reverse_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), REVERSE_MU,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft_dipole(); @@ -455,13 +455,13 @@ void PPPMDipole::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc_dipole->forward_comm(GridComm::KSPACE,this,9,sizeof(FFT_SCALAR), + gc_dipole->forward_comm(Grid3d::KSPACE,this,9,sizeof(FFT_SCALAR), FORWARD_MU,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc_dipole->forward_comm(GridComm::KSPACE,this,18,sizeof(FFT_SCALAR), + gc_dipole->forward_comm(Grid3d::KSPACE,this,18,sizeof(FFT_SCALAR), FORWARD_MU_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // calculate the force on my particles @@ -603,9 +603,9 @@ void PPPMDipole::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - gc_dipole = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gc_dipole = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -2519,7 +2519,7 @@ double PPPMDipole::memory_usage() if (peratom_allocate_flag) bytes += (double)21 * nbrick * sizeof(FFT_SCALAR); - // two GridComm bufs + // two Grid3d bufs bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); diff --git a/src/KSPACE/pppm_dipole.h b/src/KSPACE/pppm_dipole.h index 4415d4014e..0239a9a0a8 100644 --- a/src/KSPACE/pppm_dipole.h +++ b/src/KSPACE/pppm_dipole.h @@ -70,7 +70,7 @@ class PPPMDipole : public PPPM { FFT_SCALAR *work3, *work4; FFT_SCALAR *densityx_fft_dipole, *densityy_fft_dipole, *densityz_fft_dipole; - class GridComm *gc_dipole; + class Grid3d *gc_dipole; int only_dipole_flag; double musum, musqsum, mu2; diff --git a/src/KSPACE/pppm_dipole_spin.cpp b/src/KSPACE/pppm_dipole_spin.cpp index 148da52770..ea92eb4685 100644 --- a/src/KSPACE/pppm_dipole_spin.cpp +++ b/src/KSPACE/pppm_dipole_spin.cpp @@ -23,7 +23,7 @@ #include "domain.h" #include "error.h" #include "force.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "memory.h" #include "pair.h" @@ -173,7 +173,7 @@ void PPPMDipoleSpin::init() // or overlap is allowed, then done // else reduce order and try again - GridComm *gctmp = nullptr; + Grid3d *gctmp = nullptr; int iteration = 0; while (order >= minorder) { @@ -186,7 +186,7 @@ void PPPMDipoleSpin::init() set_grid_local(); if (overlap_allowed) break; - gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gctmp = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -298,7 +298,7 @@ void PPPMDipoleSpin::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc_dipole->reverse_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc_dipole->reverse_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), REVERSE_MU,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft_dipole(); @@ -312,13 +312,13 @@ void PPPMDipoleSpin::compute(int eflag, int vflag) // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks - gc_dipole->forward_comm(GridComm::KSPACE,this,9,sizeof(FFT_SCALAR), + gc_dipole->forward_comm(Grid3d::KSPACE,this,9,sizeof(FFT_SCALAR), FORWARD_MU,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) - gc->forward_comm(GridComm::KSPACE,this,18,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,18,sizeof(FFT_SCALAR), FORWARD_MU_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // calculate the force on my particles diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 7377ec6f39..a056574cfd 100644 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -27,7 +27,7 @@ #include "error.h" #include "fft3d_wrap.h" #include "force.h" -#include "gridcomm.h" +#include "grid3d.h" #include "math_const.h" #include "memory.h" #include "neighbor.h" @@ -413,7 +413,7 @@ void PPPMDisp::init() int iteration = 0; if (function[0]) { - GridComm *gctmp = nullptr; + Grid3d *gctmp = nullptr; while (order >= minorder) { if (iteration && me == 0) @@ -441,7 +441,7 @@ void PPPMDisp::init() if (overlap_allowed) break; - gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gctmp = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -493,7 +493,7 @@ void PPPMDisp::init() iteration = 0; if (function[1] + function[2] + function[3]) { - GridComm *gctmp = nullptr; + Grid3d *gctmp = nullptr; while (order_6 >= minorder) { if (iteration && me == 0) @@ -519,7 +519,7 @@ void PPPMDisp::init() if (overlap_allowed) break; - gctmp = new GridComm(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + gctmp = new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, nzlo_in_6,nzhi_in_6, nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, @@ -926,7 +926,7 @@ void PPPMDisp::compute(int eflag, int vflag) make_rho_c(); - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in, @@ -941,13 +941,13 @@ void PPPMDisp::compute(int eflag, int vflag) virial_1,vg,vg2, u_brick,v0_brick,v1_brick,v2_brick,v3_brick,v4_brick,v5_brick); - gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), FORWARD_AD,gc_buf1,gc_buf2,MPI_FFT_SCALAR); fieldforce_c_ad(); if (vflag_atom) - gc->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,6,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); } else { @@ -960,13 +960,13 @@ void PPPMDisp::compute(int eflag, int vflag) vdx_brick,vdy_brick,vdz_brick,virial_1,vg,vg2, u_brick,v0_brick,v1_brick,v2_brick,v3_brick,v4_brick,v5_brick); - gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), 
FORWARD_IK,gc_buf1,gc_buf2,MPI_FFT_SCALAR); fieldforce_c_ik(); if (evflag_atom) - gc->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); } @@ -984,7 +984,7 @@ void PPPMDisp::compute(int eflag, int vflag) make_rho_g(); - gc6->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc6->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO_GEOM,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); brick2fft(nxlo_in_6,nylo_in_6,nzlo_in_6,nxhi_in_6,nyhi_in_6,nzhi_in_6, @@ -1000,13 +1000,13 @@ void PPPMDisp::compute(int eflag, int vflag) u_brick_g,v0_brick_g,v1_brick_g,v2_brick_g, v3_brick_g,v4_brick_g,v5_brick_g); - gc6->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), FORWARD_AD_GEOM,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_g_ad(); if (vflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,6,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM_GEOM,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } else { @@ -1020,13 +1020,13 @@ void PPPMDisp::compute(int eflag, int vflag) u_brick_g,v0_brick_g,v1_brick_g,v2_brick_g, v3_brick_g,v4_brick_g,v5_brick_g); - gc6->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), FORWARD_IK_GEOM,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_g_ik(); if (evflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM_GEOM,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } @@ -1044,7 +1044,7 @@ void PPPMDisp::compute(int eflag, int vflag) make_rho_a(); - gc6->reverse_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc6->reverse_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), REVERSE_RHO_ARITH,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); brick2fft_a(); @@ -1074,13 +1074,13 @@ void PPPMDisp::compute(int eflag, int vflag) u_brick_a4,v0_brick_a4,v1_brick_a4,v2_brick_a4, v3_brick_a4,v4_brick_a4,v5_brick_a4); - gc6->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_AD_ARITH,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_a_ad(); if (evflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,42,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,42,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM_ARITH,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } else { @@ -1115,13 +1115,13 @@ void PPPMDisp::compute(int eflag, int vflag) u_brick_a4,v0_brick_a4,v1_brick_a4,v2_brick_a4, v3_brick_a4,v4_brick_a4,v5_brick_a4); - gc6->forward_comm(GridComm::KSPACE,this,21,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,21,sizeof(FFT_SCALAR), FORWARD_IK_ARITH,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_a_ik(); if (evflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,49,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,49,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM_ARITH,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } @@ -1139,7 +1139,7 @@ void PPPMDisp::compute(int eflag, int vflag) make_rho_none(); - gc6->reverse_comm(GridComm::KSPACE,this,nsplit_alloc,sizeof(FFT_SCALAR), + gc6->reverse_comm(Grid3d::KSPACE,this,nsplit_alloc,sizeof(FFT_SCALAR), REVERSE_RHO_NONE,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); brick2fft_none(); @@ -1154,13 +1154,13 @@ void PPPMDisp::compute(int eflag, int vflag) n += 2; } - gc6->forward_comm(GridComm::KSPACE,this,1*nsplit_alloc,sizeof(FFT_SCALAR), + 
gc6->forward_comm(Grid3d::KSPACE,this,1*nsplit_alloc,sizeof(FFT_SCALAR), FORWARD_AD_NONE,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_none_ad(); if (vflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,6*nsplit_alloc,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,6*nsplit_alloc,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM_NONE,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } else { @@ -1174,13 +1174,13 @@ void PPPMDisp::compute(int eflag, int vflag) n += 2; } - gc6->forward_comm(GridComm::KSPACE,this,3*nsplit_alloc,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,3*nsplit_alloc,sizeof(FFT_SCALAR), FORWARD_IK_NONE,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); fieldforce_none_ik(); if (evflag_atom) - gc6->forward_comm(GridComm::KSPACE,this,7*nsplit_alloc,sizeof(FFT_SCALAR), + gc6->forward_comm(Grid3d::KSPACE,this,7*nsplit_alloc,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM_NONE,gc6_buf1,gc6_buf2,MPI_FFT_SCALAR); } @@ -1748,9 +1748,9 @@ void _noopt PPPMDisp::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods - gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm, + gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out); @@ -1831,10 +1831,10 @@ void _noopt PPPMDisp::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods gc6 = - new GridComm(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); @@ -1994,10 +1994,10 @@ void _noopt PPPMDisp::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods gc6 = - new GridComm(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); @@ -2081,10 +2081,10 @@ void _noopt PPPMDisp::allocate() 1,0,0,FFT_PRECISION,collective_flag); // create ghost grid object for rho and electric field communication - // also create 2 bufs for ghost grid cell comm, passed to GridComm methods + // also create 2 bufs for ghost grid cell comm, passed to Grid3d methods gc6 = - new GridComm(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + new Grid3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6); @@ -8310,7 +8310,7 @@ double PPPMDisp::memory_usage() bytes += (double)nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); } - // four GridComm bufs + // four Grid3d bufs bytes += (double)(ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR); bytes += (double)(ngc6_buf1 + ngc6_buf2) * npergrid6 * sizeof(FFT_SCALAR); diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h index 1f254e772d..9ba3b61a12 100644 --- 
a/src/KSPACE/pppm_disp.h +++ b/src/KSPACE/pppm_disp.h @@ -178,7 +178,7 @@ class PPPMDisp : public KSpace { class FFT3d *fft1, *fft2; class FFT3d *fft1_6, *fft2_6; class Remap *remap, *remap_6; - class GridComm *gc, *gc6; + class Grid3d *gc, *gc6; FFT_SCALAR *gc_buf1, *gc_buf2, *gc6_buf1, *gc6_buf2; int ngc_buf1, ngc_buf2, npergrid; diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp index 35aec73169..b12f50f561 100644 --- a/src/KSPACE/pppm_stagger.cpp +++ b/src/KSPACE/pppm_stagger.cpp @@ -21,7 +21,7 @@ #include #include #include "atom.h" -#include "gridcomm.h" +#include "grid3d.h" #include "domain.h" #include "memory.h" #include "error.h" @@ -157,7 +157,7 @@ void PPPMStagger::compute(int eflag, int vflag) // to fully sum contribution in their 3d bricks // remap from 3d decomposition to FFT decomposition - gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->reverse_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), REVERSE_RHO,gc_buf1,gc_buf2,MPI_FFT_SCALAR); brick2fft(); @@ -172,20 +172,20 @@ void PPPMStagger::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) - gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,1,sizeof(FFT_SCALAR), FORWARD_AD,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else - gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,3,sizeof(FFT_SCALAR), FORWARD_IK,gc_buf1,gc_buf2,MPI_FFT_SCALAR); // extra per-atom energy/virial communication if (evflag_atom) { if (differentiation_flag == 1 && vflag_atom) - gc->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,6,sizeof(FFT_SCALAR), FORWARD_AD_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); else if (differentiation_flag == 0) - gc->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR), + gc->forward_comm(Grid3d::KSPACE,this,7,sizeof(FFT_SCALAR), FORWARD_IK_PERATOM,gc_buf1,gc_buf2,MPI_FFT_SCALAR); } diff --git a/src/gridcomm.cpp b/src/grid2d.cpp similarity index 92% rename from src/gridcomm.cpp rename to src/grid2d.cpp index fc53026cc8..f96770d3c9 100644 --- a/src/gridcomm.cpp +++ b/src/grid2d.cpp @@ -12,7 +12,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "gridcomm.h" +#include "grid3d.h" #include "comm.h" #include "error.h" @@ -48,10 +48,10 @@ enum{REGULAR,TILED}; communication is done across the periodic boundaries ------------------------------------------------------------------------- */ -GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, - int gnx, int gny, int gnz, - int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi) +Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi) : Pointers(lmp) { if (comm->layout == Comm::LAYOUT_TILED) layout = TILED; @@ -87,11 +87,11 @@ GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, e xyz lohi for flag = 2: 6 neighbor procs ------------------------------------------------------------------------- */ -GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, int flag, - int gnx, int gny, int gnz, - int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi, - int exlo, int exhi, int eylo, int eyhi, int ezlo, int ezhi) +Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, int flag, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi, + int exlo, int exhi, int eylo, int eyhi, int ezlo, int ezhi) : Pointers(lmp) { if (comm->layout == Comm::LAYOUT_TILED) layout = TILED; @@ -124,14 +124,14 @@ GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, int flag, oxlo,oxhi,oylo,oyhi,ozlo,ozhi, exlo,exhi,eylo,eyhi,ezlo,ezhi); } else { - error->all(FLERR,"GridComm does not support tiled layout with neighbor procs"); + error->all(FLERR,"Grid3d does not support tiled layout with neighbor procs"); } } } /* ---------------------------------------------------------------------- */ -GridComm::~GridComm() +Grid3d::~Grid3d() { // regular comm data struct @@ -164,16 +164,16 @@ GridComm::~GridComm() store constructor args in local variables ------------------------------------------------------------------------- */ -void GridComm::initialize(MPI_Comm gcomm, - int gnx, int gny, int gnz, - int ixlo, int ixhi, int iylo, int iyhi, - int izlo, int izhi, - int oxlo, int oxhi, int oylo, int oyhi, - int ozlo, int ozhi, - int fxlo, int fxhi, int fylo, int fyhi, - int fzlo, int fzhi, - int pxlo, int pxhi, int pylo, int pyhi, - int pzlo, int pzhi) +void Grid3d::initialize(MPI_Comm gcomm, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, + int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, + int ozlo, int ozhi, + int fxlo, int fxhi, int fylo, int fyhi, + int fzlo, int fzhi, + int pxlo, int pxhi, int pylo, int pyhi, + int pzlo, int pzhi) { gridcomm = gcomm; MPI_Comm_rank(gridcomm,&me); @@ -229,7 +229,7 @@ void GridComm::initialize(MPI_Comm gcomm, /* ---------------------------------------------------------------------- */ -void GridComm::setup(int &nbuf1, int &nbuf2) +void Grid3d::setup(int &nbuf1, int &nbuf2) { if (layout == REGULAR) setup_regular(nbuf1,nbuf2); else setup_tiled(nbuf1,nbuf2); @@ -244,7 +244,7 @@ void GridComm::setup(int &nbuf1, int &nbuf2) all procs perform same # of swaps in a direction, even if some don't need it ------------------------------------------------------------------------- */ -void GridComm::setup_regular(int &nbuf1, int &nbuf2) +void 
Grid3d::setup_regular(int &nbuf1, int &nbuf2) { int nsent,sendfirst,sendlast,recvfirst,recvlast; int sendplanes,recvplanes; @@ -545,7 +545,7 @@ void GridComm::setup_regular(int &nbuf1, int &nbuf2) no exchanges by dimension, unlike CommTiled forward/reverse comm of particles ------------------------------------------------------------------------- */ -void GridComm::setup_tiled(int &nbuf1, int &nbuf2) +void Grid3d::setup_tiled(int &nbuf1, int &nbuf2) { int i,m; double xlo,xhi,ylo,yhi,zlo,zhi; @@ -557,7 +557,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) // dim is -1 for proc 0, but never accessed rcbinfo = (RCBinfo *) - memory->smalloc(nprocs*sizeof(RCBinfo),"GridComm:rcbinfo"); + memory->smalloc(nprocs*sizeof(RCBinfo),"grid3d:rcbinfo"); RCBinfo rcbone; rcbone.dim = comm->rcbcutdim; if (rcbone.dim <= 0) rcbone.cut = inxlo; @@ -580,7 +580,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) pbc[0] = pbc[1] = pbc[2] = 0; - memory->create(overlap_procs,nprocs,"GridComm:overlap_procs"); + memory->create(overlap_procs,nprocs,"grid3d:overlap_procs"); noverlap = maxoverlap = 0; overlap = nullptr; @@ -591,9 +591,9 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) // ncopy = # of overlaps with myself, across a periodic boundary int *proclist; - memory->create(proclist,noverlap,"GridComm:proclist"); + memory->create(proclist,noverlap,"grid3d:proclist"); srequest = (Request *) - memory->smalloc(noverlap*sizeof(Request),"GridComm:srequest"); + memory->smalloc(noverlap*sizeof(Request),"grid3d:srequest"); int nsend_request = 0; ncopy = 0; @@ -612,17 +612,17 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) auto irregular = new Irregular(lmp); int nrecv_request = irregular->create_data(nsend_request,proclist,1); - auto rrequest = (Request *) memory->smalloc(nrecv_request*sizeof(Request),"GridComm:rrequest"); + auto rrequest = (Request *) memory->smalloc(nrecv_request*sizeof(Request),"grid3d:rrequest"); irregular->exchange_data((char *) srequest,sizeof(Request),(char *) rrequest); irregular->destroy_data(); // compute overlaps between received ghost boxes and my owned box // overlap box used to setup my Send data struct and respond to requests - send = (Send *) memory->smalloc(nrecv_request*sizeof(Send),"GridComm:send"); - sresponse = (Response *) memory->smalloc(nrecv_request*sizeof(Response),"GridComm:sresponse"); + send = (Send *) memory->smalloc(nrecv_request*sizeof(Send),"grid3d:send"); + sresponse = (Response *) memory->smalloc(nrecv_request*sizeof(Response),"grid3d:sresponse"); memory->destroy(proclist); - memory->create(proclist,nrecv_request,"GridComm:proclist"); + memory->create(proclist,nrecv_request,"grid3d:proclist"); for (m = 0; m < nrecv_request; m++) { send[m].proc = rrequest[m].sender; @@ -651,7 +651,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) int nsend_response = nrecv_request; int nrecv_response = irregular->create_data(nsend_response,proclist,1); - auto rresponse = (Response *) memory->smalloc(nrecv_response*sizeof(Response),"GridComm:rresponse"); + auto rresponse = (Response *) memory->smalloc(nrecv_response*sizeof(Response),"grid3d:rresponse"); irregular->exchange_data((char *) sresponse,sizeof(Response),(char *) rresponse); irregular->destroy_data(); delete irregular; @@ -660,7 +660,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) // box used to setup my Recv data struct after unwrapping via PBC // adjacent = 0 if any box of ghost cells does not adjoin my owned cells - recv = (Recv *) memory->smalloc(nrecv_response*sizeof(Recv),"GridComm:recv"); 
+ recv = (Recv *) memory->smalloc(nrecv_response*sizeof(Recv),"grid3d:recv"); adjacent = 1; for (i = 0; i < nrecv_response; i++) { @@ -683,7 +683,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) // create Copy data struct from overlaps with self - copy = (Copy *) memory->smalloc(ncopy*sizeof(Copy),"GridComm:copy"); + copy = (Copy *) memory->smalloc(ncopy*sizeof(Copy),"grid3d:copy"); ncopy = 0; for (m = 0; m < noverlap; m++) { @@ -770,7 +770,7 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2) add all the procs it overlaps with to Overlap list ------------------------------------------------------------------------- */ -void GridComm::ghost_box_drop(int *box, int *pbc) +void Grid3d::ghost_box_drop(int *box, int *pbc) { int i,m; @@ -855,7 +855,7 @@ void GridComm::ghost_box_drop(int *box, int *pbc) return Np = # of procs, plist = proc IDs ------------------------------------------------------------------------- */ -void GridComm::box_drop_grid(int *box, int proclower, int procupper, +void Grid3d::box_drop_grid(int *box, int proclower, int procupper, int &np, int *plist) { // end recursion when partition is a single proc @@ -886,7 +886,7 @@ void GridComm::box_drop_grid(int *box, int proclower, int procupper, return 1 if yes, 0 if no ------------------------------------------------------------------------- */ -int GridComm::ghost_adjacent() +int Grid3d::ghost_adjacent() { if (layout == REGULAR) return ghost_adjacent_regular(); return ghost_adjacent_tiled(); @@ -897,7 +897,7 @@ int GridComm::ghost_adjacent() return 0 if adjacent=0 for any proc, else 1 ------------------------------------------------------------------------- */ -int GridComm::ghost_adjacent_regular() +int Grid3d::ghost_adjacent_regular() { adjacent = 1; if (ghostxlo > inxhi-inxlo+1) adjacent = 0; @@ -918,7 +918,7 @@ int GridComm::ghost_adjacent_regular() return 0 if adjacent=0 for any proc, else 1 ------------------------------------------------------------------------- */ -int GridComm::ghost_adjacent_tiled() +int Grid3d::ghost_adjacent_tiled() { int adjacent_all; MPI_Allreduce(&adjacent,&adjacent_all,1,MPI_INT,MPI_MIN,gridcomm); @@ -929,7 +929,7 @@ int GridComm::ghost_adjacent_tiled() forward comm of my owned cells to other's ghost cells ------------------------------------------------------------------------- */ -void GridComm::forward_comm(int caller, void *ptr, int nper, int nbyte, int which, +void Grid3d::forward_comm(int caller, void *ptr, int nper, int nbyte, int which, void *buf1, void *buf2, MPI_Datatype datatype) { if (layout == REGULAR) { @@ -960,7 +960,7 @@ void GridComm::forward_comm(int caller, void *ptr, int nper, int nbyte, int whic ------------------------------------------------------------------------- */ template < class T > -void GridComm:: +void Grid3d:: forward_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, void *buf1, void *buf2, MPI_Datatype datatype) { @@ -990,7 +990,7 @@ forward_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, ------------------------------------------------------------------------- */ template < class T > -void GridComm:: +void Grid3d:: forward_comm_tiled(T *ptr, int nper, int nbyte, int which, void *buf1, void *vbuf2, MPI_Datatype datatype) { @@ -1034,7 +1034,7 @@ forward_comm_tiled(T *ptr, int nper, int nbyte, int which, reverse comm of my ghost cells to sum to owner cells ------------------------------------------------------------------------- */ -void GridComm::reverse_comm(int caller, void *ptr, int nper, int nbyte, int which, +void 
Grid3d::reverse_comm(int caller, void *ptr, int nper, int nbyte, int which, void *buf1, void *buf2, MPI_Datatype datatype) { if (layout == REGULAR) { @@ -1065,7 +1065,7 @@ void GridComm::reverse_comm(int caller, void *ptr, int nper, int nbyte, int whic ------------------------------------------------------------------------- */ template < class T > -void GridComm:: +void Grid3d:: reverse_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, void *buf1, void *buf2, MPI_Datatype datatype) { @@ -1095,7 +1095,7 @@ reverse_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, ------------------------------------------------------------------------- */ template < class T > -void GridComm:: +void Grid3d:: reverse_comm_tiled(T *ptr, int nper, int nbyte, int which, void *buf1, void *vbuf2, MPI_Datatype datatype) { @@ -1142,7 +1142,7 @@ reverse_comm_tiled(T *ptr, int nper, int nbyte, int which, caller can decide whether to store chunks, output them, etc ------------------------------------------------------------------------- */ -void GridComm::gather(int /*caller*/, void *ptr, int nper, int nbyte, +void Grid3d::gather(int /*caller*/, void *ptr, int nper, int nbyte, int which, void *buf, MPI_Datatype datatype) { int me = comm->me; @@ -1158,8 +1158,8 @@ void GridComm::gather(int /*caller*/, void *ptr, int nper, int nbyte, // pack my data via callback to caller char *mybuf; - if (me == 0) memory->create(mybuf,maxsize*nbyte,"GridComm:mybuf"); - else memory->create(mybuf,mysize*nbyte,"GridComm:mybuf"); + if (me == 0) memory->create(mybuf,maxsize*nbyte,"grid3d:mybuf"); + else memory->create(mybuf,mysize*nbyte,"grid3d:mybuf"); fptr->pack_gather_grid(which,mybuf); // ping each proc for its data @@ -1219,10 +1219,10 @@ void GridComm::gather(int /*caller*/, void *ptr, int nper, int nbyte, same swap list used by forward and reverse communication ------------------------------------------------------------------------- */ -void GridComm::grow_swap() +void Grid3d::grow_swap() { maxswap += DELTA; - swap = (Swap *) memory->srealloc(swap,maxswap*sizeof(Swap),"GridComm:swap"); + swap = (Swap *) memory->srealloc(swap,maxswap*sizeof(Swap),"grid3d:swap"); } /* ---------------------------------------------------------------------- @@ -1233,11 +1233,11 @@ void GridComm::grow_swap() same swap list used by forward and reverse communication ------------------------------------------------------------------------- */ -void GridComm::grow_overlap() +void Grid3d::grow_overlap() { maxoverlap += DELTA; overlap = (Overlap *) - memory->srealloc(overlap,maxoverlap*sizeof(Overlap),"GridComm:overlap"); + memory->srealloc(overlap,maxoverlap*sizeof(Overlap),"grid3d:overlap"); } /* ---------------------------------------------------------------------- @@ -1246,11 +1246,11 @@ void GridComm::grow_overlap() (fullxlo:fullxhi,fullylo:fullyhi,fullzlo:fullzhi) ------------------------------------------------------------------------- */ -int GridComm::indices(int *&list, +int Grid3d::indices(int *&list, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi) { int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1); - memory->create(list,nmax,"GridComm:indices"); + memory->create(list,nmax,"grid3d:indices"); if (nmax == 0) return 0; int nx = (fullxhi-fullxlo+1); diff --git a/src/gridcomm.h b/src/grid2d.h similarity index 95% rename from src/gridcomm.h rename to src/grid2d.h index 011faad9fb..5a48b8a6df 100644 --- a/src/gridcomm.h +++ b/src/grid2d.h @@ -11,22 +11,22 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#ifndef LMP_GRIDCOMM_H -#define LMP_GRIDCOMM_H +#ifndef LMP_GRID2D_H +#define LMP_GRID2D_H #include "pointers.h" namespace LAMMPS_NS { -class GridComm : protected Pointers { +class Grid2d : protected Pointers { public: enum { KSPACE = 0, PAIR = 1, FIX = 2 }; // calling classes - GridComm(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, + Grid2d(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); - GridComm(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, + Grid2d(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); - ~GridComm() override; + ~Grid2d() override; void setup(int &, int &); int ghost_adjacent(); void forward_comm(int, void *, int, int, int, void *, void *, MPI_Datatype); diff --git a/src/grid3d.cpp b/src/grid3d.cpp new file mode 100644 index 0000000000..f96770d3c9 --- /dev/null +++ b/src/grid3d.cpp @@ -0,0 +1,1267 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "grid3d.h" + +#include "comm.h" +#include "error.h" +#include "irregular.h" +#include "pair.h" +#include "kspace.h" +#include "fix.h" +#include "memory.h" + +using namespace LAMMPS_NS; + +enum{REGULAR,TILED}; + +#define DELTA 16 + +/* ---------------------------------------------------------------------- + NOTES + tiled implementation only currently works for RCB, not general tiled + b/c RCB tree is used to find neighboring tiles + if o indices for ghosts are < 0 or hi indices are >= N, + then grid is treated as periodic in that dimension, + communication is done across the periodic boundaries +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + constructor called by all classes except MSM + gcomm = world communicator + gn xyz = size of global grid + i xyz lohi = portion of global grid this proc owns, 0 <= index < N + o xyz lohi = owned grid portion + ghost grid cells needed in all directions + if o indices are < 0 or hi indices are >= N, + then grid is treated as periodic in that dimension, + communication is done across the periodic boundaries +------------------------------------------------------------------------- */ + +Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi) + : Pointers(lmp) +{ + if (comm->layout == Comm::LAYOUT_TILED) layout = TILED; + else layout = REGULAR; + + if (layout == REGULAR) { + int (*procneigh)[2] = comm->procneigh; + initialize(gcomm,gnx,gny,gnz, + ixlo,ixhi,iylo,iyhi,izlo,izhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + 
procneigh[0][0],procneigh[0][1], + procneigh[1][0],procneigh[1][1], + procneigh[2][0],procneigh[2][1]); + } else { + initialize(gcomm,gnx,gny,gnz, + ixlo,ixhi,iylo,iyhi,izlo,izhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + 0,0,0,0,0,0); + } +} + +/* ---------------------------------------------------------------------- + constructor called by MSM + gcomm = world communicator or sub-communicator for a hierarchical grid + flag = 1 if e xyz lohi values = larger grid stored by caller in gcomm = world + flag = 2 if e xyz lohi values = 6 neighbor procs in gcomm + gn xyz = size of global grid + i xyz lohi = portion of global grid this proc owns, 0 <= index < N + o xyz lohi = owned grid portion + ghost grid cells needed in all directions + e xyz lohi for flag = 1: extent of larger grid stored by caller + e xyz lohi for flag = 2: 6 neighbor procs +------------------------------------------------------------------------- */ + +Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, int flag, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi, + int exlo, int exhi, int eylo, int eyhi, int ezlo, int ezhi) + : Pointers(lmp) +{ + if (comm->layout == Comm::LAYOUT_TILED) layout = TILED; + else layout = REGULAR; + + if (flag == 1) { + if (layout == REGULAR) { + // this assumes gcomm = world + int (*procneigh)[2] = comm->procneigh; + initialize(gcomm,gnx,gny,gnz, + ixlo,ixhi,iylo,iyhi,izlo,izhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + exlo,exhi,eylo,eyhi,ezlo,ezhi, + procneigh[0][0],procneigh[0][1], + procneigh[1][0],procneigh[1][1], + procneigh[2][0],procneigh[2][1]); + } else { + initialize(gcomm,gnx,gny,gnz, + ixlo,ixhi,iylo,iyhi,izlo,izhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + exlo,exhi,eylo,eyhi,ezlo,ezhi, + 0,0,0,0,0,0); + } + + } else if (flag == 2) { + if (layout == REGULAR) { + initialize(gcomm,gnx,gny,gnz, + ixlo,ixhi,iylo,iyhi,izlo,izhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + oxlo,oxhi,oylo,oyhi,ozlo,ozhi, + exlo,exhi,eylo,eyhi,ezlo,ezhi); + } else { + error->all(FLERR,"Grid3d does not support tiled layout with neighbor procs"); + } + } +} + +/* ---------------------------------------------------------------------- */ + +Grid3d::~Grid3d() +{ + // regular comm data struct + + for (int i = 0; i < nswap; i++) { + memory->destroy(swap[i].packlist); + memory->destroy(swap[i].unpacklist); + } + memory->sfree(swap); + + // tiled comm data structs + + for (int i = 0; i < nsend; i++) + memory->destroy(send[i].packlist); + memory->sfree(send); + + for (int i = 0; i < nrecv; i++) + memory->destroy(recv[i].unpacklist); + memory->sfree(recv); + + for (int i = 0; i < ncopy; i++) { + memory->destroy(copy[i].packlist); + memory->destroy(copy[i].unpacklist); + } + memory->sfree(copy); + + delete [] requests; +} + +/* ---------------------------------------------------------------------- + store constructor args in local variables +------------------------------------------------------------------------- */ + +void Grid3d::initialize(MPI_Comm gcomm, + int gnx, int gny, int gnz, + int ixlo, int ixhi, int iylo, int iyhi, + int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, + int ozlo, int ozhi, + int fxlo, int fxhi, int fylo, int fyhi, + int fzlo, int fzhi, + int pxlo, int pxhi, int pylo, int pyhi, + int pzlo, int pzhi) +{ + gridcomm = gcomm; + MPI_Comm_rank(gridcomm,&me); + MPI_Comm_size(gridcomm,&nprocs); + + nx = gnx; + ny = gny; + nz = gnz; + + inxlo = ixlo; + inxhi = ixhi; + inylo = iylo; + inyhi = 
iyhi; + inzlo = izlo; + inzhi = izhi; + + outxlo = oxlo; + outxhi = oxhi; + outylo = oylo; + outyhi = oyhi; + outzlo = ozlo; + outzhi = ozhi; + + fullxlo = fxlo; + fullxhi = fxhi; + fullylo = fylo; + fullyhi = fyhi; + fullzlo = fzlo; + fullzhi = fzhi; + + // for REGULAR layout, proc xyz lohi = my 6 neighbor procs in this MPI_Comm + + if (layout == REGULAR) { + procxlo = pxlo; + procxhi = pxhi; + procylo = pylo; + procyhi = pyhi; + proczlo = pzlo; + proczhi = pzhi; + } + + // internal data initializations + + nswap = maxswap = 0; + swap = nullptr; + + nsend = nrecv = ncopy = 0; + send = nullptr; + recv = nullptr; + copy = nullptr; + requests = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +void Grid3d::setup(int &nbuf1, int &nbuf2) +{ + if (layout == REGULAR) setup_regular(nbuf1,nbuf2); + else setup_tiled(nbuf1,nbuf2); +} + +/* ---------------------------------------------------------------------- + setup comm for a regular grid of procs + each proc has 6 neighbors + comm pattern = series of swaps with one of those 6 procs + can be multiple swaps with same proc if ghost extent is large + swap may not be symmetric if both procs do not need same layers of ghosts + all procs perform same # of swaps in a direction, even if some don't need it +------------------------------------------------------------------------- */ + +void Grid3d::setup_regular(int &nbuf1, int &nbuf2) +{ + int nsent,sendfirst,sendlast,recvfirst,recvlast; + int sendplanes,recvplanes; + int notdoneme,notdone; + + // notify 6 neighbor procs how many ghost grid planes I need from them + // ghost xyz lo = # of my lower grid planes that proc xyz lo needs as its ghosts + // ghost xyz hi = # of my upper grid planes that proc xyz hi needs as its ghosts + // if this proc is its own neighbor across periodic boundary, value is from self + + int nplanes = inxlo - outxlo; + if (procxlo != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,procxlo,0, + &ghostxhi,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE); + else ghostxhi = nplanes; + + nplanes = outxhi - inxhi; + if (procxhi != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,procxhi,0, + &ghostxlo,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE); + else ghostxlo = nplanes; + + nplanes = inylo - outylo; + if (procylo != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,procylo,0, + &ghostyhi,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE); + else ghostyhi = nplanes; + + nplanes = outyhi - inyhi; + if (procyhi != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,procyhi,0, + &ghostylo,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE); + else ghostylo = nplanes; + + nplanes = inzlo - outzlo; + if (proczlo != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,proczlo,0, + &ghostzhi,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE); + else ghostzhi = nplanes; + + nplanes = outzhi - inzhi; + if (proczhi != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,proczhi,0, + &ghostzlo,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE); + else ghostzlo = nplanes; + + // setup swaps = exchange of grid data with one of 6 neighbor procs + // can be more than one in a direction if ghost region extends beyond neigh proc + // all procs have same swap count, but swapsize npack/nunpack can be empty + + nswap = 0; + + // send own grid pts to -x processor, recv ghost grid pts from +x processor + + nsent = 0; + sendfirst = inxlo; + sendlast = inxhi; + recvfirst = inxhi+1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = procxlo; + swap[nswap].recvproc = procxhi; + sendplanes =
MIN(sendlast-sendfirst+1,ghostxlo-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + sendfirst,sendfirst+sendplanes-1,inylo,inyhi,inzlo,inzhi); + + if (procxlo != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,procxlo,0, + &recvplanes,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + recvfirst,recvfirst+recvplanes-1,inylo,inyhi,inzlo,inzhi); + + nsent += sendplanes; + sendfirst += sendplanes; + sendlast += recvplanes; + recvfirst += recvplanes; + nswap++; + + if (nsent < ghostxlo) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // send own grid pts to +x processor, recv ghost grid pts from -x processor + + nsent = 0; + sendfirst = inxlo; + sendlast = inxhi; + recvlast = inxlo-1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = procxhi; + swap[nswap].recvproc = procxlo; + sendplanes = MIN(sendlast-sendfirst+1,ghostxhi-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + sendlast-sendplanes+1,sendlast,inylo,inyhi,inzlo,inzhi); + + if (procxhi != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,procxhi,0, + &recvplanes,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + recvlast-recvplanes+1,recvlast,inylo,inyhi,inzlo,inzhi); + + nsent += sendplanes; + sendfirst -= recvplanes; + sendlast -= sendplanes; + recvlast -= recvplanes; + nswap++; + + if (nsent < ghostxhi) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // send own grid pts to -y processor, recv ghost grid pts from +y processor + + nsent = 0; + sendfirst = inylo; + sendlast = inyhi; + recvfirst = inyhi+1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = procylo; + swap[nswap].recvproc = procyhi; + sendplanes = MIN(sendlast-sendfirst+1,ghostylo-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + outxlo,outxhi,sendfirst,sendfirst+sendplanes-1,inzlo,inzhi); + + if (procylo != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,procylo,0, + &recvplanes,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + outxlo,outxhi,recvfirst,recvfirst+recvplanes-1,inzlo,inzhi); + + nsent += sendplanes; + sendfirst += sendplanes; + sendlast += recvplanes; + recvfirst += recvplanes; + nswap++; + + if (nsent < ghostylo) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // send own grid pts to +y processor, recv ghost grid pts from -y processor + + nsent = 0; + sendfirst = inylo; + sendlast = inyhi; + recvlast = inylo-1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = procyhi; + swap[nswap].recvproc = procylo; + sendplanes = MIN(sendlast-sendfirst+1,ghostyhi-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + outxlo,outxhi,sendlast-sendplanes+1,sendlast,inzlo,inzhi); + + if (procyhi != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,procyhi,0, + &recvplanes,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + outxlo,outxhi,recvlast-recvplanes+1,recvlast,inzlo,inzhi); + + nsent += sendplanes; + sendfirst -= recvplanes; + sendlast -= sendplanes; + recvlast -= 
recvplanes; + nswap++; + + if (nsent < ghostyhi) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // send own grid pts to -z processor, recv ghost grid pts from +z processor + + nsent = 0; + sendfirst = inzlo; + sendlast = inzhi; + recvfirst = inzhi+1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = proczlo; + swap[nswap].recvproc = proczhi; + sendplanes = MIN(sendlast-sendfirst+1,ghostzlo-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + outxlo,outxhi,outylo,outyhi,sendfirst,sendfirst+sendplanes-1); + + if (proczlo != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,proczlo,0, + &recvplanes,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + outxlo,outxhi,outylo,outyhi,recvfirst,recvfirst+recvplanes-1); + + nsent += sendplanes; + sendfirst += sendplanes; + sendlast += recvplanes; + recvfirst += recvplanes; + nswap++; + + if (nsent < ghostzlo) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // send own grid pts to +z processor, recv ghost grid pts from -z processor + + nsent = 0; + sendfirst = inzlo; + sendlast = inzhi; + recvlast = inzlo-1; + notdone = 1; + + while (notdone) { + if (nswap == maxswap) grow_swap(); + + swap[nswap].sendproc = proczhi; + swap[nswap].recvproc = proczlo; + sendplanes = MIN(sendlast-sendfirst+1,ghostzhi-nsent); + swap[nswap].npack = + indices(swap[nswap].packlist, + outxlo,outxhi,outylo,outyhi,sendlast-sendplanes+1,sendlast); + + if (proczhi != me) + MPI_Sendrecv(&sendplanes,1,MPI_INT,proczhi,0, + &recvplanes,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE); + else recvplanes = sendplanes; + + swap[nswap].nunpack = + indices(swap[nswap].unpacklist, + outxlo,outxhi,outylo,outyhi,recvlast-recvplanes+1,recvlast); + + nsent += sendplanes; + sendfirst -= recvplanes; + sendlast -= sendplanes; + recvlast -= recvplanes; + nswap++; + + if (nsent < ghostzhi) notdoneme = 1; + else notdoneme = 0; + MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm); + } + + // ngrid = max of any forward/reverse pack/unpack grid points + + int ngrid = 0; + for (int i = 0; i < nswap; i++) { + ngrid = MAX(ngrid,swap[i].npack); + ngrid = MAX(ngrid,swap[i].nunpack); + } + + nbuf1 = nbuf2 = ngrid; +} + +/* ---------------------------------------------------------------------- + setup comm for RCB tiled proc domains + each proc has arbitrary # of neighbors that overlap its ghost extent + identify which procs will send me ghost cells, and vice versa + may not be symmetric if both procs do not need same layers of ghosts + comm pattern = post recvs for all my ghosts, send my owned, wait on recvs + no exchanges by dimension, unlike CommTiled forward/reverse comm of particles +------------------------------------------------------------------------- */ + +void Grid3d::setup_tiled(int &nbuf1, int &nbuf2) +{ + int i,m; + double xlo,xhi,ylo,yhi,zlo,zhi; + int ghostbox[6],pbc[3]; + + // setup RCB tree of cut info for grid + // access CommTiled to get cut dimension + // cut = this proc's inlo in that dim + // dim is -1 for proc 0, but never accessed + + rcbinfo = (RCBinfo *) + memory->smalloc(nprocs*sizeof(RCBinfo),"grid3d:rcbinfo"); + RCBinfo rcbone; + rcbone.dim = comm->rcbcutdim; + if (rcbone.dim <= 0) rcbone.cut = inxlo; + else if (rcbone.dim == 1) rcbone.cut = inylo; + else if (rcbone.dim == 2) rcbone.cut = inzlo; + 
MPI_Allgather(&rcbone,sizeof(RCBinfo),MPI_CHAR, + rcbinfo,sizeof(RCBinfo),MPI_CHAR,gridcomm); + + // find overlaps of my extended ghost box with all other procs + // accounts for crossings of periodic boundaries + // noverlap = # of overlaps, including self + // overlap = vector of overlap info using Overlap data struct + + ghostbox[0] = outxlo; + ghostbox[1] = outxhi; + ghostbox[2] = outylo; + ghostbox[3] = outyhi; + ghostbox[4] = outzlo; + ghostbox[5] = outzhi; + + pbc[0] = pbc[1] = pbc[2] = 0; + + memory->create(overlap_procs,nprocs,"grid3d:overlap_procs"); + noverlap = maxoverlap = 0; + overlap = nullptr; + + ghost_box_drop(ghostbox,pbc); + + // send each proc an overlap message + // content: me, index of my overlap, box that overlaps with its owned cells + // ncopy = # of overlaps with myself, across a periodic boundary + + int *proclist; + memory->create(proclist,noverlap,"grid3d:proclist"); + srequest = (Request *) + memory->smalloc(noverlap*sizeof(Request),"grid3d:srequest"); + + int nsend_request = 0; + ncopy = 0; + + for (m = 0; m < noverlap; m++) { + if (overlap[m].proc == me) ncopy++; + else { + proclist[nsend_request] = overlap[m].proc; + srequest[nsend_request].sender = me; + srequest[nsend_request].index = m; + for (i = 0; i < 6; i++) + srequest[nsend_request].box[i] = overlap[m].box[i]; + nsend_request++; + } + } + + auto irregular = new Irregular(lmp); + int nrecv_request = irregular->create_data(nsend_request,proclist,1); + auto rrequest = (Request *) memory->smalloc(nrecv_request*sizeof(Request),"grid3d:rrequest"); + irregular->exchange_data((char *) srequest,sizeof(Request),(char *) rrequest); + irregular->destroy_data(); + + // compute overlaps between received ghost boxes and my owned box + // overlap box used to setup my Send data struct and respond to requests + + send = (Send *) memory->smalloc(nrecv_request*sizeof(Send),"grid3d:send"); + sresponse = (Response *) memory->smalloc(nrecv_request*sizeof(Response),"grid3d:sresponse"); + memory->destroy(proclist); + memory->create(proclist,nrecv_request,"grid3d:proclist"); + + for (m = 0; m < nrecv_request; m++) { + send[m].proc = rrequest[m].sender; + xlo = MAX(rrequest[m].box[0],inxlo); + xhi = MIN(rrequest[m].box[1],inxhi); + ylo = MAX(rrequest[m].box[2],inylo); + yhi = MIN(rrequest[m].box[3],inyhi); + zlo = MAX(rrequest[m].box[4],inzlo); + zhi = MIN(rrequest[m].box[5],inzhi); + send[m].npack = indices(send[m].packlist,xlo,xhi,ylo,yhi,zlo,zhi); + + proclist[m] = rrequest[m].sender; + sresponse[m].index = rrequest[m].index; + sresponse[m].box[0] = xlo; + sresponse[m].box[1] = xhi; + sresponse[m].box[2] = ylo; + sresponse[m].box[3] = yhi; + sresponse[m].box[4] = zlo; + sresponse[m].box[5] = zhi; + } + + nsend = nrecv_request; + + // reply to each Request message with a Response message + // content: index for the overlap on requestor, overlap box on my owned grid + + int nsend_response = nrecv_request; + int nrecv_response = irregular->create_data(nsend_response,proclist,1); + auto rresponse = (Response *) memory->smalloc(nrecv_response*sizeof(Response),"grid3d:rresponse"); + irregular->exchange_data((char *) sresponse,sizeof(Response),(char *) rresponse); + irregular->destroy_data(); + delete irregular; + + // process received responses + // box used to setup my Recv data struct after unwrapping via PBC + // adjacent = 0 if any box of ghost cells does not adjoin my owned cells + + recv = (Recv *) memory->smalloc(nrecv_response*sizeof(Recv),"grid3d:recv"); + adjacent = 1; + + for (i = 0; i < nrecv_response; i++) { + m = 
rresponse[i].index;
+    recv[i].proc = overlap[m].proc;
+    xlo = rresponse[i].box[0] + overlap[m].pbc[0] * nx;
+    xhi = rresponse[i].box[1] + overlap[m].pbc[0] * nx;
+    ylo = rresponse[i].box[2] + overlap[m].pbc[1] * ny;
+    yhi = rresponse[i].box[3] + overlap[m].pbc[1] * ny;
+    zlo = rresponse[i].box[4] + overlap[m].pbc[2] * nz;
+    zhi = rresponse[i].box[5] + overlap[m].pbc[2] * nz;
+    recv[i].nunpack = indices(recv[i].unpacklist,xlo,xhi,ylo,yhi,zlo,zhi);
+
+    if (xlo != inxhi+1 && xhi != inxlo-1 &&
+        ylo != inyhi+1 && yhi != inylo-1 &&
+        zlo != inzhi+1 && zhi != inzlo-1) adjacent = 0;
+  }
+
+  nrecv = nrecv_response;
+
+  // create Copy data struct from overlaps with self
+
+  copy = (Copy *) memory->smalloc(ncopy*sizeof(Copy),"grid3d:copy");
+
+  ncopy = 0;
+  for (m = 0; m < noverlap; m++) {
+    if (overlap[m].proc != me) continue;
+    xlo = overlap[m].box[0];
+    xhi = overlap[m].box[1];
+    ylo = overlap[m].box[2];
+    yhi = overlap[m].box[3];
+    zlo = overlap[m].box[4];
+    zhi = overlap[m].box[5];
+    copy[ncopy].npack = indices(copy[ncopy].packlist,xlo,xhi,ylo,yhi,zlo,zhi);
+    xlo = overlap[m].box[0] + overlap[m].pbc[0] * nx;
+    xhi = overlap[m].box[1] + overlap[m].pbc[0] * nx;
+    ylo = overlap[m].box[2] + overlap[m].pbc[1] * ny;
+    yhi = overlap[m].box[3] + overlap[m].pbc[1] * ny;
+    zlo = overlap[m].box[4] + overlap[m].pbc[2] * nz;
+    zhi = overlap[m].box[5] + overlap[m].pbc[2] * nz;
+    copy[ncopy].nunpack = indices(copy[ncopy].unpacklist,xlo,xhi,ylo,yhi,zlo,zhi);
+    ncopy++;
+  }
+
+  // set offsets for received data
+
+  int offset = 0;
+  for (m = 0; m < nsend; m++) {
+    send[m].offset = offset;
+    offset += send[m].npack;
+  }
+
+  offset = 0;
+  for (m = 0; m < nrecv; m++) {
+    recv[m].offset = offset;
+    offset += recv[m].nunpack;
+  }
+
+  // length of MPI requests vector is max of nsend, nrecv
+
+  int nrequest = MAX(nsend,nrecv);
+  requests = new MPI_Request[nrequest];
+
+  // clean-up
+
+  memory->sfree(rcbinfo);
+  memory->destroy(proclist);
+  memory->destroy(overlap_procs);
+  memory->sfree(overlap);
+  memory->sfree(srequest);
+  memory->sfree(rrequest);
+  memory->sfree(sresponse);
+  memory->sfree(rresponse);
+
+  // nbuf1 = largest pack or unpack in any Send or Recv or Copy
+  // nbuf2 = largest of sum of all packs or unpacks in Send or Recv
+
+  nbuf1 = 0;
+
+  for (m = 0; m < ncopy; m++) {
+    nbuf1 = MAX(nbuf1,copy[m].npack);
+    nbuf1 = MAX(nbuf1,copy[m].nunpack);
+  }
+
+  int nbufs = 0;
+  for (m = 0; m < nsend; m++) {
+    nbuf1 = MAX(nbuf1,send[m].npack);
+    nbufs += send[m].npack;
+  }
+
+  int nbufr = 0;
+  for (m = 0; m < nrecv; m++) {
+    nbuf1 = MAX(nbuf1,recv[m].nunpack);
+    nbufr += recv[m].nunpack;
+  }
+
+  nbuf2 = MAX(nbufs,nbufr);
+}
+
+/* ----------------------------------------------------------------------
+   recursively split a box until it doesn't overlap any periodic boundaries
+   box = 6 integers = (xlo,xhi,ylo,yhi,zlo,zhi)
+     each lo/hi value may extend beyond 0 to N-1 into another periodic image
+   pbc = flags in each dim of which periodic image the caller box was in
+   when a box straddles a periodic boundary, split it in two
+   when a box does not straddle, drop it down RCB tree
+     add all the procs it overlaps with to Overlap list
+------------------------------------------------------------------------- */
+
+void Grid3d::ghost_box_drop(int *box, int *pbc)
+{
+  int i,m;
+
+  // newbox12 and newpbc are initially copies of caller box and pbc
+
+  int newbox1[6],newbox2[6],newpbc[3];
+
+  for (i = 0; i < 6; i++) newbox1[i] = newbox2[i] = box[i];
+  for (i = 0; i < 3; i++) newpbc[i] = pbc[i];
+
+  // 6 if tests to see if box needs
to be split across a periodic boundary + // newbox1 and 2 = new split boxes, newpbc increments current pbc + // final else is no split + + int splitflag = 1; + + if (box[0] < 0) { + newbox1[0] = 0; + newbox2[0] = box[0] + nx; + newbox2[1] = nx - 1; + newpbc[0]--; + } else if (box[1] >= nx) { + newbox1[1] = nx - 1; + newbox2[0] = 0; + newbox2[1] = box[1] - nx; + newpbc[0]++; + } else if (box[2] < 0) { + newbox1[2] = 0; + newbox2[2] = box[2] + ny; + newbox2[3] = ny - 1; + newpbc[1]--; + } else if (box[3] >= ny) { + newbox1[3] = ny - 1; + newbox2[2] = 0; + newbox2[3] = box[3] - ny; + newpbc[1]++; + } else if (box[4] < 0) { + newbox1[4] = 0; + newbox2[4] = box[4] + nz; + newbox2[5] = nz - 1; + newpbc[2]--; + } else if (box[5] >= nz) { + newbox1[5] = nz - 1; + newbox2[4] = 0; + newbox2[5] = box[5] - nz; + newpbc[2]++; + + // box is not split, drop on RCB tree + // returns nprocs = # of procs it overlaps, including self + // returns proc_overlap = list of proc IDs it overlaps + // skip self overlap if no crossing of periodic boundaries + // do not skip self if overlap is in another periodic image + + } else { + splitflag = 0; + int np = 0; + box_drop_grid(box,0,nprocs-1,np,overlap_procs); + for (m = 0; m < np; m++) { + if (noverlap == maxoverlap) grow_overlap(); + if (overlap_procs[m] == me && + pbc[0] == 0 && pbc[1] == 0 && pbc[2] == 0) continue; + overlap[noverlap].proc = overlap_procs[m]; + for (i = 0; i < 6; i++) overlap[noverlap].box[i] = box[i]; + for (i = 0; i < 3; i++) overlap[noverlap].pbc[i] = pbc[i]; + noverlap++; + } + } + + // recurse with 2 split boxes + + if (splitflag) { + ghost_box_drop(newbox1,pbc); + ghost_box_drop(newbox2,newpbc); + } +} + +/* ---------------------------------------------------------------------- + recursively drop a box down the RCB tree to find all procs it overlaps with + box = 6 integers = (xlo,xhi,ylo,yhi,zlo,zhi) + each lo/hi value ranges from 0 to N-1 in a dim, N = grid size in that dim + box is guaranteed to be wholly within the global domain + return Np = # of procs, plist = proc IDs +------------------------------------------------------------------------- */ + +void Grid3d::box_drop_grid(int *box, int proclower, int procupper, + int &np, int *plist) +{ + // end recursion when partition is a single proc + // add proclower to plist + + if (proclower == procupper) { + plist[np++] = proclower; + return; + } + + // drop box on each side of cut it extends beyond + // use < and >= criteria so does not include a box it only touches + // procmid = 1st processor in upper half of partition + // = location in tree that stores this cut + // cut = index of first grid cell in upper partition + // dim = 0,1,2 dimension of cut + + int procmid = proclower + (procupper - proclower) / 2 + 1; + int dim = rcbinfo[procmid].dim; + int cut = rcbinfo[procmid].cut; + + if (box[2*dim] < cut) box_drop_grid(box,proclower,procmid-1,np,plist); + if (box[2*dim+1] >= cut) box_drop_grid(box,procmid,procupper,np,plist); +} + +/* ---------------------------------------------------------------------- + check if all procs only need ghost info from adjacent procs + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int Grid3d::ghost_adjacent() +{ + if (layout == REGULAR) return ghost_adjacent_regular(); + return ghost_adjacent_tiled(); +} + +/* ---------------------------------------------------------------------- + adjacent = 0 if a proc's ghost xyz lohi values exceed its subdomain size + return 0 if adjacent=0 for any proc, else 1 
+------------------------------------------------------------------------- */ + +int Grid3d::ghost_adjacent_regular() +{ + adjacent = 1; + if (ghostxlo > inxhi-inxlo+1) adjacent = 0; + if (ghostxhi > inxhi-inxlo+1) adjacent = 0; + if (ghostylo > inyhi-inylo+1) adjacent = 0; + if (ghostyhi > inyhi-inylo+1) adjacent = 0; + if (ghostzlo > inzhi-inzlo+1) adjacent = 0; + if (ghostzhi > inzhi-inzlo+1) adjacent = 0; + + int adjacent_all; + MPI_Allreduce(&adjacent,&adjacent_all,1,MPI_INT,MPI_MIN,gridcomm); + return adjacent_all; +} + +/* ---------------------------------------------------------------------- + adjacent = 0 if a proc's received ghosts were flagged + as non-adjacent in setup_tiled() + return 0 if adjacent=0 for any proc, else 1 +------------------------------------------------------------------------- */ + +int Grid3d::ghost_adjacent_tiled() +{ + int adjacent_all; + MPI_Allreduce(&adjacent,&adjacent_all,1,MPI_INT,MPI_MIN,gridcomm); + return adjacent_all; +} + +/* ---------------------------------------------------------------------- + forward comm of my owned cells to other's ghost cells +------------------------------------------------------------------------- */ + +void Grid3d::forward_comm(int caller, void *ptr, int nper, int nbyte, int which, + void *buf1, void *buf2, MPI_Datatype datatype) +{ + if (layout == REGULAR) { + if (caller == KSPACE) + forward_comm_regular((KSpace *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == PAIR) + forward_comm_regular((Pair *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == FIX) + forward_comm_regular((Fix *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + } else { + if (caller == KSPACE) + forward_comm_tiled((KSpace *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == PAIR) + forward_comm_tiled((Pair *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == FIX) + forward_comm_tiled((Fix *) ptr,nper,nbyte, + which,buf1,buf2,datatype); + } +} + +/* ---------------------------------------------------------------------- + forward comm on regular grid of procs via list of swaps with 6 neighbor procs +------------------------------------------------------------------------- */ + +template < class T > +void Grid3d:: +forward_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, + void *buf1, void *buf2, MPI_Datatype datatype) +{ + int m; + MPI_Request request; + + for (m = 0; m < nswap; m++) { + if (swap[m].sendproc == me) + ptr->pack_forward_grid(which,buf2,swap[m].npack,swap[m].packlist); + else + ptr->pack_forward_grid(which,buf1,swap[m].npack,swap[m].packlist); + + if (swap[m].sendproc != me) { + if (swap[m].nunpack) MPI_Irecv(buf2,nper*swap[m].nunpack,datatype, + swap[m].recvproc,0,gridcomm,&request); + if (swap[m].npack) MPI_Send(buf1,nper*swap[m].npack,datatype, + swap[m].sendproc,0,gridcomm); + if (swap[m].nunpack) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + ptr->unpack_forward_grid(which,buf2,swap[m].nunpack,swap[m].unpacklist); + } +} + +/* ---------------------------------------------------------------------- + forward comm on tiled grid decomp via Send/Recv lists of each neighbor proc +------------------------------------------------------------------------- */ + +template < class T > +void Grid3d:: +forward_comm_tiled(T *ptr, int nper, int nbyte, int which, + void *buf1, void *vbuf2, MPI_Datatype datatype) +{ + int i,m,offset; + + auto buf2 = (char *) vbuf2; + + // post all receives + + for (m = 0; m < nrecv; m++) { + offset = nper * recv[m].offset * nbyte; + 
MPI_Irecv((void *) &buf2[offset],nper*recv[m].nunpack,datatype, + recv[m].proc,0,gridcomm,&requests[m]); + } + + // perform all sends to other procs + + for (m = 0; m < nsend; m++) { + ptr->pack_forward_grid(which,buf1,send[m].npack,send[m].packlist); + MPI_Send(buf1,nper*send[m].npack,datatype,send[m].proc,0,gridcomm); + } + + // perform all copies to self + + for (m = 0; m < ncopy; m++) { + ptr->pack_forward_grid(which,buf1,copy[m].npack,copy[m].packlist); + ptr->unpack_forward_grid(which,buf1,copy[m].nunpack,copy[m].unpacklist); + } + + // unpack all received data + + for (i = 0; i < nrecv; i++) { + MPI_Waitany(nrecv,requests,&m,MPI_STATUS_IGNORE); + offset = nper * recv[m].offset * nbyte; + ptr->unpack_forward_grid(which,(void *) &buf2[offset], + recv[m].nunpack,recv[m].unpacklist); + } +} + +/* ---------------------------------------------------------------------- + reverse comm of my ghost cells to sum to owner cells +------------------------------------------------------------------------- */ + +void Grid3d::reverse_comm(int caller, void *ptr, int nper, int nbyte, int which, + void *buf1, void *buf2, MPI_Datatype datatype) +{ + if (layout == REGULAR) { + if (caller == KSPACE) + reverse_comm_regular((KSpace *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == PAIR) + reverse_comm_regular((Pair *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == FIX) + reverse_comm_regular((Fix *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + } else { + if (caller == KSPACE) + reverse_comm_tiled((KSpace *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == PAIR) + reverse_comm_tiled((Pair *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + else if (caller == FIX) + reverse_comm_tiled((Fix *) ptr,nper,nbyte,which, + buf1,buf2,datatype); + } +} + +/* ---------------------------------------------------------------------- + reverse comm on regular grid of procs via list of swaps with 6 neighbor procs +------------------------------------------------------------------------- */ + +template < class T > +void Grid3d:: +reverse_comm_regular(T *ptr, int nper, int /*nbyte*/, int which, + void *buf1, void *buf2, MPI_Datatype datatype) +{ + int m; + MPI_Request request; + + for (m = nswap-1; m >= 0; m--) { + if (swap[m].recvproc == me) + ptr->pack_reverse_grid(which,buf2,swap[m].nunpack,swap[m].unpacklist); + else + ptr->pack_reverse_grid(which,buf1,swap[m].nunpack,swap[m].unpacklist); + + if (swap[m].recvproc != me) { + if (swap[m].npack) MPI_Irecv(buf2,nper*swap[m].npack,datatype, + swap[m].sendproc,0,gridcomm,&request); + if (swap[m].nunpack) MPI_Send(buf1,nper*swap[m].nunpack,datatype, + swap[m].recvproc,0,gridcomm); + if (swap[m].npack) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + ptr->unpack_reverse_grid(which,buf2,swap[m].npack,swap[m].packlist); + } +} + +/* ---------------------------------------------------------------------- + reverse comm on tiled grid decomp via Send/Recv lists of each neighbor proc +------------------------------------------------------------------------- */ + +template < class T > +void Grid3d:: +reverse_comm_tiled(T *ptr, int nper, int nbyte, int which, + void *buf1, void *vbuf2, MPI_Datatype datatype) +{ + int i,m,offset; + + auto buf2 = (char *) vbuf2; + + // post all receives + + for (m = 0; m < nsend; m++) { + offset = nper * send[m].offset * nbyte; + MPI_Irecv((void *) &buf2[offset],nper*send[m].npack,datatype, + send[m].proc,0,gridcomm,&requests[m]); + } + + // perform all sends to other procs + + for (m = 0; m < nrecv; m++) { 
+ ptr->pack_reverse_grid(which,buf1,recv[m].nunpack,recv[m].unpacklist); + MPI_Send(buf1,nper*recv[m].nunpack,datatype,recv[m].proc,0,gridcomm); + } + + // perform all copies to self + + for (m = 0; m < ncopy; m++) { + ptr->pack_reverse_grid(which,buf1,copy[m].nunpack,copy[m].unpacklist); + ptr->unpack_reverse_grid(which,buf1,copy[m].npack,copy[m].packlist); + } + + // unpack all received data + + for (i = 0; i < nsend; i++) { + MPI_Waitany(nsend,requests,&m,MPI_STATUS_IGNORE); + offset = nper * send[m].offset * nbyte; + ptr->unpack_reverse_grid(which,(void *) &buf2[offset], + send[m].npack,send[m].packlist); + } +} + +/* ---------------------------------------------------------------------- + gather global grid values to proc 0, one grid chunk at a time + proc 0 pings each proc for its grid chunk + pack/unpack operations are performed by caller via callbacks + caller can decide whether to store chunks, output them, etc +------------------------------------------------------------------------- */ + +void Grid3d::gather(int /*caller*/, void *ptr, int nper, int nbyte, + int which, void *buf, MPI_Datatype datatype) +{ + int me = comm->me; + Fix *fptr = (Fix *) ptr; + + // maxsize = max grid data owned by any proc + + int mysize = (inxhi-inxlo+1) * (inyhi-inylo+1) * (inzhi-inzlo+1); + mysize *= nper; + int maxsize; + MPI_Allreduce(&mysize,&maxsize,1,MPI_INT,MPI_MAX,world); + + // pack my data via callback to caller + + char *mybuf; + if (me == 0) memory->create(mybuf,maxsize*nbyte,"grid3d:mybuf"); + else memory->create(mybuf,mysize*nbyte,"grid3d:mybuf"); + fptr->pack_gather_grid(which,mybuf); + + // ping each proc for its data + // unpack into full buffer via callback to caller + + int xlo,xhi,ylo,yhi,zlo,zhi,tmp; + int bounds[6]; + + if (me == 0) { + MPI_Status status; + MPI_Request request; + + for (int iproc = 0; iproc < nprocs; iproc++) { + if (iproc) { + MPI_Irecv(mybuf,maxsize,datatype,iproc,0,world,&request); + MPI_Send(&tmp,0,MPI_INT,iproc,0,world); + MPI_Wait(&request,&status); + MPI_Recv(bounds,6,MPI_INT,iproc,0,world,&status); + xlo = bounds[0]; + xhi = bounds[1]; + ylo = bounds[2]; + yhi = bounds[3]; + zlo = bounds[4]; + zhi = bounds[5]; + } else { + xlo = inxlo; + xhi = inxhi; + ylo = inylo; + yhi = inyhi; + zlo = inzlo; + zhi = inzhi; + } + + fptr->unpack_gather_grid(which,mybuf,buf,xlo,xhi,ylo,yhi,zlo,zhi); + } + + } else { + MPI_Recv(&tmp,0,MPI_INT,0,0,world,MPI_STATUS_IGNORE); + MPI_Rsend(mybuf,mysize,datatype,0,0,world); + bounds[0] = inxlo; + bounds[1] = inxhi; + bounds[2] = inylo; + bounds[3] = inyhi; + bounds[4] = inzlo; + bounds[5] = inzhi; + MPI_Send(bounds,6,MPI_INT,0,0,world); + } + + memory->destroy(mybuf); +} + +/* ---------------------------------------------------------------------- + create swap stencil for grid own/ghost communication + swaps covers all 3 dimensions and both directions + swaps cover multiple iterations in a direction if need grid pts + from further away than nearest-neighbor proc + same swap list used by forward and reverse communication +------------------------------------------------------------------------- */ + +void Grid3d::grow_swap() +{ + maxswap += DELTA; + swap = (Swap *) memory->srealloc(swap,maxswap*sizeof(Swap),"grid3d:swap"); +} + +/* ---------------------------------------------------------------------- + create swap stencil for grid own/ghost communication + swaps covers all 3 dimensions and both directions + swaps cover multiple iterations in a direction if need grid pts + from further away than nearest-neighbor proc + same swap 
list used by forward and reverse communication +------------------------------------------------------------------------- */ + +void Grid3d::grow_overlap() +{ + maxoverlap += DELTA; + overlap = (Overlap *) + memory->srealloc(overlap,maxoverlap*sizeof(Overlap),"grid3d:overlap"); +} + +/* ---------------------------------------------------------------------- + create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi) + assume 3d array is allocated as + (fullxlo:fullxhi,fullylo:fullyhi,fullzlo:fullzhi) +------------------------------------------------------------------------- */ + +int Grid3d::indices(int *&list, + int xlo, int xhi, int ylo, int yhi, int zlo, int zhi) +{ + int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1); + memory->create(list,nmax,"grid3d:indices"); + if (nmax == 0) return 0; + + int nx = (fullxhi-fullxlo+1); + int ny = (fullyhi-fullylo+1); + + int n = 0; + int ix,iy,iz; + for (iz = zlo; iz <= zhi; iz++) + for (iy = ylo; iy <= yhi; iy++) + for (ix = xlo; ix <= xhi; ix++) + list[n++] = (iz-fullzlo)*ny*nx + (iy-fullylo)*nx + (ix-fullxlo); + + return nmax; +} diff --git a/src/grid3d.h b/src/grid3d.h new file mode 100644 index 0000000000..1eb88871a1 --- /dev/null +++ b/src/grid3d.h @@ -0,0 +1,200 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifndef LMP_GRID3D_H
+#define LMP_GRID3D_H
+
+#include "pointers.h"
+
+namespace LAMMPS_NS {
+
+class Grid3d : protected Pointers {
+ public:
+  enum { KSPACE = 0, PAIR = 1, FIX = 2 };    // calling classes
+
+  Grid3d(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int,
+         int, int, int);
+  Grid3d(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int,
+         int, int, int, int, int, int, int, int, int, int);
+  ~Grid3d() override;
+  void setup(int &, int &);
+  int ghost_adjacent();
+  void forward_comm(int, void *, int, int, int, void *, void *, MPI_Datatype);
+  void reverse_comm(int, void *, int, int, int, void *, void *, MPI_Datatype);
+  void gather(int, void *, int, int, int, void *, MPI_Datatype);
+
+ protected:
+  int me, nprocs;
+  int layout;              // REGULAR or TILED
+  MPI_Comm gridcomm;       // communicator for this class
+                           //   usually world, but MSM calls with subset
+
+  // inputs from caller via constructor
+
+  int nx, ny, nz;          // size of global grid in all 3 dims
+  int inxlo, inxhi;        // inclusive extent of my grid chunk
+  int inylo, inyhi;        //   0 <= in <= N-1
+  int inzlo, inzhi;
+  int outxlo, outxhi;      // inclusive extent of my grid chunk plus
+  int outylo, outyhi;      //   ghost cells in all 6 directions
+  int outzlo, outzhi;      //   lo indices can be < 0, hi indices can be >= N
+  int fullxlo, fullxhi;    // extent of grid chunk that caller stores
+  int fullylo, fullyhi;    //   can be same as out indices or larger
+  int fullzlo, fullzhi;
+
+  // -------------------------------------------
+  // internal variables for REGULAR layout
+  // -------------------------------------------
+
+  int procxlo, procxhi;    // 6 neighbor procs that adjoin me
+  int procylo, procyhi;    //   not used for comm_style = tiled
+  int proczlo, proczhi;
+
+  int ghostxlo, ghostxhi;  // # of my owned grid planes needed
+  int ghostylo, ghostyhi;  //   by neighbor procs in each dir as their ghost planes
+  int ghostzlo, ghostzhi;
+
+  // swap = exchange of owned and ghost grid cells between 2 procs, including self
+
+  struct Swap {
+    int sendproc;          // proc to send to for forward comm
+    int recvproc;          // proc to recv from for forward comm
+    int npack;             // # of datums to pack
+    int nunpack;           // # of datums to unpack
+    int *packlist;         // 3d array offsets to pack
+    int *unpacklist;       // 3d array offsets to unpack
+  };
+
+  int nswap, maxswap;
+  Swap *swap;
+
+  // -------------------------------------------
+  // internal variables for TILED layout
+  // -------------------------------------------
+
+  int *overlap_procs;      // length of Nprocs in communicator
+  MPI_Request *requests;   // length of max messages this proc receives
+
+  // RCB tree of cut info
+  // each proc contributes one value, except proc 0
+
+  struct RCBinfo {
+    int dim;               // 0,1,2 = which dim the cut is in
+    int cut;               // grid index of lowest cell in upper half of cut
+  };
+
+  RCBinfo *rcbinfo;
+
+  // overlap = a proc whose owned cells overlap with my extended ghost box
+  // includes overlaps across periodic boundaries, can also be self
+
+  struct Overlap {
+    int proc;              // proc whose owned cells overlap my ghost cells
+    int box[6];            // box that overlaps otherproc's owned cells
+                           //   this box is wholly contained within global grid
+    int pbc[3];            // PBC offsets to convert box to a portion of my ghost box
+                           //   my ghost box may extend beyond global grid
+  };
+
+  int noverlap, maxoverlap;
+  Overlap *overlap;
+
+  // request = sent to each proc whose owned cells overlap my ghost cells
+
+  struct Request {
+    int sender;            // sending proc
+    int index;             // index of overlap on sender
+    int box[6];            // box that overlaps receiver's owned cells
+                           //   wholly contained within global grid
+  };
+
+  Request *srequest, *rrequest;
+
+  // response = reply from each proc whose owned cells overlap my ghost cells
+
+  struct Response {
+    int index;             // index of my overlap for the initial request
+    int box[6];            // box that overlaps responder's owned cells
+                           //   wholly contained within global grid
+                           //   has to be unwrapped by PBC to map to my ghost cells
+  };
+
+  Response *sresponse, *rresponse;
+
+  // send = proc to send a subset of my owned cells to, for forward comm
+  //   for reverse comm, proc I receive ghost overlaps with my owned cells from
+  //   offset used in reverse comm to recv a message in middle of a large buffer
+
+  struct Send {
+    int proc;
+    int npack;
+    int *packlist;
+    int offset;
+  };
+
+  // recv = proc to recv a subset of my ghost cells from, for forward comm
+  //   for reverse comm, proc I send a subset of my ghost cells to
+  //   offset used in forward comm to recv a message in middle of a large buffer
+
+  struct Recv {
+    int proc;
+    int nunpack;
+    int *unpacklist;
+    int offset;
+  };
+
+  int adjacent;            // 0 on a proc who receives ghosts from a non-neighbor proc
+
+  // copy = subset of my owned cells to copy into subset of my ghost cells
+  //   that describes forward comm, for reverse comm it is the opposite
+
+  struct Copy {
+    int npack;
+    int nunpack;
+    int *packlist;
+    int *unpacklist;
+  };
+
+  int nsend, nrecv, ncopy;
+  Send *send;
+  Recv *recv;
+  Copy *copy;
+
+  // -------------------------------------------
+  // internal methods
+  // -------------------------------------------
+
+  void initialize(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
+                  int, int, int, int, int, int, int, int, int, int, int, int, int);
+  virtual void setup_regular(int &, int &);
+  virtual void setup_tiled(int &, int &);
+  void ghost_box_drop(int *, int *);
+  void box_drop_grid(int *, int, int, int &, int *);
+
+  int ghost_adjacent_regular();
+  int ghost_adjacent_tiled();
+
+  template <class T> void forward_comm_regular(T *, int, int, int, void *, void *, MPI_Datatype);
+  template <class T> void forward_comm_tiled(T *, int, int, int, void *, void *, MPI_Datatype);
+  template <class T> void reverse_comm_regular(T *, int, int, int, void *, void *, MPI_Datatype);
+  template <class T> void reverse_comm_tiled(T *, int, int, int, void *, void *, MPI_Datatype);
+
+  virtual void grow_swap();
+  void grow_overlap();
+
+  int indices(int *&, int, int, int, int, int, int);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
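Editorial note, not part of the patch: the packlist/unpacklist arrays that Grid3d::indices() builds (and that the Swap/Send/Recv/Copy structs above store) are flat offsets into the caller-allocated brick spanning the full xyz lo/hi extent, flattened x-fastest. The standalone sketch below reproduces that mapping outside LAMMPS; the brick bounds and sub-box values are hypothetical and chosen only for illustration.

#include <cstdio>
#include <vector>

int main()
{
  // caller-allocated brick includes ghost cells, so its lo bounds can be < 0
  // (hypothetical values); only the lo bounds and the x/y extents enter the formula
  int fullxlo = -1, fullylo = -1, fullzlo = -1;
  int nx = 6, ny = 5;                      // full brick extent in x and y

  // sub-box to pack, e.g. one x-plane of owned cells
  int xlo = 0, xhi = 0, ylo = 0, yhi = 2, zlo = 0, zhi = 2;

  // same x-fastest flattening as Grid3d::indices()
  std::vector<int> list;
  for (int iz = zlo; iz <= zhi; iz++)
    for (int iy = ylo; iy <= yhi; iy++)
      for (int ix = xlo; ix <= xhi; ix++)
        list.push_back((iz - fullzlo) * ny * nx + (iy - fullylo) * nx + (ix - fullxlo));

  printf("packed %zu offsets, first = %d\n", list.size(), list[0]);
  return 0;
}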
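Editorial note, not part of the patch: forward_comm() and reverse_comm() never touch grid values themselves; they hand an offset list and a buffer back to the caller through pack_forward_grid()/unpack_forward_grid() style callbacks, as seen in forward_comm_regular/tiled above. The sketch below mimics that callback contract on a plain double array; the class and variable names are hypothetical and it stands in for what Grid3d does for a self Copy (pack owned cells, unpack into ghost cells).

#include <cstdio>

// caller-side grid owner, mimicking the pack/unpack callback signatures
struct GridUserSketch {
  double *data;   // flat caller-allocated brick (full xyz extent)

  // copy values at packlist offsets into buf (what a Send or Copy does)
  void pack_forward_grid(int /*which*/, void *vbuf, int n, int *list) {
    double *buf = (double *) vbuf;
    for (int i = 0; i < n; i++) buf[i] = data[list[i]];
  }

  // write buf values into cells at unpacklist offsets (ghost cells)
  void unpack_forward_grid(int /*which*/, void *vbuf, int n, int *list) {
    double *buf = (double *) vbuf;
    for (int i = 0; i < n; i++) data[list[i]] = buf[i];
  }
};

int main()
{
  double brick[8] = {1, 2, 3, 4, 0, 0, 0, 0};
  int packlist[2] = {2, 3};     // owned cells to send
  int unpacklist[2] = {4, 5};   // ghost cells to fill
  double buf[2];

  GridUserSketch user{brick};
  user.pack_forward_grid(0, buf, 2, packlist);
  user.unpack_forward_grid(0, buf, 2, unpacklist);
  printf("ghost cells now %g %g\n", brick[4], brick[5]);
  return 0;
}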