diff --git a/src/KSPACE/commgrid.cpp b/src/KSPACE/gridcomm.cpp
similarity index 97%
rename from src/KSPACE/commgrid.cpp
rename to src/KSPACE/gridcomm.cpp
index 7e2db0b148..c45f8d9b9e 100644
--- a/src/KSPACE/commgrid.cpp
+++ b/src/KSPACE/gridcomm.cpp
@@ -12,7 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "comm.h"
 #include "kspace.h"
 #include "memory.h"
@@ -24,7 +24,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
+GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
                    int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
                    int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
                    int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi) 
@@ -71,10 +71,11 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
 
 /* ---------------------------------------------------------------------- */
 
-CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
+GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
                    int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
                    int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
-                   int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max, int ozlo_max, int ozhi_max,
+                   int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max, 
+                   int ozlo_max, int ozhi_max,
                    int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi) 
   : Pointers(lmp)
 {
@@ -119,7 +120,7 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
 
 /* ---------------------------------------------------------------------- */
 
-CommGrid::~CommGrid()
+GridComm::~GridComm()
 {
   for (int i = 0; i < nswap; i++) {
     memory->destroy(swap[i].packlist);
@@ -140,7 +141,7 @@ CommGrid::~CommGrid()
    if no neighbor proc, value is from self
 ------------------------------------------------------------------------- */
 
-void CommGrid::ghost_notify()
+void GridComm::ghost_notify()
 {
   int nplanes = inxlo - outxlo;
   if (procxlo != me)
@@ -184,7 +185,7 @@ void CommGrid::ghost_notify()
    if yes, return 1, else return 0
 ------------------------------------------------------------------------- */
 
-int CommGrid::ghost_overlap()
+int GridComm::ghost_overlap()
 {
   int nearest = 0;
   if (ghostxlo > inxhi-inxlo+1) nearest = 1;
@@ -208,7 +209,7 @@ int CommGrid::ghost_overlap()
    same swap list used by forward and reverse communication
 ------------------------------------------------------------------------- */
 
-void CommGrid::setup()
+void GridComm::setup()
 {
   int nsent,sendfirst,sendlast,recvfirst,recvlast;
   int sendplanes,recvplanes;
@@ -486,7 +487,7 @@ void CommGrid::setup()
    use swap list in forward order to acquire copy of all needed ghost grid pts
 ------------------------------------------------------------------------- */
 
-void CommGrid::forward_comm(KSpace *kspace, int which)
+void GridComm::forward_comm(KSpace *kspace, int which)
 {
   for (int m = 0; m < nswap; m++) {
     if (swap[m].sendproc == me) 
@@ -511,7 +512,7 @@ void CommGrid::forward_comm(KSpace *kspace, int which)
    for each owned grid pt that some other proc has copy of as a ghost grid pt
 ------------------------------------------------------------------------- */
 
-void CommGrid::reverse_comm(KSpace *kspace, int which)
+void GridComm::reverse_comm(KSpace *kspace, int which)
 {
   for (int m = nswap-1; m >= 0; m--) {
     if (swap[m].recvproc == me) 
@@ -537,7 +538,7 @@ void CommGrid::reverse_comm(KSpace *kspace, int which)
      outzlo_max:outzhi_max)
 ------------------------------------------------------------------------- */
 
-int CommGrid::indices(int *&list, 
+int GridComm::indices(int *&list, 
                        int xlo, int xhi, int ylo, int yhi, int zlo, int zhi)
 {
   int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
@@ -560,7 +561,7 @@ int CommGrid::indices(int *&list,
    memory usage of send/recv bufs
 ------------------------------------------------------------------------- */
 
-double CommGrid::memory_usage()
+double GridComm::memory_usage()
 {
   double bytes = 2*nbuf * sizeof(double);
   return bytes;
diff --git a/src/KSPACE/commgrid.h b/src/KSPACE/gridcomm.h
similarity index 92%
rename from src/KSPACE/commgrid.h
rename to src/KSPACE/gridcomm.h
index b069e74f87..dbadff692a 100644
--- a/src/KSPACE/commgrid.h
+++ b/src/KSPACE/gridcomm.h
@@ -11,8 +11,8 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifndef LMP_COMMGRID_H
-#define LMP_COMMGRID_H
+#ifndef LMP_GRIDCOMM_H
+#define LMP_GRIDCOMM_H
 
 #include "pointers.h"
 
@@ -26,18 +26,18 @@ typedef double FFT_SCALAR;
 
 namespace LAMMPS_NS {
 
-class CommGrid : protected Pointers {
+class GridComm : protected Pointers {
  public:
-  CommGrid(class LAMMPS *, MPI_Comm, int, int,
+  GridComm(class LAMMPS *, MPI_Comm, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int);
-  CommGrid(class LAMMPS *, MPI_Comm, int, int,
+  GridComm(class LAMMPS *, MPI_Comm, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int);
-  ~CommGrid();
+  ~GridComm();
   void ghost_notify();
   int ghost_overlap();
   void setup();
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index 6ce6d69cfe..5ce73ee41f 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -24,7 +24,7 @@
 #include "msm.h"
 #include "atom.h"
 #include "comm.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
@@ -639,7 +639,7 @@ void MSM::allocate()
   int (*procneigh_all)[2] = comm->procneigh;
 
 
-  cg_all = new CommGrid(lmp,world,1,1,
+  cg_all = new GridComm(lmp,world,1,1,
                     nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
                     nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
                     nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
@@ -659,7 +659,7 @@ void MSM::allocate()
 
     if (active_flag[n]) {
       int **procneigh = procneigh_levels[n];
-      cg[n] = new CommGrid(lmp,world_levels[n],1,1,
+      cg[n] = new GridComm(lmp,world_levels[n],1,1,
                         nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
                         nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -708,7 +708,7 @@ void MSM::allocate_peratom()
   int (*procneigh_all)[2] = comm->procneigh;
 
   cg_peratom_all =
-    new CommGrid(lmp,world,6,6,
+    new GridComm(lmp,world,6,6,
                  nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
                  nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
                  nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
@@ -736,7 +736,7 @@ void MSM::allocate_peratom()
     if (active_flag[n]) {
       int **procneigh = procneigh_levels[n];
       cg_peratom[n] =
-        new CommGrid(lmp,world_levels[n],6,6,
+        new GridComm(lmp,world_levels[n],6,6,
                      nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
                      nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -782,8 +782,8 @@ void MSM::allocate_levels()
 {
   ngrid = new int[levels];
 
-  cg = new CommGrid*[levels];
-  cg_peratom = new CommGrid*[levels];
+  cg = new GridComm*[levels];
+  cg_peratom = new GridComm*[levels];
 
   memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
   world_levels = new MPI_Comm[levels];
diff --git a/src/KSPACE/msm.h b/src/KSPACE/msm.h
index 82be217f5b..9845916184 100644
--- a/src/KSPACE/msm.h
+++ b/src/KSPACE/msm.h
@@ -81,10 +81,10 @@ class MSM : public KSpace {
   int procgrid[3];                  // procs assigned in each dim of 3d grid
   int myloc[3];                     // which proc I am in each dim
   int ***procneigh_levels;          // my 6 neighboring procs, 0/1 = left/right
-  class CommGrid **cg;
-  class CommGrid **cg_peratom;
-  class CommGrid *cg_all;
-  class CommGrid *cg_peratom_all;
+  class GridComm **cg;
+  class GridComm **cg_peratom;
+  class GridComm *cg_all;
+  class GridComm *cg_peratom_all;
   
   int current_level;
 
diff --git a/src/KSPACE/msm_cg.cpp b/src/KSPACE/msm_cg.cpp
index 8b7c3e0cda..e1f1262c9f 100644
--- a/src/KSPACE/msm_cg.cpp
+++ b/src/KSPACE/msm_cg.cpp
@@ -21,9 +21,8 @@
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
-
 #include "atom.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 35a78408ea..5f3f8ce2db 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -27,7 +27,7 @@
 #include "pppm.h"
 #include "atom.h"
 #include "comm.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
@@ -272,7 +272,7 @@ void PPPM::init()
 
   int (*procneigh)[2] = comm->procneigh;
 
-  CommGrid *cgtmp = NULL;
+  GridComm *cgtmp = NULL;
   int iteration = 0;
 
   while (order >= minorder) {
@@ -285,7 +285,7 @@ void PPPM::init()
     set_grid_local();
     if (overlap_allowed) break;
 
-    cgtmp = new CommGrid(lmp,world,1,1,
+    cgtmp = new GridComm(lmp,world,1,1,
                          nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                          nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -805,13 +805,13 @@ void PPPM::allocate()
   int (*procneigh)[2] = comm->procneigh;
 
   if (differentiation_flag == 1)
-    cg = new CommGrid(lmp,world,1,1,
+    cg = new GridComm(lmp,world,1,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
-    cg = new CommGrid(lmp,world,3,1,
+    cg = new GridComm(lmp,world,3,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -901,14 +901,14 @@ void PPPM::allocate_peratom()
 
   if (differentiation_flag == 1)
     cg_peratom =
-      new CommGrid(lmp,world,6,1,
+      new GridComm(lmp,world,6,1,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                    procneigh[0][0],procneigh[0][1],procneigh[1][0],
                    procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
     cg_peratom =
-      new CommGrid(lmp,world,7,1,
+      new GridComm(lmp,world,7,1,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                    procneigh[0][0],procneigh[0][1],procneigh[1][0],
diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h
index a061c656be..8100ac501f 100644
--- a/src/KSPACE/pppm.h
+++ b/src/KSPACE/pppm.h
@@ -93,8 +93,8 @@ class PPPM : public KSpace {
 
   class FFT3d *fft1,*fft2;
   class Remap *remap;
-  class CommGrid *cg;
-  class CommGrid *cg_peratom;
+  class GridComm *cg;
+  class GridComm *cg_peratom;
 
   int **part2grid;             // storage for particle -> grid mapping
   int nmax;
diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp
index 53da74f97f..d071c3993a 100644
--- a/src/KSPACE/pppm_cg.cpp
+++ b/src/KSPACE/pppm_cg.cpp
@@ -22,7 +22,7 @@
 #include "string.h"
 
 #include "atom.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index dcf80da2d5..195aa47af3 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -26,7 +26,7 @@
 #include "math_const.h"
 #include "atom.h"
 #include "comm.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
@@ -348,7 +348,7 @@ void PPPMDisp::init()
 
   int iteration = 0;
   if (function[0]) {
-    CommGrid *cgtmp = NULL;
+    GridComm *cgtmp = NULL;
     while (order >= minorder) {
 
       if (iteration && me == 0)
@@ -376,7 +376,7 @@ void PPPMDisp::init()
 
       if (overlap_allowed) break;
 
-      cgtmp = new CommGrid(lmp, world,1,1,
+      cgtmp = new GridComm(lmp, world,1,1,
                            nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                            nxlo_out,nxhi_out,nylo_out,nyhi_out,
                            nzlo_out,nzhi_out,
@@ -445,7 +445,7 @@ void PPPMDisp::init()
 
   iteration = 0;
   if (function[1] + function[2] + function[3]) {
-    CommGrid *cgtmp = NULL;
+    GridComm *cgtmp = NULL;
     while (order_6 >= minorder) {
 
       if (iteration && me == 0)
@@ -471,7 +471,7 @@ void PPPMDisp::init()
 
       if (overlap_allowed) break;
 
-      cgtmp = new CommGrid(lmp,world,1,1,
+      cgtmp = new GridComm(lmp,world,1,1,
                            nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
                            nzlo_in_6,nzhi_in_6,
                            nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
@@ -1654,13 +1654,13 @@ void PPPMDisp::allocate()
   // create ghost grid object for rho and electric field communication
 
   if (differentiation_flag == 1)
-    cg = new CommGrid(lmp,world,1,1,
+    cg = new GridComm(lmp,world,1,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
-    cg = new CommGrid(lmp,world,3,1,
+    cg = new GridComm(lmp,world,3,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -1733,13 +1733,13 @@ void PPPMDisp::allocate()
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,1,1,
+      cg_6 = new GridComm(lmp,world,1,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
-      cg_6 = new CommGrid(lmp,world,3,1,
+      cg_6 = new GridComm(lmp,world,3,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -1890,13 +1890,13 @@ void PPPMDisp::allocate()
 
 
     if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,7,7,
+      cg_6 = new GridComm(lmp,world,7,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
-      cg_6 = new CommGrid(lmp,world,21,7,
+      cg_6 = new GridComm(lmp,world,21,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -1969,13 +1969,13 @@ void PPPMDisp::allocate()
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc,
+      cg_6 = new GridComm(lmp,world,nsplit_alloc,nsplit_alloc,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
-      cg_6 = new CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc,
+      cg_6 = new GridComm(lmp,world,3*nsplit_alloc,nsplit_alloc,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -2017,14 +2017,14 @@ void PPPMDisp::allocate_peratom()
 
     if (differentiation_flag == 1)
       cg_peratom =
-        new CommGrid(lmp,world,6,1,
+        new GridComm(lmp,world,6,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom =
-        new CommGrid(lmp,world,7,1,
+        new GridComm(lmp,world,7,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -2056,14 +2056,14 @@ void PPPMDisp::allocate_peratom()
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
-        new CommGrid(lmp,world,6,1,
+        new GridComm(lmp,world,6,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
-        new CommGrid(lmp,world,7,1,
+        new GridComm(lmp,world,7,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -2185,14 +2185,14 @@ void PPPMDisp::allocate_peratom()
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
-        new CommGrid(lmp,world,42,1,
+        new GridComm(lmp,world,42,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
-        new CommGrid(lmp,world,49,1,
+        new GridComm(lmp,world,49,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
@@ -2223,14 +2223,14 @@ void PPPMDisp::allocate_peratom()
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
-        new CommGrid(lmp,world,6*nsplit_alloc,1,
+        new GridComm(lmp,world,6*nsplit_alloc,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
-        new CommGrid(lmp,world,7*nsplit_alloc,1,
+        new GridComm(lmp,world,7*nsplit_alloc,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h
index e1ca2747a2..8b62605b7c 100755
--- a/src/KSPACE/pppm_disp.h
+++ b/src/KSPACE/pppm_disp.h
@@ -187,10 +187,10 @@ Variables needed for calculating the 1/r and 1/r^6 potential
   class FFT3d *fft1_6, *fft2_6;
   class Remap *remap;
   class Remap *remap_6;
-  class CommGrid *cg;
-  class CommGrid *cg_peratom;
-  class CommGrid *cg_6;
-  class CommGrid *cg_peratom_6;
+  class GridComm *cg;
+  class GridComm *cg_peratom;
+  class GridComm *cg_6;
+  class GridComm *cg_peratom_6;
 
   int **part2grid;             // storage for particle -> grid mapping
   int **part2grid_6;
diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp
index 530418fab1..3863f41f89 100755
--- a/src/KSPACE/pppm_stagger.cpp
+++ b/src/KSPACE/pppm_stagger.cpp
@@ -23,7 +23,7 @@
 #include "math.h"
 #include "pppm_stagger.h"
 #include "atom.h"
-#include "commgrid.h"
+#include "gridcomm.h"
 #include "force.h"
 #include "domain.h"
 #include "memory.h"
diff --git a/src/USER-CUDA/comm_cuda.cpp b/src/USER-CUDA/comm_cuda.cpp
index 672643646c..4c130e67b5 100644
--- a/src/USER-CUDA/comm_cuda.cpp
+++ b/src/USER-CUDA/comm_cuda.cpp
@@ -56,7 +56,7 @@ enum{SINGLE,MULTI};
    setup MPI and allocate buffer space
 ------------------------------------------------------------------------- */
 
-CommCuda::CommCuda(LAMMPS *lmp):Comm(lmp)
+CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
diff --git a/src/USER-CUDA/comm_cuda.h b/src/USER-CUDA/comm_cuda.h
index 125441af1b..db810e2c13 100644
--- a/src/USER-CUDA/comm_cuda.h
+++ b/src/USER-CUDA/comm_cuda.h
@@ -17,11 +17,11 @@
 #include "pointers.h"
 
 #include "cuda_data.h"
-#include "comm.h"
+#include "comm_brick.h"
 
 namespace LAMMPS_NS {
 
-class CommCuda : public Comm {
+class CommCuda : public CommBrick {
 public:
   CommCuda(class LAMMPS *);
   ~CommCuda();
diff --git a/src/accelerator_cuda.h b/src/accelerator_cuda.h
index fb99ce2004..19639a6932 100644
--- a/src/accelerator_cuda.h
+++ b/src/accelerator_cuda.h
@@ -31,7 +31,7 @@
 // dummy interface to USER-CUDA
 // needed for compiling when USER-CUDA is not installed
 
-#include "comm.h"
+#include "comm_brick.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "modify.h"
@@ -52,9 +52,9 @@ class Cuda {
   void uploadAll() {}
 };
 
-class CommCuda : public Comm {
+class CommCuda : public CommBrick {
  public:
- CommCuda(class LAMMPS *lmp) : Comm(lmp) {}
+ CommCuda(class LAMMPS *lmp) : CommBrick(lmp) {}
   ~CommCuda() {}
 };
 
diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h
index e6b2a21e84..3332e657df 100644
--- a/src/accelerator_kokkos.h
+++ b/src/accelerator_kokkos.h
@@ -32,7 +32,7 @@
 // needed for compiling when KOKKOS is not installed
 
 #include "atom.h"
-#include "comm.h"
+#include "comm_brick.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "modify.h"
@@ -56,9 +56,9 @@ class AtomKokkos : public Atom {
   ~AtomKokkos() {}
 };
 
-class CommKokkos : public Comm {
+class CommKokkos : public CommBrick {
  public:
-  CommKokkos(class LAMMPS *lmp) : Comm(lmp) {}
+  CommKokkos(class LAMMPS *lmp) : CommBrick(lmp) {}
   ~CommKokkos() {}
 };
 
diff --git a/src/balance.cpp b/src/balance.cpp
index ccca989369..a76a7ce364 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -30,7 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-enum{NONE,UNIFORM,USER,DYNAMIC};
+enum{XYZ,SHIFT,RCB};
+enum{NONE,UNIFORM,USER};
 enum{X,Y,Z};
 
 /* ---------------------------------------------------------------------- */
@@ -44,7 +45,7 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
   memory->create(allproccount,nprocs,"balance:allproccount");
 
   user_xsplit = user_ysplit = user_zsplit = NULL;
-  dflag = 0;
+  shift_allocate = 0;
 
   fp = NULL;
   firststep = 1;
@@ -61,7 +62,7 @@ Balance::~Balance()
   delete [] user_ysplit;
   delete [] user_zsplit;
 
-  if (dflag) {
+  if (shift_allocate) {
     delete [] bdim;
     delete [] count;
     delete [] sum;
@@ -89,18 +90,21 @@ void Balance::command(int narg, char **arg)
 
   // parse arguments
 
-  if (narg < 1) error->all(FLERR,"Illegal balance command");
+  if (narg < 2) error->all(FLERR,"Illegal balance command");
+
+  thresh = force->numeric(FLERR,arg[0]);
 
   int dimension = domain->dimension;
   int *procgrid = comm->procgrid;
+  style = -1;
   xflag = yflag = zflag = NONE;
-  dflag = 0;
-  outflag = 0;
 
-  int iarg = 0;
+  int iarg = 1;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"x") == 0) {
-      if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
+      if (style != -1 && style != XYZ) 
+        error->all(FLERR,"Illegal balance command");
+      style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         xflag = UNIFORM;
@@ -118,7 +122,9 @@ void Balance::command(int narg, char **arg)
         user_xsplit[procgrid[0]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"y") == 0) {
-      if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
+      if (style != -1 && style != XYZ) 
+        error->all(FLERR,"Illegal balance command");
+      style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         yflag = UNIFORM;
@@ -136,7 +142,9 @@ void Balance::command(int narg, char **arg)
         user_ysplit[procgrid[1]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"z") == 0) {
-      if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
+      if (style != -1 && style != XYZ) 
+        error->all(FLERR,"Illegal balance command");
+      style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         zflag = UNIFORM;
@@ -154,22 +162,32 @@ void Balance::command(int narg, char **arg)
         user_zsplit[procgrid[2]] = 1.0;
       }
 
-    } else if (strcmp(arg[iarg],"dynamic") == 0) {
-      if (xflag != NONE || yflag != NONE || zflag != NONE)
-        error->all(FLERR,"Illegal balance command");
+    } else if (strcmp(arg[iarg],"shift") == 0) {
+      if (style != -1) error->all(FLERR,"Illegal balance command");
       if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
-      dflag = 1;
-      xflag = yflag = DYNAMIC;
-      if (dimension == 3) zflag = DYNAMIC;
+      style = SHIFT;
       if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command");
       strcpy(bstr,arg[iarg+1]);
       nitermax = force->inumeric(FLERR,arg[iarg+2]);
       if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
-      thresh = force->numeric(FLERR,arg[iarg+3]);
-      if (thresh < 1.0) error->all(FLERR,"Illegal balance command");
+      stopthresh = force->numeric(FLERR,arg[iarg+3]);
+      if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command");
       iarg += 4;
 
-    } else if (strcmp(arg[iarg],"out") == 0) {
+    } else if (strcmp(arg[iarg],"rcb") == 0) {
+      if (style != -1) error->all(FLERR,"Illegal balance command");
+      style = RCB;
+      iarg++;
+
+    } else break;
+  }
+
+  // optional keywords
+
+  outflag = 0;
+
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"out") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
       if (outflag) error->all(FLERR,"Illegal balance command");
       outflag = 1;
@@ -183,50 +201,58 @@ void Balance::command(int narg, char **arg)
 
   // error check
 
-  if (zflag && dimension == 2)
-    error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
+  if (style == XYZ) {
+    if (zflag != NONE  && dimension == 2)
+      error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
 
-  if (xflag == USER)
-    for (int i = 1; i <= procgrid[0]; i++)
-      if (user_xsplit[i-1] >= user_xsplit[i])
-        error->all(FLERR,"Illegal balance command");
-  if (yflag == USER)
-    for (int i = 1; i <= procgrid[1]; i++)
-      if (user_ysplit[i-1] >= user_ysplit[i])
-        error->all(FLERR,"Illegal balance command");
-  if (zflag == USER)
-    for (int i = 1; i <= procgrid[2]; i++)
-      if (user_zsplit[i-1] >= user_zsplit[i])
-        error->all(FLERR,"Illegal balance command");
+    if (xflag == USER)
+      for (int i = 1; i <= procgrid[0]; i++)
+        if (user_xsplit[i-1] >= user_xsplit[i])
+          error->all(FLERR,"Illegal balance command");
+    if (yflag == USER)
+      for (int i = 1; i <= procgrid[1]; i++)
+        if (user_ysplit[i-1] >= user_ysplit[i])
+          error->all(FLERR,"Illegal balance command");
+    if (zflag == USER)
+      for (int i = 1; i <= procgrid[2]; i++)
+        if (user_zsplit[i-1] >= user_zsplit[i])
+          error->all(FLERR,"Illegal balance command");
+  }
 
-  if (dflag) {
+  if (style == SHIFT) {
     const int blen=strlen(bstr);
     for (int i = 0; i < blen; i++) {
       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
-        error->all(FLERR,"Balance dynamic string is invalid");
+        error->all(FLERR,"Balance shift string is invalid");
       if (bstr[i] == 'z' && dimension == 2)
-        error->all(FLERR,"Balance dynamic string is invalid");
+        error->all(FLERR,"Balance shift string is invalid");
       for (int j = i+1; j < blen; j++)
         if (bstr[i] == bstr[j])
-          error->all(FLERR,"Balance dynamic string is invalid");
+          error->all(FLERR,"Balance shift string is invalid");
     }
   }
 
   // insure atoms are in current box & update box via shrink-wrap
-  // no exchange() since doesn't matter if atoms are assigned to correct procs
+  // init entire system since comm->setup is done
+  // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
+
+  lmp->init();
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   domain->reset_box();
+  comm->setup();
+  comm->exchange();
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // imbinit = initial imbalance
-  // use current splits instead of nlocal since atoms may not be in sub-box
 
-  domain->x2lamda(atom->nlocal);
   int maxinit;
-  double imbinit = imbalance_splits(maxinit);
-  domain->lamda2x(atom->nlocal);
+  double imbinit = imbalance_nlocal(maxinit);
+
+  // no load-balance if imbalance doesn't exceed threshhold
+
+  if (imbinit < thresh) return;
 
   // debug output of initial state
 
@@ -236,40 +262,55 @@ void Balance::command(int narg, char **arg)
 
   int niter = 0;
 
-  // explicit setting of sub-domain sizes
+  // NOTE: if using XYZ or SHIFT and current partition is TILING,
+  //   then need to create initial BRICK partition before performing LB
 
-  if (xflag == UNIFORM) {
-    for (int i = 0; i < procgrid[0]; i++)
-      comm->xsplit[i] = i * 1.0/procgrid[0];
-    comm->xsplit[procgrid[0]] = 1.0;
+  // perform load-balance
+  // style XYZ = explicit setting of cutting planes of logical 3d grid
+
+  if (style == XYZ) {
+    if (xflag == UNIFORM) {
+      for (int i = 0; i < procgrid[0]; i++)
+        comm->xsplit[i] = i * 1.0/procgrid[0];
+      comm->xsplit[procgrid[0]] = 1.0;
+    }
+
+    if (yflag == UNIFORM) {
+      for (int i = 0; i < procgrid[1]; i++)
+        comm->ysplit[i] = i * 1.0/procgrid[1];
+      comm->ysplit[procgrid[1]] = 1.0;
+    }
+
+    if (zflag == UNIFORM) {
+      for (int i = 0; i < procgrid[2]; i++)
+        comm->zsplit[i] = i * 1.0/procgrid[2];
+      comm->zsplit[procgrid[2]] = 1.0;
+    }
+
+    if (xflag == USER)
+      for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
+
+    if (yflag == USER)
+      for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
+
+    if (zflag == USER)
+      for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
   }
 
-  if (yflag == UNIFORM) {
-    for (int i = 0; i < procgrid[1]; i++)
-      comm->ysplit[i] = i * 1.0/procgrid[1];
-    comm->ysplit[procgrid[1]] = 1.0;
-  }
+  // style SHIFT = adjust cutting planes of logical 3d grid
 
-  if (zflag == UNIFORM) {
-    for (int i = 0; i < procgrid[2]; i++)
-      comm->zsplit[i] = i * 1.0/procgrid[2];
-    comm->zsplit[procgrid[2]] = 1.0;
-  }
-
-  if (xflag == USER)
-    for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
-
-  if (yflag == USER)
-    for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
-
-  if (zflag == USER)
-    for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
-
-  // static load-balance of sub-domain sizes
-
-  if (dflag) {
+  if (style == SHIFT) {
     static_setup(bstr);
-    niter = dynamic();
+    niter = shift();
+  }
+
+  // style RCB = 
+
+  if (style == RCB) {
+    error->all(FLERR,"Balance rcb is not yet supported");
+
+    if (comm->style == 0) 
+      error->all(FLERR,"Cannot use balance rcb with comm_style brick");
   }
 
   // output of final result
@@ -279,15 +320,18 @@ void Balance::command(int narg, char **arg)
   // reset comm->uniform flag if necessary
 
   if (comm->uniform) {
-    if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0;
-    if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0;
-    if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0;
+    if (style == SHIFT) comm->uniform = 0;
+    if (style == XYZ && xflag == USER) comm->uniform = 0;
+    if (style == XYZ && yflag == USER) comm->uniform = 0;
+    if (style == XYZ && zflag == USER) comm->uniform = 0;
   } else {
     if (dimension == 3) {
-      if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+      if (style == XYZ && 
+          xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->uniform = 1;
     } else {
-      if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1;
+      if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM)
+        comm->uniform = 1;
     }
   }
 
@@ -431,6 +475,8 @@ double Balance::imbalance_splits(int &max)
 
 void Balance::static_setup(char *str)
 {
+  shift_allocate = 1;
+
   ndim = strlen(str);
   bdim = new int[ndim];
 
@@ -456,17 +502,16 @@ void Balance::static_setup(char *str)
 }
 
 /* ----------------------------------------------------------------------
-   setup dynamic load balance operations
+   setup shift load balance operations
    called from fix balance
-   set rho = 1 for dynamic balancing after call to dynamic_setup()
+   set rho = 1 for shift balancing after call to shift_setup()
 ------------------------------------------------------------------------- */
 
-void Balance::dynamic_setup(char *str, int nitermax_in, double thresh_in)
+void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 {
-  dflag = 1;
   static_setup(str);
   nitermax = nitermax_in;
-  thresh = thresh_in;
+  stopthresh = thresh_in;
 
   rho = 1;
 }
@@ -477,7 +522,7 @@ void Balance::dynamic_setup(char *str, int nitermax_in, double thresh_in)
    return niter = iteration count
 ------------------------------------------------------------------------- */
 
-int Balance::dynamic()
+int Balance::shift()
 {
   int i,j,k,m,np,max;
   double *split = NULL;
@@ -490,7 +535,7 @@ int Balance::dynamic()
   // set delta for 1d balancing = root of threshhold
   // root = # of dimensions being balanced on
 
-  double delta = pow(thresh,1.0/ndim) - 1.0;
+  double delta = pow(stopthresh,1.0/ndim) - 1.0;
   int *procgrid = comm->procgrid;
 
   // all balancing done in lamda coords
@@ -624,7 +669,7 @@ int Balance::dynamic()
     // this is a true 3d test of particle count per processor
 
     double imbfactor = imbalance_splits(max);
-    if (imbfactor <= thresh) break;
+    if (imbfactor <= stopthresh) break;
   }
 
   // restore real coords
@@ -867,6 +912,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
    debug output for Idim and count
    only called by proc 0
 ------------------------------------------------------------------------- */
+
 #ifdef BALANCE_DEBUG
 void Balance::debug_output(int idim, int m, int np, double *split)
 {
diff --git a/src/balance.h b/src/balance.h
index 013ed2189f..66ad6db721 100644
--- a/src/balance.h
+++ b/src/balance.h
@@ -30,22 +30,24 @@ class Balance : protected Pointers {
   Balance(class LAMMPS *);
   ~Balance();
   void command(int, char **);
-  void dynamic_setup(char *, int, double);
-  int dynamic();
+  void shift_setup(char *, int, double);
+  int shift();
   double imbalance_nlocal(int &);
   void dumpout(bigint, FILE *);
 
  private:
   int me,nprocs;
 
+  double thresh;                                    // threshhold to perform LB
+  int style;                                        // style of LB
   int xflag,yflag,zflag;                            // xyz LB flags
   double *user_xsplit,*user_ysplit,*user_zsplit;    // params for xyz LB
 
-  int dflag;                 // dynamic LB flag
-  int nitermax;              // params for dynamic LB
-  double thresh;
+  int nitermax;              // params for shift LB
+  double stopthresh;
   char bstr[4];
 
+  int shift_allocate;        // 1 if SHIFT vectors have been allocated
   int ndim;                  // length of balance string bstr
   int *bdim;                 // XYZ for each character in bstr
   bigint *count;             // counts for slices in one dim
diff --git a/src/comm.cpp b/src/comm.cpp
index bf5213af53..ab5fe9ba26 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -11,60 +11,37 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-/* ----------------------------------------------------------------------
-   Contributing author (triclinic) : Pieter in 't Veld (SNL)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
 #include "mpi.h"
-#include "math.h"
 #include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
 #include "comm.h"
 #include "universe.h"
 #include "atom.h"
-#include "atom_vec.h"
 #include "force.h"
-#include "pair.h"
 #include "domain.h"
-#include "neighbor.h"
 #include "group.h"
-#include "modify.h"
-#include "fix.h"
-#include "compute.h"
-#include "output.h"
-#include "dump.h"
 #include "procmap.h"
-#include "math_extra.h"
-#include "error.h"
 #include "memory.h"
-
-#ifdef _OPENMP
-#include "omp.h"
-#endif
+#include "error.h"
 
 using namespace LAMMPS_NS;
 
-#define BUFFACTOR 1.5
-#define BUFMIN 1000
-#define BUFEXTRA 1000
-#define BIG 1.0e20
-
-enum{SINGLE,MULTI};
+enum{SINGLE,MULTI};             // same as in Comm sub-styles
 enum{MULTIPLE};                   // same as in ProcMap
 enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM};
 enum{CART,CARTREORDER,XYZ};
 
-/* ----------------------------------------------------------------------
-   setup MPI and allocate buffer space
-------------------------------------------------------------------------- */
+/* ---------------------------------------------------------------------- */
 
 Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
+  mode = 0;
+  bordergroup = 0;
+  cutghostuser = 0.0;
+  ghost_velocity = 0;
+
   user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0;
   coregrid[0] = coregrid[1] = coregrid[2] = 1;
   gridflag = ONELEVEL;
@@ -74,68 +51,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
   otherflag = 0;
   outfile = NULL;
 
-  grid2proc = NULL;
-
-  bordergroup = 0;
-  style = SINGLE;
-  uniform = 1;
   xsplit = ysplit = zsplit = NULL;
-  multilo = multihi = NULL;
-  cutghostmulti = NULL;
-  cutghostuser = 0.0;
-  ghost_velocity = 0;
-
-  // use of OpenMP threads
-  // query OpenMP for number of threads/process set by user at run-time
-  // if the OMP_NUM_THREADS environment variable is not set, we default
-  // to using 1 thread. This follows the principle of the least surprise,
-  // while practically all OpenMP implementations violate it by using
-  // as many threads as there are (virtual) CPU cores by default.
-
-  nthreads = 1;
-#ifdef _OPENMP
-  if (getenv("OMP_NUM_THREADS") == NULL) {
-    nthreads = 1;
-    if (me == 0)
-      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
-  } else {
-    nthreads = omp_get_max_threads();
-  }
-
-  // enforce consistent number of threads across all MPI tasks
-
-  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
-  omp_set_num_threads(nthreads);
-
-  if (me == 0) {
-    if (screen)
-      fprintf(screen,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
-    if (logfile)
-      fprintf(logfile,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
-  }
-#endif
-
-  // initialize comm buffers & exchange memory
-  // NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body
-
-  maxexchange_atom = maxexchange_fix = 0;
-  maxexchange = maxexchange_atom + maxexchange_fix;
-  bufextra = maxexchange + BUFEXTRA;
-
-  maxsend = BUFMIN;
-  memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
-  maxrecv = BUFMIN;
-  memory->create(buf_recv,maxrecv,"comm:buf_recv");
-
-  maxswap = 6;
-  allocate_swap(maxswap);
-
-  sendlist = (int **) memory->smalloc(maxswap*sizeof(int *),"comm:sendlist");
-  memory->create(maxsendlist,maxswap,"comm:maxsendlist");
-  for (int i = 0; i < maxswap; i++) {
-    maxsendlist[i] = BUFMIN;
-    memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
-  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -148,25 +64,186 @@ Comm::~Comm()
 
   delete [] customfile;
   delete [] outfile;
-
-  memory->destroy(grid2proc);
-
-  free_swap();
-  if (style == MULTI) {
-    free_multi();
-    memory->destroy(cutghostmulti);
-  }
-
-  if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
-  memory->sfree(sendlist);
-  memory->destroy(maxsendlist);
-
-  memory->destroy(buf_send);
-  memory->destroy(buf_recv);
 }
 
 /* ----------------------------------------------------------------------
-   create 3d grid of procs based on Nprocs and box size & shape
+   modify communication params
+   invoked from input script by comm_modify command
+------------------------------------------------------------------------- */
+
+void Comm::modify_params(int narg, char **arg)
+{
+  if (narg < 1) error->all(FLERR,"Illegal comm_modify command");
+
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"mode") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
+      if (strcmp(arg[0],"single") == 0) mode = SINGLE;
+      else if (strcmp(arg[0],"multi") == 0) mode = MULTI;
+      else error->all(FLERR,"Illegal comm_modify command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"group") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
+      bordergroup = group->find(arg[iarg+1]);
+      if (bordergroup < 0)
+        error->all(FLERR,"Invalid group in comm_modify command");
+      if (bordergroup && (atom->firstgroupname == NULL ||
+                          strcmp(arg[iarg+1],atom->firstgroupname) != 0))
+        error->all(FLERR,"Comm_modify group != atom_modify first group");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"cutoff") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
+      cutghostuser = force->numeric(FLERR,arg[iarg+1]);
+      if (cutghostuser < 0.0)
+        error->all(FLERR,"Invalid cutoff in comm_modify command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"vel") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
+      if (strcmp(arg[iarg+1],"yes") == 0) ghost_velocity = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
+      else error->all(FLERR,"Illegal comm_modify command");
+      iarg += 2;
+    } else error->all(FLERR,"Illegal comm_modify command");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set dimensions for 3d grid of processors, and associated flags
+   invoked from input script by processors command
+------------------------------------------------------------------------- */
+
+void Comm::set_processors(int narg, char **arg)
+{
+  if (narg < 3) error->all(FLERR,"Illegal processors command");
+
+  if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0;
+  else user_procgrid[0] = force->inumeric(FLERR,arg[0]);
+  if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0;
+  else user_procgrid[1] = force->inumeric(FLERR,arg[1]);
+  if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0;
+  else user_procgrid[2] = force->inumeric(FLERR,arg[2]);
+
+  if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
+    error->all(FLERR,"Illegal processors command");
+
+  int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
+  if (p && p != nprocs)
+    error->all(FLERR,"Specified processors != physical processors");
+
+  int iarg = 3;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"grid") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
+
+      if (strcmp(arg[iarg+1],"onelevel") == 0) {
+        gridflag = ONELEVEL;
+
+      } else if (strcmp(arg[iarg+1],"twolevel") == 0) {
+        if (iarg+6 > narg) error->all(FLERR,"Illegal processors command");
+        gridflag = TWOLEVEL;
+
+        ncores = force->inumeric(FLERR,arg[iarg+2]);
+        if (strcmp(arg[iarg+3],"*") == 0) user_coregrid[0] = 0;
+        else user_coregrid[0] = force->inumeric(FLERR,arg[iarg+3]);
+        if (strcmp(arg[iarg+4],"*") == 0) user_coregrid[1] = 0;
+        else user_coregrid[1] = force->inumeric(FLERR,arg[iarg+4]);
+        if (strcmp(arg[iarg+5],"*") == 0) user_coregrid[2] = 0;
+        else user_coregrid[2] = force->inumeric(FLERR,arg[iarg+5]);
+
+        if (ncores <= 0 || user_coregrid[0] < 0 ||
+            user_coregrid[1] < 0 || user_coregrid[2] < 0)
+          error->all(FLERR,"Illegal processors command");
+        iarg += 4;
+
+      } else if (strcmp(arg[iarg+1],"numa") == 0) {
+        gridflag = NUMA;
+
+      } else if (strcmp(arg[iarg],"custom") == 0) {
+        if (iarg+3 > narg) error->all(FLERR,"Illegal processors command");
+        gridflag = CUSTOM;
+        delete [] customfile;
+        int n = strlen(arg[iarg+2]) + 1;
+        customfile = new char[n];
+        strcpy(customfile,arg[iarg+2]);
+        iarg += 1;
+
+      } else error->all(FLERR,"Illegal processors command");
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg],"map") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
+      if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
+      else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
+      else if (strcmp(arg[iarg+1],"xyz") == 0 ||
+               strcmp(arg[iarg+1],"xzy") == 0 ||
+               strcmp(arg[iarg+1],"yxz") == 0 ||
+               strcmp(arg[iarg+1],"yzx") == 0 ||
+               strcmp(arg[iarg+1],"zxy") == 0 ||
+               strcmp(arg[iarg+1],"zyx") == 0) {
+        mapflag = XYZ;
+        strcpy(xyz,arg[iarg+1]);
+      } else error->all(FLERR,"Illegal processors command");
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg],"part") == 0) {
+      if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
+      if (universe->nworlds == 1)
+        error->all(FLERR,
+                   "Cannot use processors part command "
+                   "without using partitions");
+      int isend = force->inumeric(FLERR,arg[iarg+1]);
+      int irecv = force->inumeric(FLERR,arg[iarg+2]);
+      if (isend < 1 || isend > universe->nworlds ||
+          irecv < 1 || irecv > universe->nworlds || isend == irecv)
+        error->all(FLERR,"Invalid partitions in processors part command");
+      if (isend-1 == universe->iworld) {
+        if (send_to_partition >= 0)
+          error->all(FLERR,
+                     "Sending partition in processors part command "
+                     "is already a sender");
+        send_to_partition = irecv-1;
+      }
+      if (irecv-1 == universe->iworld) {
+        if (recv_from_partition >= 0)
+          error->all(FLERR,
+                     "Receiving partition in processors part command "
+                     "is already a receiver");
+        recv_from_partition = isend-1;
+      }
+
+      // only receiver has otherflag dependency
+
+      if (strcmp(arg[iarg+3],"multiple") == 0) {
+        if (universe->iworld == irecv-1) {
+          otherflag = 1;
+          other_style = MULTIPLE;
+        }
+      } else error->all(FLERR,"Illegal processors command");
+      iarg += 4;
+
+    } else if (strcmp(arg[iarg],"file") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
+      delete [] outfile;
+      int n = strlen(arg[iarg+1]) + 1;
+      outfile = new char[n];
+      strcpy(outfile,arg[iarg+1]);
+      iarg += 2;
+
+    } else error->all(FLERR,"Illegal processors command");
+  }
+
+  // error checks
+
+  if (gridflag == NUMA && mapflag != CART)
+    error->all(FLERR,"Processors grid numa and map style are incompatible");
+  if (otherflag && (gridflag == NUMA || gridflag == CUSTOM))
+    error->all(FLERR,
+               "Processors part option and grid style are incompatible");
+}
+
+/* ----------------------------------------------------------------------
+   create a 3d grid of procs based on Nprocs and box size & shape
    map processors to grid, setup xyz split for a uniform grid
 ------------------------------------------------------------------------- */
 
@@ -313,1266 +390,6 @@ void Comm::set_proc_grid(int outflag)
   }
 }
 
-/* ---------------------------------------------------------------------- */
-
-void Comm::init()
-{
-  triclinic = domain->triclinic;
-  map_style = atom->map_style;
-
-  // comm_only = 1 if only x,f are exchanged in forward/reverse comm
-  // comm_x_only = 0 if ghost_velocity since velocities are added
-
-  comm_x_only = atom->avec->comm_x_only;
-  comm_f_only = atom->avec->comm_f_only;
-  if (ghost_velocity) comm_x_only = 0;
-
-  // set per-atom sizes for forward/reverse/border comm
-  // augment by velocity and fix quantities if needed
-
-  size_forward = atom->avec->size_forward;
-  size_reverse = atom->avec->size_reverse;
-  size_border = atom->avec->size_border;
-
-  if (ghost_velocity) size_forward += atom->avec->size_velocity;
-  if (ghost_velocity) size_border += atom->avec->size_velocity;
-
-  for (int i = 0; i < modify->nfix; i++)
-    size_border += modify->fix[i]->comm_border;
-  
-  // maxexchange = max # of datums/atom in exchange communication
-  // maxforward = # of datums in largest forward communication
-  // maxreverse = # of datums in largest reverse communication
-  // query pair,fix,compute,dump for their requirements
-  // pair style can force reverse comm even if newton off
-
-  maxexchange = BUFMIN + maxexchange_fix;
-  maxforward = MAX(size_forward,size_border);
-  maxreverse = size_reverse;
-
-  if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
-  if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
-
-  for (int i = 0; i < modify->nfix; i++) {
-    maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
-    maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
-  }
-
-  for (int i = 0; i < modify->ncompute; i++) {
-    maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
-    maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
-  }
-
-  for (int i = 0; i < output->ndump; i++) {
-    maxforward = MAX(maxforward,output->dump[i]->comm_forward);
-    maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
-  }
-
-  if (force->newton == 0) maxreverse = 0;
-  if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
-
-  // memory for multi-style communication
-
-  if (style == MULTI && multilo == NULL) {
-    allocate_multi(maxswap);
-    memory->create(cutghostmulti,atom->ntypes+1,3,"comm:cutghostmulti");
-  }
-  if (style == SINGLE && multilo) {
-    free_multi();
-    memory->destroy(cutghostmulti);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   setup spatial-decomposition communication patterns
-   function of neighbor cutoff(s) & cutghostuser & current box size
-   single style sets slab boundaries (slablo,slabhi) based on max cutoff
-   multi style sets type-dependent slab boundaries (multilo,multihi)
-------------------------------------------------------------------------- */
-
-void Comm::setup()
-{
-  // cutghost[] = max distance at which ghost atoms need to be acquired
-  // for orthogonal:
-  //   cutghost is in box coords = neigh->cutghost in all 3 dims
-  // for triclinic:
-  //   neigh->cutghost = distance between tilted planes in box coords
-  //   cutghost is in lamda coords = distance between those planes
-  // for multi:
-  //   cutghostmulti = same as cutghost, only for each atom type
-
-  int i;
-  int ntypes = atom->ntypes;
-  double *prd,*sublo,*subhi;
-
-  double cut = MAX(neighbor->cutneighmax,cutghostuser);
-
-  if (triclinic == 0) {
-    prd = domain->prd;
-    sublo = domain->sublo;
-    subhi = domain->subhi;
-    cutghost[0] = cutghost[1] = cutghost[2] = cut;
-
-    if (style == MULTI) {
-      double *cuttype = neighbor->cuttype;
-      for (i = 1; i <= ntypes; i++)
-        cutghostmulti[i][0] = cutghostmulti[i][1] = cutghostmulti[i][2] =
-          cuttype[i];
-    }
-
-  } else {
-    prd = domain->prd_lamda;
-    sublo = domain->sublo_lamda;
-    subhi = domain->subhi_lamda;
-    double *h_inv = domain->h_inv;
-    double length0,length1,length2;
-    length0 = sqrt(h_inv[0]*h_inv[0] + h_inv[5]*h_inv[5] + h_inv[4]*h_inv[4]);
-    cutghost[0] = cut * length0;
-    length1 = sqrt(h_inv[1]*h_inv[1] + h_inv[3]*h_inv[3]);
-    cutghost[1] = cut * length1;
-    length2 = h_inv[2];
-    cutghost[2] = cut * length2;
-
-    if (style == MULTI) {
-      double *cuttype = neighbor->cuttype;
-      for (i = 1; i <= ntypes; i++) {
-        cutghostmulti[i][0] = cuttype[i] * length0;
-        cutghostmulti[i][1] = cuttype[i] * length1;
-        cutghostmulti[i][2] = cuttype[i] * length2;
-      }
-    }
-  }
-
-  // recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
-  //   0 = from left, 1 = from right
-  //   do not cross non-periodic boundaries, need[2] = 0 for 2d
-  // sendneed[idim][0/1] = # of procs away I send atoms to
-  //   0 = to left, 1 = to right
-  //   set equal to recvneed[idim][1/0] of neighbor proc
-  // maxneed[idim] = max procs away any proc recvs atoms in either direction
-  // uniform = 1 = uniform sized sub-domains:
-  //   maxneed is directly computable from sub-domain size
-  //     limit to procgrid-1 for non-PBC
-  //   recvneed = maxneed except for procs near non-PBC
-  //   sendneed = recvneed of neighbor on each side
-  // uniform = 0 = non-uniform sized sub-domains:
-  //   compute recvneed via updown() which accounts for non-PBC
-  //   sendneed = recvneed of neighbor on each side
-  //   maxneed via Allreduce() of recvneed
-
-  int *periodicity = domain->periodicity;
-  int left,right;
-
-  if (uniform) {
-    maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
-    maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
-    maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
-    if (domain->dimension == 2) maxneed[2] = 0;
-    if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
-    if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
-    if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
-
-    if (!periodicity[0]) {
-      recvneed[0][0] = MIN(maxneed[0],myloc[0]);
-      recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
-      left = myloc[0] - 1;
-      if (left < 0) left = procgrid[0] - 1;
-      sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
-      right = myloc[0] + 1;
-      if (right == procgrid[0]) right = 0;
-      sendneed[0][1] = MIN(maxneed[0],right);
-    } else recvneed[0][0] = recvneed[0][1] =
-             sendneed[0][0] = sendneed[0][1] = maxneed[0];
-
-    if (!periodicity[1]) {
-      recvneed[1][0] = MIN(maxneed[1],myloc[1]);
-      recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
-      left = myloc[1] - 1;
-      if (left < 0) left = procgrid[1] - 1;
-      sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
-      right = myloc[1] + 1;
-      if (right == procgrid[1]) right = 0;
-      sendneed[1][1] = MIN(maxneed[1],right);
-    } else recvneed[1][0] = recvneed[1][1] =
-             sendneed[1][0] = sendneed[1][1] = maxneed[1];
-
-    if (!periodicity[2]) {
-      recvneed[2][0] = MIN(maxneed[2],myloc[2]);
-      recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
-      left = myloc[2] - 1;
-      if (left < 0) left = procgrid[2] - 1;
-      sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
-      right = myloc[2] + 1;
-      if (right == procgrid[2]) right = 0;
-      sendneed[2][1] = MIN(maxneed[2],right);
-    } else recvneed[2][0] = recvneed[2][1] =
-             sendneed[2][0] = sendneed[2][1] = maxneed[2];
-
-  } else {
-    recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
-    recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
-    left = myloc[0] - 1;
-    if (left < 0) left = procgrid[0] - 1;
-    sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
-    right = myloc[0] + 1;
-    if (right == procgrid[0]) right = 0;
-    sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
-
-    recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
-    recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
-    left = myloc[1] - 1;
-    if (left < 0) left = procgrid[1] - 1;
-    sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
-    right = myloc[1] + 1;
-    if (right == procgrid[1]) right = 0;
-    sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
-
-    if (domain->dimension == 3) {
-      recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
-      recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
-      left = myloc[2] - 1;
-      if (left < 0) left = procgrid[2] - 1;
-      sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
-      right = myloc[2] + 1;
-      if (right == procgrid[2]) right = 0;
-      sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
-    } else recvneed[2][0] = recvneed[2][1] =
-             sendneed[2][0] = sendneed[2][1] = 0;
-
-    int all[6];
-    MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
-    maxneed[0] = MAX(all[0],all[1]);
-    maxneed[1] = MAX(all[2],all[3]);
-    maxneed[2] = MAX(all[4],all[5]);
-  }
-
-  // allocate comm memory
-
-  nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
-  if (nswap > maxswap) grow_swap(nswap);
-
-  // setup parameters for each exchange:
-  // sendproc = proc to send to at each swap
-  // recvproc = proc to recv from at each swap
-  // for style SINGLE:
-  //   slablo/slabhi = boundaries for slab of atoms to send at each swap
-  //   use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
-  //   if round-off, atoms recvd across PBC can be < or > than subbox boundary
-  //   note that borders() only loops over subset of atoms during each swap
-  //   treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
-  // for style MULTI:
-  //   multilo/multihi is same, with slablo/slabhi for each atom type
-  // pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
-  // pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
-  // for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
-  // 1st part of if statement is sending to the west/south/down
-  // 2nd part of if statement is sending to the east/north/up
-
-  int dim,ineed;
-
-  int iswap = 0;
-  for (dim = 0; dim < 3; dim++) {
-    for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
-      pbc_flag[iswap] = 0;
-      pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
-        pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
-
-      if (ineed % 2 == 0) {
-        sendproc[iswap] = procneigh[dim][0];
-        recvproc[iswap] = procneigh[dim][1];
-        if (style == SINGLE) {
-          if (ineed < 2) slablo[iswap] = -BIG;
-          else slablo[iswap] = 0.5 * (sublo[dim] + subhi[dim]);
-          slabhi[iswap] = sublo[dim] + cutghost[dim];
-        } else {
-          for (i = 1; i <= ntypes; i++) {
-            if (ineed < 2) multilo[iswap][i] = -BIG;
-            else multilo[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]);
-            multihi[iswap][i] = sublo[dim] + cutghostmulti[i][dim];
-          }
-        }
-        if (myloc[dim] == 0) {
-          pbc_flag[iswap] = 1;
-          pbc[iswap][dim] = 1;
-          if (triclinic) {
-            if (dim == 1) pbc[iswap][5] = 1;
-            else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
-          }
-        }
-
-      } else {
-        sendproc[iswap] = procneigh[dim][1];
-        recvproc[iswap] = procneigh[dim][0];
-        if (style == SINGLE) {
-          slablo[iswap] = subhi[dim] - cutghost[dim];
-          if (ineed < 2) slabhi[iswap] = BIG;
-          else slabhi[iswap] = 0.5 * (sublo[dim] + subhi[dim]);
-        } else {
-          for (i = 1; i <= ntypes; i++) {
-            multilo[iswap][i] = subhi[dim] - cutghostmulti[i][dim];
-            if (ineed < 2) multihi[iswap][i] = BIG;
-            else multihi[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]);
-          }
-        }
-        if (myloc[dim] == procgrid[dim]-1) {
-          pbc_flag[iswap] = 1;
-          pbc[iswap][dim] = -1;
-          if (triclinic) {
-            if (dim == 1) pbc[iswap][5] = -1;
-            else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = -1;
-          }
-        }
-      }
-
-      iswap++;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   walk up/down the extent of nearby processors in dim and dir
-   loc = myloc of proc to start at
-   dir = 0/1 = walk to left/right
-   do not cross non-periodic boundaries
-   is not called for z dim in 2d
-   return how many procs away are needed to encompass cutghost away from loc
-------------------------------------------------------------------------- */
-
-int Comm::updown(int dim, int dir, int loc,
-                 double prd, int periodicity, double *split)
-{
-  int index,count;
-  double frac,delta;
-
-  if (dir == 0) {
-    frac = cutghost[dim]/prd;
-    index = loc - 1;
-    delta = 0.0;
-    count = 0;
-    while (delta < frac) {
-      if (index < 0) {
-        if (!periodicity) break;
-        index = procgrid[dim] - 1;
-      }
-      count++;
-      delta += split[index+1] - split[index];
-      index--;
-    }
-
-  } else {
-    frac = cutghost[dim]/prd;
-    index = loc + 1;
-    delta = 0.0;
-    count = 0;
-    while (delta < frac) {
-      if (index >= procgrid[dim]) {
-        if (!periodicity) break;
-        index = 0;
-      }
-      count++;
-      delta += split[index+1] - split[index];
-      index++;
-    }
-  }
-
-  return count;
-}
-
-/* ----------------------------------------------------------------------
-   forward communication of atom coords every timestep
-   other per-atom attributes may also be sent via pack/unpack routines
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm(int dummy)
-{
-  int n;
-  MPI_Request request;
-  MPI_Status status;
-  AtomVec *avec = atom->avec;
-  double **x = atom->x;
-  double *buf;
-
-  // exchange data with another proc
-  // if other proc is self, just copy
-  // if comm_x_only set, exchange or copy directly to x, don't unpack
-
-  for (int iswap = 0; iswap < nswap; iswap++) {
-    if (sendproc[iswap] != me) {
-      if (comm_x_only) {
-        if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
-        else buf = NULL;
-        if (size_forward_recv[iswap])
-          MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
-                    recvproc[iswap],0,world,&request);
-        n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                            buf_send,pbc_flag[iswap],pbc[iswap]);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
-        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
-      } else if (ghost_velocity) {
-        if (size_forward_recv[iswap])
-          MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
-                    recvproc[iswap],0,world,&request);
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
-                                buf_send,pbc_flag[iswap],pbc[iswap]);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
-        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
-        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
-      } else {
-        if (size_forward_recv[iswap])
-          MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
-                    recvproc[iswap],0,world,&request);
-        n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                            buf_send,pbc_flag[iswap],pbc[iswap]);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
-        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
-        avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
-      }
-
-    } else {
-      if (comm_x_only) {
-        if (sendnum[iswap])
-          n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                              x[firstrecv[iswap]],pbc_flag[iswap],
-                              pbc[iswap]);
-      } else if (ghost_velocity) {
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
-                                buf_send,pbc_flag[iswap],pbc[iswap]);
-        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
-      } else {
-        n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                            buf_send,pbc_flag[iswap],pbc[iswap]);
-        avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication of forces on atoms every timestep
-   other per-atom attributes may also be sent via pack/unpack routines
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm()
-{
-  int n;
-  MPI_Request request;
-  MPI_Status status;
-  AtomVec *avec = atom->avec;
-  double **f = atom->f;
-  double *buf;
-
-  // exchange data with another proc
-  // if other proc is self, just copy
-  // if comm_f_only set, exchange or copy directly from f, don't pack
-
-  for (int iswap = nswap-1; iswap >= 0; iswap--) {
-    if (sendproc[iswap] != me) {
-      if (comm_f_only) {
-        if (size_reverse_recv[iswap])
-          MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
-                    sendproc[iswap],0,world,&request);
-        if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
-        else buf = NULL;
-        if (size_reverse_send[iswap])
-          MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
-                   recvproc[iswap],0,world);
-        if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
-      } else {
-        if (size_reverse_recv[iswap])
-          MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
-                    sendproc[iswap],0,world,&request);
-        n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
-        if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
-      }
-      avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
-
-    } else {
-      if (comm_f_only) {
-        if (sendnum[iswap])
-            avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
-                                f[firstrecv[iswap]]);
-      } else {
-        n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
-        avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   exchange: move atoms to correct processors
-   atoms exchanged with all 6 stencil neighbors
-   send out atoms that have left my box, receive ones entering my box
-   atoms will be lost if not inside some proc's box
-     can happen if atom moves outside of non-periodic bounary
-     or if atom moves more than one proc away
-   this routine called before every reneighboring
-   for triclinic, atoms must be in lamda coords (0-1) before exchange is called
-------------------------------------------------------------------------- */
-
-void Comm::exchange()
-{
-  int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
-  double lo,hi,value;
-  double **x;
-  double *sublo,*subhi,*buf;
-  MPI_Request request;
-  MPI_Status status;
-  AtomVec *avec = atom->avec;
-
-  // clear global->local map for owned and ghost atoms
-  // b/c atoms migrate to new procs in exchange() and
-  //   new ghosts are created in borders()
-  // map_set() is done at end of borders()
-  // clear ghost count and any ghost bonus data internal to AtomVec
-
-  if (map_style) atom->map_clear();
-  atom->nghost = 0;
-  atom->avec->clear_bonus();
-
-  // insure send buf is large enough for single atom
-  // fixes can change per-atom size requirement on-the-fly
-
-  int bufextra_old = bufextra;
-  maxexchange = maxexchange_atom + maxexchange_fix;
-  bufextra = maxexchange + BUFEXTRA;
-  if (bufextra > bufextra_old)
-    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
-
-  // subbox bounds for orthogonal or triclinic
-
-  if (triclinic == 0) {
-    sublo = domain->sublo;
-    subhi = domain->subhi;
-  } else {
-    sublo = domain->sublo_lamda;
-    subhi = domain->subhi_lamda;
-  }
-
-  // loop over dimensions
-
-  for (int dim = 0; dim < 3; dim++) {
-
-    // fill buffer with atoms leaving my box, using < and >=
-    // when atom is deleted, fill it in with last atom
-
-    x = atom->x;
-    lo = sublo[dim];
-    hi = subhi[dim];
-    nlocal = atom->nlocal;
-    i = nsend = 0;
-
-    while (i < nlocal) {
-      if (x[i][dim] < lo || x[i][dim] >= hi) {
-        if (nsend > maxsend) grow_send(nsend,1);
-        nsend += avec->pack_exchange(i,&buf_send[nsend]);
-        avec->copy(nlocal-1,i,1);
-        nlocal--;
-      } else i++;
-    }
-    atom->nlocal = nlocal;
-
-    // send/recv atoms in both directions
-    // if 1 proc in dimension, no send/recv, set recv buf to send buf
-    // if 2 procs in dimension, single send/recv
-    // if more than 2 procs in dimension, send/recv to both neighbors
-
-    if (procgrid[dim] == 1) {
-      nrecv = nsend;
-      buf = buf_send;
-
-    } else {
-      MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
-                   &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
-      nrecv = nrecv1;
-      if (procgrid[dim] > 2) {
-        MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
-                     &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
-        nrecv += nrecv2;
-      }
-      if (nrecv > maxrecv) grow_recv(nrecv);
-
-      MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0,
-                world,&request);
-      MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world);
-      MPI_Wait(&request,&status);
-
-      if (procgrid[dim] > 2) {
-        MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
-                  world,&request);
-        MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world);
-        MPI_Wait(&request,&status);
-      }
-
-      buf = buf_recv;
-    }
-
-    // check incoming atoms to see if they are in my box
-    // if so, add to my list
-
-    m = 0;
-    while (m < nrecv) {
-      value = buf[m+dim+1];
-      if (value >= lo && value < hi) m += avec->unpack_exchange(&buf[m]);
-      else m += static_cast<int> (buf[m]);
-    }
-  }
-
-  if (atom->firstgroupname) atom->first_reorder();
-}
-
-/* ----------------------------------------------------------------------
-   borders: list nearby atoms to send to neighboring procs at every timestep
-   one list is created for every swap that will be made
-   as list is made, actually do swaps
-   this does equivalent of a communicate, so don't need to explicitly
-     call communicate routine on reneighboring timestep
-   this routine is called before every reneighboring
-   for triclinic, atoms must be in lamda coords (0-1) before borders is called
-------------------------------------------------------------------------- */
-
-void Comm::borders()
-{
-  int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
-  int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
-  double lo,hi;
-  int *type;
-  double **x;
-  double *buf,*mlo,*mhi;
-  MPI_Request request;
-  MPI_Status status;
-  AtomVec *avec = atom->avec;
-
-  // do swaps over all 3 dimensions
-
-  iswap = 0;
-  smax = rmax = 0;
-
-  for (dim = 0; dim < 3; dim++) {
-    nlast = 0;
-    twoneed = 2*maxneed[dim];
-    for (ineed = 0; ineed < twoneed; ineed++) {
-
-      // find atoms within slab boundaries lo/hi using <= and >=
-      // check atoms between nfirst and nlast
-      //   for first swaps in a dim, check owned and ghost
-      //   for later swaps in a dim, only check newly arrived ghosts
-      // store sent atom indices in list for use in future timesteps
-
-      x = atom->x;
-      if (style == SINGLE) {
-        lo = slablo[iswap];
-        hi = slabhi[iswap];
-      } else {
-        type = atom->type;
-        mlo = multilo[iswap];
-        mhi = multihi[iswap];
-      }
-      if (ineed % 2 == 0) {
-        nfirst = nlast;
-        nlast = atom->nlocal + atom->nghost;
-      }
-
-      nsend = 0;
-
-      // sendflag = 0 if I do not send on this swap
-      // sendneed test indicates receiver no longer requires data
-      // e.g. due to non-PBC or non-uniform sub-domains
-
-      if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
-      else sendflag = 1;
-
-      // find send atoms according to SINGLE vs MULTI
-      // all atoms eligible versus atoms in bordergroup
-      // only need to limit loop to bordergroup for first sends (ineed < 2)
-      // on these sends, break loop in two: owned (in group) and ghost
-
-      if (sendflag) {
-        if (!bordergroup || ineed >= 2) {
-          if (style == SINGLE) {
-            for (i = nfirst; i < nlast; i++)
-              if (x[i][dim] >= lo && x[i][dim] <= hi) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-          } else {
-            for (i = nfirst; i < nlast; i++) {
-              itype = type[i];
-              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-            }
-          }
-
-        } else {
-          if (style == SINGLE) {
-            ngroup = atom->nfirst;
-            for (i = 0; i < ngroup; i++)
-              if (x[i][dim] >= lo && x[i][dim] <= hi) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-            for (i = atom->nlocal; i < nlast; i++)
-              if (x[i][dim] >= lo && x[i][dim] <= hi) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-          } else {
-            ngroup = atom->nfirst;
-            for (i = 0; i < ngroup; i++) {
-              itype = type[i];
-              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-            }
-            for (i = atom->nlocal; i < nlast; i++) {
-              itype = type[i];
-              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
-                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
-                sendlist[iswap][nsend++] = i;
-              }
-            }
-          }
-        }
-      }
-
-      // pack up list of border atoms
-
-      if (nsend*size_border > maxsend) grow_send(nsend*size_border,0);
-      if (ghost_velocity)
-        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
-                                  pbc_flag[iswap],pbc[iswap]);
-      else
-        n = avec->pack_border(nsend,sendlist[iswap],buf_send,
-                              pbc_flag[iswap],pbc[iswap]);
-
-      // swap atoms with other proc
-      // no MPI calls except SendRecv if nsend/nrecv = 0
-      // put incoming ghosts at end of my atom arrays
-      // if swapping with self, simply copy, no messages
-
-      if (sendproc[iswap] != me) {
-        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
-                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
-        if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
-        if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
-                             recvproc[iswap],0,world,&request);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
-        if (nrecv) MPI_Wait(&request,&status);
-        buf = buf_recv;
-      } else {
-        nrecv = nsend;
-        buf = buf_send;
-      }
-
-      // unpack buffer
-
-      if (ghost_velocity)
-        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
-      else
-        avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf);
-
-      // set all pointers & counters
-
-      smax = MAX(smax,nsend);
-      rmax = MAX(rmax,nrecv);
-      sendnum[iswap] = nsend;
-      recvnum[iswap] = nrecv;
-      size_forward_recv[iswap] = nrecv*size_forward;
-      size_reverse_send[iswap] = nrecv*size_reverse;
-      size_reverse_recv[iswap] = nsend*size_reverse;
-      firstrecv[iswap] = atom->nlocal + atom->nghost;
-      atom->nghost += nrecv;
-      iswap++;
-    }
-  }
-
-  // insure send/recv buffers are long enough for all forward & reverse comm
-
-  int max = MAX(maxforward*smax,maxreverse*rmax);
-  if (max > maxsend) grow_send(max,0);
-  max = MAX(maxforward*rmax,maxreverse*smax);
-  if (max > maxrecv) grow_recv(max);
-
-  // reset global->local map
-
-  if (map_style) atom->map_set();
-}
-
-/* ----------------------------------------------------------------------
-   forward communication invoked by a Pair
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_pair(Pair *pair)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    n = pair->pack_comm(sendnum[iswap],sendlist[iswap],
-                        buf_send,pbc_flag[iswap],pbc[iswap]);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    pair->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication invoked by a Pair
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm_pair(Pair *pair)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = nswap-1; iswap >= 0; iswap--) {
-
-    // pack buffer
-
-    n = pair->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (sendnum[iswap])
-        MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
-                  world,&request);
-      if (recvnum[iswap])
-        MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
-      if (sendnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    pair->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   forward communication invoked by a Fix
-   n = constant number of datums per atom
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_fix(Fix *fix)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    n = fix->pack_comm(sendnum[iswap],sendlist[iswap],
-                       buf_send,pbc_flag[iswap],pbc[iswap]);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication invoked by a Fix
-   n = constant number of datums per atom
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm_fix(Fix *fix)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = nswap-1; iswap >= 0; iswap--) {
-
-    // pack buffer
-
-    n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (sendnum[iswap])
-        MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
-                  world,&request);
-      if (recvnum[iswap])
-        MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
-      if (sendnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   forward communication invoked by a Fix
-   n = total datums for all atoms, allows for variable number/atom
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_variable_fix(Fix *fix)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    n = fix->pack_comm(sendnum[iswap],sendlist[iswap],
-                       buf_send,pbc_flag[iswap],pbc[iswap]);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication invoked by a Fix
-   n = total datums for all atoms, allows for variable number/atom
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm_variable_fix(Fix *fix)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = nswap-1; iswap >= 0; iswap--) {
-
-    // pack buffer
-
-    n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (sendnum[iswap])
-        MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,sendproc[iswap],0,
-                  world,&request);
-      if (recvnum[iswap])
-        MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
-      if (sendnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   forward communication invoked by a Compute
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_compute(Compute *compute)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    n = compute->pack_comm(sendnum[iswap],sendlist[iswap],
-                           buf_send,pbc_flag[iswap],pbc[iswap]);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    compute->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication invoked by a Compute
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm_compute(Compute *compute)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = nswap-1; iswap >= 0; iswap--) {
-
-    // pack buffer
-
-    n = compute->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (sendnum[iswap])
-        MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
-                  world,&request);
-      if (recvnum[iswap])
-        MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
-      if (sendnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    compute->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   forward communication invoked by a Dump
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_dump(Dump *dump)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    n = dump->pack_comm(sendnum[iswap],sendlist[iswap],
-                        buf_send,pbc_flag[iswap],pbc[iswap]);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    dump->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reverse communication invoked by a Dump
-------------------------------------------------------------------------- */
-
-void Comm::reverse_comm_dump(Dump *dump)
-{
-  int iswap,n;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  for (iswap = nswap-1; iswap >= 0; iswap--) {
-
-    // pack buffer
-
-    n = dump->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (sendnum[iswap])
-        MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
-                  world,&request);
-      if (recvnum[iswap])
-        MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
-      if (sendnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    dump->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   forward communication of N values in array
-------------------------------------------------------------------------- */
-
-void Comm::forward_comm_array(int n, double **array)
-{
-  int i,j,k,m,iswap,last;
-  double *buf;
-  MPI_Request request;
-  MPI_Status status;
-
-  // NOTE: check that buf_send and buf_recv are big enough
-
-  for (iswap = 0; iswap < nswap; iswap++) {
-
-    // pack buffer
-
-    m = 0;
-    for (i = 0; i < sendnum[iswap]; i++) {
-      j = sendlist[iswap][i];
-      for (k = 0; k < n; k++)
-        buf_send[m++] = array[j][k];
-    }
-
-    // exchange with another proc
-    // if self, set recv buffer to send buffer
-
-    if (sendproc[iswap] != me) {
-      if (recvnum[iswap])
-        MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
-                  world,&request);
-      if (sendnum[iswap])
-        MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-      if (recvnum[iswap]) MPI_Wait(&request,&status);
-      buf = buf_recv;
-    } else buf = buf_send;
-
-    // unpack buffer
-
-    m = 0;
-    last = firstrecv[iswap] + recvnum[iswap];
-    for (i = firstrecv[iswap]; i < last; i++)
-      for (k = 0; k < n; k++)
-        array[i][k] = buf[m++];
-  }
-}
-
-/* ----------------------------------------------------------------------
-   exchange info provided with all 6 stencil neighbors
-------------------------------------------------------------------------- */
-
-int Comm::exchange_variable(int n, double *inbuf, double *&outbuf)
-{
-  int nsend,nrecv,nrecv1,nrecv2;
-  MPI_Request request;
-  MPI_Status status;
-
-  nrecv = n;
-  if (nrecv > maxrecv) grow_recv(nrecv);
-  memcpy(buf_recv,inbuf,nrecv*sizeof(double));
-
-  // loop over dimensions
-
-  for (int dim = 0; dim < 3; dim++) {
-    
-    // no exchange if only one proc in a dimension
-
-    if (procgrid[dim] == 1) continue;
-
-    // send/recv info in both directions using same buf_recv
-    // if 2 procs in dimension, single send/recv
-    // if more than 2 procs in dimension, send/recv to both neighbors
-
-    nsend = nrecv;
-    MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
-                 &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
-    nrecv += nrecv1;
-    if (procgrid[dim] > 2) {
-      MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
-                   &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
-      nrecv += nrecv2;
-    } else nrecv2 = 0;
-
-    if (nrecv > maxrecv) grow_recv(nrecv);
-    
-    MPI_Irecv(&buf_recv[nsend],nrecv1,MPI_DOUBLE,procneigh[dim][1],0,
-              world,&request);
-    MPI_Send(buf_recv,nsend,MPI_DOUBLE,procneigh[dim][0],0,world);
-    MPI_Wait(&request,&status);
-    
-    if (procgrid[dim] > 2) {
-      MPI_Irecv(&buf_recv[nsend+nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
-                world,&request);
-      MPI_Send(buf_recv,nsend,MPI_DOUBLE,procneigh[dim][1],0,world);
-      MPI_Wait(&request,&status);
-    }
-  }
-
-  outbuf = buf_recv;
-  return nrecv;
-}
-
 /* ----------------------------------------------------------------------
    communicate inbuf around full ring of processors with messtag
    nbytes = size of inbuf = n datums * nper bytes
@@ -1687,313 +504,3 @@ int Comm::read_lines_from_file_universe(FILE *fp, int nlines, int maxline,
   MPI_Bcast(buf,m,MPI_CHAR,0,uworld);
   return 0;
 }
-
-/* ----------------------------------------------------------------------
-   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
-   if flag = 1, realloc
-   if flag = 0, don't need to realloc with copy, just free/malloc
-------------------------------------------------------------------------- */
-
-void Comm::grow_send(int n, int flag)
-{
-  maxsend = static_cast<int> (BUFFACTOR * n);
-  if (flag)
-    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
-  else {
-    memory->destroy(buf_send);
-    memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
-  }
-}
-
-/* ----------------------------------------------------------------------
-   free/malloc the size of the recv buffer as needed with BUFFACTOR
-------------------------------------------------------------------------- */
-
-void Comm::grow_recv(int n)
-{
-  maxrecv = static_cast<int> (BUFFACTOR * n);
-  memory->destroy(buf_recv);
-  memory->create(buf_recv,maxrecv,"comm:buf_recv");
-}
-
-/* ----------------------------------------------------------------------
-   realloc the size of the iswap sendlist as needed with BUFFACTOR
-------------------------------------------------------------------------- */
-
-void Comm::grow_list(int iswap, int n)
-{
-  maxsendlist[iswap] = static_cast<int> (BUFFACTOR * n);
-  memory->grow(sendlist[iswap],maxsendlist[iswap],"comm:sendlist[iswap]");
-}
-
-/* ----------------------------------------------------------------------
-   realloc the buffers needed for swaps
-------------------------------------------------------------------------- */
-
-void Comm::grow_swap(int n)
-{
-  free_swap();
-  allocate_swap(n);
-  if (style == MULTI) {
-    free_multi();
-    allocate_multi(n);
-  }
-
-  sendlist = (int **)
-    memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
-  memory->grow(maxsendlist,n,"comm:maxsendlist");
-  for (int i = maxswap; i < n; i++) {
-    maxsendlist[i] = BUFMIN;
-    memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
-  }
-  maxswap = n;
-}
-
-/* ----------------------------------------------------------------------
-   allocation of swap info
-------------------------------------------------------------------------- */
-
-void Comm::allocate_swap(int n)
-{
-  memory->create(sendnum,n,"comm:sendnum");
-  memory->create(recvnum,n,"comm:recvnum");
-  memory->create(sendproc,n,"comm:sendproc");
-  memory->create(recvproc,n,"comm:recvproc");
-  memory->create(size_forward_recv,n,"comm:size");
-  memory->create(size_reverse_send,n,"comm:size");
-  memory->create(size_reverse_recv,n,"comm:size");
-  memory->create(slablo,n,"comm:slablo");
-  memory->create(slabhi,n,"comm:slabhi");
-  memory->create(firstrecv,n,"comm:firstrecv");
-  memory->create(pbc_flag,n,"comm:pbc_flag");
-  memory->create(pbc,n,6,"comm:pbc");
-}
-
-/* ----------------------------------------------------------------------
-   allocation of multi-type swap info
-------------------------------------------------------------------------- */
-
-void Comm::allocate_multi(int n)
-{
-  multilo = memory->create(multilo,n,atom->ntypes+1,"comm:multilo");
-  multihi = memory->create(multihi,n,atom->ntypes+1,"comm:multihi");
-}
-
-/* ----------------------------------------------------------------------
-   free memory for swaps
-------------------------------------------------------------------------- */
-
-void Comm::free_swap()
-{
-  memory->destroy(sendnum);
-  memory->destroy(recvnum);
-  memory->destroy(sendproc);
-  memory->destroy(recvproc);
-  memory->destroy(size_forward_recv);
-  memory->destroy(size_reverse_send);
-  memory->destroy(size_reverse_recv);
-  memory->destroy(slablo);
-  memory->destroy(slabhi);
-  memory->destroy(firstrecv);
-  memory->destroy(pbc_flag);
-  memory->destroy(pbc);
-}
-
-/* ----------------------------------------------------------------------
-   free memory for multi-type swaps
-------------------------------------------------------------------------- */
-
-void Comm::free_multi()
-{
-  memory->destroy(multilo);
-  memory->destroy(multihi);
-}
-
-/* ----------------------------------------------------------------------
-   set communication style
-   invoked from input script by communicate command
-------------------------------------------------------------------------- */
-
-void Comm::set(int narg, char **arg)
-{
-  if (narg < 1) error->all(FLERR,"Illegal communicate command");
-
-  if (strcmp(arg[0],"single") == 0) style = SINGLE;
-  else if (strcmp(arg[0],"multi") == 0) style = MULTI;
-  else error->all(FLERR,"Illegal communicate command");
-
-  int iarg = 1;
-  while (iarg < narg) {
-    if (strcmp(arg[iarg],"group") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
-      bordergroup = group->find(arg[iarg+1]);
-      if (bordergroup < 0)
-        error->all(FLERR,"Invalid group in communicate command");
-      if (bordergroup && (atom->firstgroupname == NULL ||
-                          strcmp(arg[iarg+1],atom->firstgroupname) != 0))
-        error->all(FLERR,"Communicate group != atom_modify first group");
-      iarg += 2;
-    } else if (strcmp(arg[iarg],"cutoff") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
-      cutghostuser = force->numeric(FLERR,arg[iarg+1]);
-      if (cutghostuser < 0.0)
-        error->all(FLERR,"Invalid cutoff in communicate command");
-      iarg += 2;
-    } else if (strcmp(arg[iarg],"vel") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
-      if (strcmp(arg[iarg+1],"yes") == 0) ghost_velocity = 1;
-      else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
-      else error->all(FLERR,"Illegal communicate command");
-      iarg += 2;
-    } else error->all(FLERR,"Illegal communicate command");
-  }
-}
-
-/* ----------------------------------------------------------------------
-   set dimensions for 3d grid of processors, and associated flags
-   invoked from input script by processors command
-------------------------------------------------------------------------- */
-
-void Comm::set_processors(int narg, char **arg)
-{
-  if (narg < 3) error->all(FLERR,"Illegal processors command");
-
-  if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0;
-  else user_procgrid[0] = force->inumeric(FLERR,arg[0]);
-  if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0;
-  else user_procgrid[1] = force->inumeric(FLERR,arg[1]);
-  if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0;
-  else user_procgrid[2] = force->inumeric(FLERR,arg[2]);
-
-  if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
-    error->all(FLERR,"Illegal processors command");
-
-  int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
-  if (p && p != nprocs)
-    error->all(FLERR,"Specified processors != physical processors");
-
-  int iarg = 3;
-  while (iarg < narg) {
-    if (strcmp(arg[iarg],"grid") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
-
-      if (strcmp(arg[iarg+1],"onelevel") == 0) {
-        gridflag = ONELEVEL;
-
-      } else if (strcmp(arg[iarg+1],"twolevel") == 0) {
-        if (iarg+6 > narg) error->all(FLERR,"Illegal processors command");
-        gridflag = TWOLEVEL;
-
-        ncores = force->inumeric(FLERR,arg[iarg+2]);
-        if (strcmp(arg[iarg+3],"*") == 0) user_coregrid[0] = 0;
-        else user_coregrid[0] = force->inumeric(FLERR,arg[iarg+3]);
-        if (strcmp(arg[iarg+4],"*") == 0) user_coregrid[1] = 0;
-        else user_coregrid[1] = force->inumeric(FLERR,arg[iarg+4]);
-        if (strcmp(arg[iarg+5],"*") == 0) user_coregrid[2] = 0;
-        else user_coregrid[2] = force->inumeric(FLERR,arg[iarg+5]);
-
-        if (ncores <= 0 || user_coregrid[0] < 0 ||
-            user_coregrid[1] < 0 || user_coregrid[2] < 0)
-          error->all(FLERR,"Illegal processors command");
-        iarg += 4;
-
-      } else if (strcmp(arg[iarg+1],"numa") == 0) {
-        gridflag = NUMA;
-
-      } else if (strcmp(arg[iarg],"custom") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal processors command");
-        gridflag = CUSTOM;
-        delete [] customfile;
-        int n = strlen(arg[iarg+2]) + 1;
-        customfile = new char[n];
-        strcpy(customfile,arg[iarg+2]);
-        iarg += 1;
-
-      } else error->all(FLERR,"Illegal processors command");
-      iarg += 2;
-
-    } else if (strcmp(arg[iarg],"map") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
-      if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
-      else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
-      else if (strcmp(arg[iarg+1],"xyz") == 0 ||
-               strcmp(arg[iarg+1],"xzy") == 0 ||
-               strcmp(arg[iarg+1],"yxz") == 0 ||
-               strcmp(arg[iarg+1],"yzx") == 0 ||
-               strcmp(arg[iarg+1],"zxy") == 0 ||
-               strcmp(arg[iarg+1],"zyx") == 0) {
-        mapflag = XYZ;
-        strcpy(xyz,arg[iarg+1]);
-      } else error->all(FLERR,"Illegal processors command");
-      iarg += 2;
-
-    } else if (strcmp(arg[iarg],"part") == 0) {
-      if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
-      if (universe->nworlds == 1)
-        error->all(FLERR,
-                   "Cannot use processors part command "
-                   "without using partitions");
-      int isend = force->inumeric(FLERR,arg[iarg+1]);
-      int irecv = force->inumeric(FLERR,arg[iarg+2]);
-      if (isend < 1 || isend > universe->nworlds ||
-          irecv < 1 || irecv > universe->nworlds || isend == irecv)
-        error->all(FLERR,"Invalid partitions in processors part command");
-      if (isend-1 == universe->iworld) {
-        if (send_to_partition >= 0)
-          error->all(FLERR,
-                     "Sending partition in processors part command "
-                     "is already a sender");
-        send_to_partition = irecv-1;
-      }
-      if (irecv-1 == universe->iworld) {
-        if (recv_from_partition >= 0)
-          error->all(FLERR,
-                     "Receiving partition in processors part command "
-                     "is already a receiver");
-        recv_from_partition = isend-1;
-      }
-
-      // only receiver has otherflag dependency
-
-      if (strcmp(arg[iarg+3],"multiple") == 0) {
-        if (universe->iworld == irecv-1) {
-          otherflag = 1;
-          other_style = MULTIPLE;
-        }
-      } else error->all(FLERR,"Illegal processors command");
-      iarg += 4;
-
-    } else if (strcmp(arg[iarg],"file") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
-      delete [] outfile;
-      int n = strlen(arg[iarg+1]) + 1;
-      outfile = new char[n];
-      strcpy(outfile,arg[iarg+1]);
-      iarg += 2;
-
-    } else error->all(FLERR,"Illegal processors command");
-  }
-
-  // error checks
-
-  if (gridflag == NUMA && mapflag != CART)
-    error->all(FLERR,"Processors grid numa and map style are incompatible");
-  if (otherflag && (gridflag == NUMA || gridflag == CUSTOM))
-    error->all(FLERR,
-               "Processors part option and grid style are incompatible");
-}
-
-/* ----------------------------------------------------------------------
-   return # of bytes of allocated memory
-------------------------------------------------------------------------- */
-
-bigint Comm::memory_usage()
-{
-  bigint bytes = 0;
-  bytes += nprocs * sizeof(int);    // grid2proc
-  for (int i = 0; i < nswap; i++)
-    bytes += memory->usage(sendlist[i],maxsendlist[i]);
-  bytes += memory->usage(buf_send,maxsend+bufextra);
-  bytes += memory->usage(buf_recv,maxrecv);
-  return bytes;
-}
diff --git a/src/comm.h b/src/comm.h
index 2c72359552..1406120175 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -20,6 +20,10 @@ namespace LAMMPS_NS {
 
 class Comm : protected Pointers {
  public:
+  int style;     // comm pattern: 0 = 6-way stencil, 1 = irregular tiling
+  int layout;    // current proc domains: 0 = logical bricks, 1 = general tiling
+                 // can do style=1 on layout=0, but not vice versa
+
   int me,nprocs;                    // proc info
   int procgrid[3];                  // procs assigned in each dim of 3d grid
   int user_procgrid[3];             // user request for procs in each dim
@@ -42,63 +46,50 @@ class Comm : protected Pointers {
 
   Comm(class LAMMPS *);
   virtual ~Comm();
+  void modify_params(int, char **);
 
-  virtual void init();
+  void set_processors(int, char **);      // set 3d processor grid attributes
   virtual void set_proc_grid(int outflag = 1); // setup 3d grid of procs
-  virtual void setup();                        // setup 3d comm pattern
-  virtual void forward_comm(int dummy = 0);    // forward comm of atom coords
-  virtual void reverse_comm();                 // reverse comm of forces
-  virtual void exchange();                     // move atoms to new procs
-  virtual void borders();                      // setup list of atoms to comm
 
-  virtual void forward_comm_pair(class Pair *);    // forward comm from a Pair
-  virtual void reverse_comm_pair(class Pair *);    // reverse comm from a Pair
-  virtual void forward_comm_fix(class Fix *);      // forward comm from a Fix
-  virtual void reverse_comm_fix(class Fix *);      // reverse comm from a Fix
-  virtual void forward_comm_variable_fix(class Fix *); // variable-size variant
-  virtual void reverse_comm_variable_fix(class Fix *); // variable-size variant
-  virtual void forward_comm_compute(class Compute *);  // forward from a Compute
-  virtual void reverse_comm_compute(class Compute *);  // reverse from a Compute
-  virtual void forward_comm_dump(class Dump *);    // forward comm from a Dump
-  virtual void reverse_comm_dump(class Dump *);    // reverse comm from a Dump
-  void forward_comm_array(int, double **);         // forward comm of array
+  virtual void init() = 0;
+  virtual void setup() = 0;                      // setup 3d comm pattern
+  virtual void forward_comm(int dummy = 0) = 0;  // forward comm of atom coords
+  virtual void reverse_comm() = 0;               // reverse comm of forces
+  virtual void exchange() = 0;                   // move atoms to new procs
+  virtual void borders() = 0;                    // setup list of atoms to comm
 
-  int exchange_variable(int, double *,   // exchange of info on neigh stencil
-                        double *&);
-  void ring(int, int, void *, int, void (*)(int, char *),   // ring comm
+  // forward/reverse comm from a Pair, Fix, Compute, Dump
+
+  virtual void forward_comm_pair(class Pair *) = 0;
+  virtual void reverse_comm_pair(class Pair *) = 0;
+  virtual void forward_comm_fix(class Fix *) = 0;
+  virtual void reverse_comm_fix(class Fix *) = 0;
+  virtual void forward_comm_variable_fix(class Fix *) = 0;
+  virtual void reverse_comm_variable_fix(class Fix *) = 0;
+  virtual void forward_comm_compute(class Compute *) = 0;
+  virtual void reverse_comm_compute(class Compute *) = 0;
+  virtual void forward_comm_dump(class Dump *) = 0;
+  virtual void reverse_comm_dump(class Dump *) = 0;
+
+  // forward comm of an array
+  // exchange of info on neigh stencil
+  // set processor mapping options
+
+  virtual void forward_comm_array(int, double **) = 0;  
+  virtual int exchange_variable(int, double *, double *&) = 0;
+  virtual bigint memory_usage() = 0;
+
+  // non-virtual functions common to all Comm styles
+
+  void ring(int, int, void *, int, void (*)(int, char *),
             void *, int self = 1);
-  int read_lines_from_file(FILE *, int, int, char *);  // read/bcast file lines
+  int read_lines_from_file(FILE *, int, int, char *);  
   int read_lines_from_file_universe(FILE *, int, int, char *);  
 
-  virtual void set(int, char **);         // set communication style
-  void set_processors(int, char **);      // set 3d processor grid attributes
-
-  virtual bigint memory_usage();
-
  protected:
-  int style;                        // single vs multi-type comm
-  int nswap;                        // # of swaps to perform = sum of maxneed
-  int recvneed[3][2];               // # of procs away I recv atoms from
-  int sendneed[3][2];               // # of procs away I send atoms to
-  int maxneed[3];                   // max procs away any proc needs, per dim
-  int triclinic;                    // 0 if domain is orthog, 1 if triclinic
-  int maxswap;                      // max # of swaps memory is allocated for
-  int size_forward;                 // # of per-atom datums in forward comm
-  int size_reverse;                 // # of datums in reverse comm
-  int size_border;                  // # of datums in forward border comm
-  int *sendnum,*recvnum;            // # of atoms to send/recv in each swap
-  int *sendproc,*recvproc;          // proc to send/recv to/from at each swap
-  int *size_forward_recv;           // # of values to recv in each forward comm
-  int *size_reverse_send;           // # to send in each reverse comm
-  int *size_reverse_recv;           // # to recv in each reverse comm
-  double *slablo,*slabhi;           // bounds of slab to send at each swap
-  double **multilo,**multihi;       // bounds of slabs for multi-type swap
-  double **cutghostmulti;           // cutghost on a per-type basis
-  int *pbc_flag;                    // general flag for sending atoms thru PBC
-  int **pbc;                        // dimension flags for PBC adjustments
-  int comm_x_only,comm_f_only;      // 1 if only exchange x,f in for/rev comm
-  int map_style;                    // non-0 if global->local mapping is done
-  int bordergroup;                  // only communicate this group in borders
+  int mode;                  // 0 = single cutoff, 1 = multi-type cutoff
+  int bordergroup;           // only communicate this group in borders
+
   int gridflag;                     // option for creating 3d grid
   int mapflag;                      // option for mapping procs to 3d grid
   char xyz[4];                      // xyz mapping of procs to 3d grid
@@ -112,29 +103,6 @@ class Comm : protected Pointers {
   int ncores;                       // # of cores per node
   int coregrid[3];                  // 3d grid of cores within a node
   int user_coregrid[3];             // user request for cores in each dim
-
-  int *firstrecv;                   // where to put 1st recv atom in each swap
-  int **sendlist;                   // list of atoms to send in each swap
-  int *maxsendlist;                 // max size of send list for each swap
-
-  double *buf_send;                 // send buffer for all comm
-  double *buf_recv;                 // recv buffer for all comm
-  int maxsend,maxrecv;              // current size of send/recv buffer
-  int maxforward,maxreverse;        // max # of datums in forward/reverse comm
-
-  int maxexchange;                  // max # of datums/atom in exchange comm 
-  int bufextra;                     // extra space beyond maxsend in send buffer
-
-  int updown(int, int, int, double, int, double *);
-                                            // compare cutoff to procs
-  virtual void grow_send(int, int);         // reallocate send buffer
-  virtual void grow_recv(int);              // free/allocate recv buffer
-  virtual void grow_list(int, int);         // reallocate one sendlist
-  virtual void grow_swap(int);              // grow swap and multi arrays
-  virtual void allocate_swap(int);          // allocate swap arrays
-  virtual void allocate_multi(int);         // allocate multi arrays
-  virtual void free_swap();                 // free swap arrays
-  virtual void free_multi();                // free multi arrays
 };
 
 }
@@ -143,67 +111,4 @@ class Comm : protected Pointers {
 
 /* ERROR/WARNING messages:
 
-W: OMP_NUM_THREADS environment is not set.
-
-This environment variable must be set appropriately to use the
-USER-OMP pacakge.
-
-E: Bad grid of processors
-
-The 3d grid of processors defined by the processors command does not
-match the number of processors LAMMPS is being run on.
-
-E: Processor count in z must be 1 for 2d simulation
-
-Self-explanatory.
-
-E: Illegal ... command
-
-Self-explanatory.  Check the input script syntax and compare to the
-documentation for the command.  You can use -echo screen as a
-command-line option when running LAMMPS to see the offending line.
-
-E: Invalid group in communicate command
-
-Self-explanatory.
-
-E: Communicate group != atom_modify first group
-
-Self-explanatory.
-
-E: Invalid cutoff in communicate command
-
-Specified cutoff must be >= 0.0.
-
-E: Specified processors != physical processors
-
-The 3d grid of processors defined by the processors command does not
-match the number of processors LAMMPS is being run on.
-
-E: Cannot use processors part command without using partitions
-
-See the command-line -partition switch.
-
-E: Invalid partitions in processors part command
-
-Valid partitions are numbered 1 to N and the sender and receiver
-cannot be the same partition.
-
-E: Sending partition in processors part command is already a sender
-
-Cannot specify a partition to be a sender twice.
-
-E: Receiving partition in processors part command is already a receiver
-
-Cannot specify a partition to be a receiver twice.
-
-E: Processors grid numa and map style are incompatible
-
-Using numa for gstyle in the processors command requires using
-cart for the map option.
-
-E: Processors part option and grid style are incompatible
-
-Cannot use gstyle numa or custom with the part option.
-
 */
diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp
index cf24cb9014..4078c6dc46 100644
--- a/src/fix_balance.cpp
+++ b/src/fix_balance.cpp
@@ -27,12 +27,14 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
+enum{SHIFT,RCB};
+
 /* ---------------------------------------------------------------------- */
 
 FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
-  if (narg < 7) error->all(FLERR,"Illegal fix balance command");
+  if (narg < 6) error->all(FLERR,"Illegal fix balance command");
 
   box_change_domain = 1;
   scalar_flag = 1;
@@ -47,23 +49,24 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
   int dimension = domain->dimension;
 
   nevery = force->inumeric(FLERR,arg[3]);
-  if (strlen(arg[4]) > 3) error->all(FLERR,"Illegal fix balance command");
-  strcpy(bstr,arg[4]);
-  nitermax = force->inumeric(FLERR,arg[5]);
-  thresh = force->numeric(FLERR,arg[6]);
+  if (nevery < 0) error->all(FLERR,"Illegal fix balance command");
+  thresh = force->numeric(FLERR,arg[4]);
 
-  if (nevery < 0 || nitermax <= 0 || thresh < 1.0)
-    error->all(FLERR,"Illegal fix balance command");
+  if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
+  else if (strcmp(arg[5],"rcb") == 0) lbstyle = RCB;
+  else error->all(FLERR,"Illegal fix balance command");
 
-  int blen = strlen(bstr);
-  for (int i = 0; i < blen; i++) {
-    if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
-      error->all(FLERR,"Fix balance string is invalid");
-    if (bstr[i] == 'z' && dimension == 2)
-      error->all(FLERR,"Fix balance string is invalid for 2d simulation");
-    for (int j = i+1; j < blen; j++)
-      if (bstr[i] == bstr[j])
-        error->all(FLERR,"Fix balance string is invalid");
+  int iarg = 5;
+  if (lbstyle == SHIFT) {
+    if (iarg+4 > narg) error->all(FLERR,"Illegal fix balance command");
+    strcpy(bstr,arg[iarg+1]);
+    nitermax = force->inumeric(FLERR,arg[iarg+2]);
+    if (nitermax <= 0) error->all(FLERR,"Illegal fix balance command");
+    stopthresh = force->numeric(FLERR,arg[iarg+3]);
+    if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
+    iarg += 4;
+  } else if (lbstyle == RCB) {
+    iarg++;
   }
 
   // optional args
@@ -71,7 +74,6 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
   int outarg = 0;
   fp = NULL;
 
-  int iarg = 7;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"out") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix balance command");
@@ -80,11 +82,27 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
     } else error->all(FLERR,"Illegal fix balance command");
   }
 
+  // error check
+
+  if (lbstyle == SHIFT) {
+    int blen = strlen(bstr);
+    for (int i = 0; i < blen; i++) {
+      if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
+        error->all(FLERR,"Fix balance shift string is invalid");
+      if (bstr[i] == 'z' && dimension == 2)
+        error->all(FLERR,"Fix balance shift string is invalid");
+      for (int j = i+1; j < blen; j++)
+        if (bstr[i] == bstr[j])
+          error->all(FLERR,"Fix balance shift string is invalid");
+    }
+  }
+
   // create instance of Balance class and initialize it with params
+  // NOTE: do I need Balance instance if RCB?
   // create instance of Irregular class
 
   balance = new Balance(lmp);
-  balance->dynamic_setup(bstr,nitermax,thresh);
+  if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh);
   irregular = new Irregular(lmp);
 
   // output file
@@ -216,13 +234,18 @@ void FixBalance::pre_neighbor()
 void FixBalance::rebalance()
 {
   imbprev = imbnow;
-  itercount = balance->dynamic();
+
+  if (lbstyle == SHIFT) {
+    itercount = balance->shift();
+  } else if (lbstyle == RCB) {
+  }
 
   // output of final result
 
   if (fp) balance->dumpout(update->ntimestep,fp);
 
   // reset comm->uniform flag
+  // NOTE: this needs to change with RCB
 
   comm->uniform = 0;
 
diff --git a/src/fix_balance.h b/src/fix_balance.h
index 0f3202c9b3..33b5501232 100644
--- a/src/fix_balance.h
+++ b/src/fix_balance.h
@@ -40,9 +40,9 @@ class FixBalance : public Fix {
   double memory_usage();
 
  private:
-  int nevery,nitermax;
+  int nevery,lbstyle,nitermax;
+  double thresh,stopthresh;
   char bstr[3];
-  double thresh;
   FILE *fp;
 
   double imbnow;                // current imbalance factor
diff --git a/src/force.cpp b/src/force.cpp
index 44c369acca..c316c04a52 100644
--- a/src/force.cpp
+++ b/src/force.cpp
@@ -523,6 +523,10 @@ void Force::create_kspace(int narg, char **arg, const char *suffix)
     kspace_style = new char[n];
     strcpy(kspace_style,arg[0]);
   }
+
+  if (comm->style == 1 && !kspace_match("ewald",0))
+    error->all(FLERR,
+               "Cannot yet use KSpace solver with grid with comm style tiled");
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/input.cpp b/src/input.cpp
index 2c90363c30..ab7c08c2e4 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -24,6 +24,8 @@
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
+#include "comm_brick.h"
+#include "comm_tiled.h"
 #include "group.h"
 #include "domain.h"
 #include "output.h"
@@ -553,7 +555,8 @@ int Input::execute_command()
   else if (!strcmp(command,"bond_style")) bond_style();
   else if (!strcmp(command,"boundary")) boundary();
   else if (!strcmp(command,"box")) box();
-  else if (!strcmp(command,"communicate")) communicate();
+  else if (!strcmp(command,"comm_modify")) comm_modify();
+  else if (!strcmp(command,"comm_style")) comm_style();
   else if (!strcmp(command,"compute")) compute();
   else if (!strcmp(command,"compute_modify")) compute_modify();
   else if (!strcmp(command,"dielectric")) dielectric();
@@ -1145,9 +1148,25 @@ void Input::box()
 
 /* ---------------------------------------------------------------------- */
 
-void Input::communicate()
+void Input::comm_modify()
 {
-  comm->set(narg,arg);
+  comm->modify_params(narg,arg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Input::comm_style()
+{
+  if (narg < 1) error->all(FLERR,"Illegal comm_style command");
+  if (strcmp(arg[0],"brick") == 0) {
+    if (comm->layout)
+      error->all(FLERR,
+                 "Cannot switch to comm style brick from "
+                 "irregular tiling of proc domains");
+    comm = new CommBrick(lmp);
+  } else if (strcmp(arg[0],"tiled") == 0) {
+    comm = new CommTiled(lmp);
+  } else error->all(FLERR,"Illegal comm_style command");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/input.h b/src/input.h
index eaacbdf832..4fc809b847 100644
--- a/src/input.h
+++ b/src/input.h
@@ -83,7 +83,8 @@ class Input : protected Pointers {
   void bond_style();
   void boundary();
   void box();
-  void communicate();
+  void comm_modify();
+  void comm_style();
   void compute();
   void compute_modify();
   void dielectric();
diff --git a/src/lammps.cpp b/src/lammps.cpp
index f37b4106dd..c3151cf8f3 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -35,6 +35,7 @@
 #include "update.h"
 #include "neighbor.h"
 #include "comm.h"
+#include "comm_brick.h"
 #include "domain.h"
 #include "force.h"
 #include "modify.h"
@@ -558,7 +559,7 @@ void LAMMPS::create()
 
   if (cuda) comm = new CommCuda(this);
   else if (kokkos) comm = new CommKokkos(this);
-  else comm = new Comm(this);
+  else comm = new CommBrick(this);
 
   if (cuda) neighbor = new NeighborCuda(this);
   else if (kokkos) neighbor = new NeighborKokkos(this);