diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp new file mode 100644 index 0000000000..be5f275df4 --- /dev/null +++ b/src/comm_brick.cpp @@ -0,0 +1,1537 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author (triclinic) : Pieter in 't Veld (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "comm_brick.h" +#include "universe.h" +#include "atom.h" +#include "atom_vec.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "neighbor.h" +#include "group.h" +#include "modify.h" +#include "fix.h" +#include "compute.h" +#include "output.h" +#include "dump.h" +#include "math_extra.h" +#include "error.h" +#include "memory.h" + +#ifdef _OPENMP +#include "omp.h" +#endif + +using namespace LAMMPS_NS; + +#define BUFFACTOR 1.5 +#define BUFMIN 1000 +#define BUFEXTRA 1000 +#define BIG 1.0e20 + +enum{SINGLE,MULTI}; // same as in Comm + +/* ---------------------------------------------------------------------- */ + +CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp) +{ + style = layout = 0; + + recv_from_partition = send_to_partition = -1; + otherflag = 0; + + grid2proc = NULL; + + uniform = 1; + multilo = multihi = NULL; + cutghostmulti = NULL; + + // use of OpenMP threads + // query OpenMP for number of threads/process set by user at run-time + // if the OMP_NUM_THREADS environment variable is not set, we default + // to using 1 thread. This follows the principle of the least surprise, + // while practically all OpenMP implementations violate it by using + // as many threads as there are (virtual) CPU cores by default. + + nthreads = 1; +#ifdef _OPENMP + if (getenv("OMP_NUM_THREADS") == NULL) { + nthreads = 1; + if (me == 0) + error->warning(FLERR,"OMP_NUM_THREADS environment is not set."); + } else { + nthreads = omp_get_max_threads(); + } + + // enforce consistent number of threads across all MPI tasks + + MPI_Bcast(&nthreads,1,MPI_INT,0,world); + omp_set_num_threads(nthreads); + + if (me == 0) { + if (screen) + fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads); + if (logfile) + fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads); + } +#endif + + // initialize comm buffers & exchange memory + // NOTE: allow for AtomVec to set maxexchange_atom, e.g. 
for atom_style body + + maxexchange_atom = maxexchange_fix = 0; + maxexchange = maxexchange_atom + maxexchange_fix; + bufextra = maxexchange + BUFEXTRA; + + maxsend = BUFMIN; + memory->create(buf_send,maxsend+bufextra,"comm:buf_send"); + maxrecv = BUFMIN; + memory->create(buf_recv,maxrecv,"comm:buf_recv"); + + maxswap = 6; + allocate_swap(maxswap); + + sendlist = (int **) memory->smalloc(maxswap*sizeof(int *),"comm:sendlist"); + memory->create(maxsendlist,maxswap,"comm:maxsendlist"); + for (int i = 0; i < maxswap; i++) { + maxsendlist[i] = BUFMIN; + memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]"); + } +} + +/* ---------------------------------------------------------------------- */ + +CommBrick::~CommBrick() +{ + memory->destroy(grid2proc); + + free_swap(); + if (mode == MULTI) { + free_multi(); + memory->destroy(cutghostmulti); + } + + if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); + memory->sfree(sendlist); + memory->destroy(maxsendlist); + + memory->destroy(buf_send); + memory->destroy(buf_recv); +} + +/* ---------------------------------------------------------------------- */ + +void CommBrick::init() +{ + triclinic = domain->triclinic; + map_style = atom->map_style; + + // comm_only = 1 if only x,f are exchanged in forward/reverse comm + // comm_x_only = 0 if ghost_velocity since velocities are added + + comm_x_only = atom->avec->comm_x_only; + comm_f_only = atom->avec->comm_f_only; + if (ghost_velocity) comm_x_only = 0; + + // set per-atom sizes for forward/reverse/border comm + // augment by velocity and fix quantities if needed + + size_forward = atom->avec->size_forward; + size_reverse = atom->avec->size_reverse; + size_border = atom->avec->size_border; + + if (ghost_velocity) size_forward += atom->avec->size_velocity; + if (ghost_velocity) size_border += atom->avec->size_velocity; + + for (int i = 0; i < modify->nfix; i++) + size_border += modify->fix[i]->comm_border; + + // maxexchange = max # of datums/atom in exchange communication + // maxforward = # of datums in largest forward communication + // maxreverse = # of datums in largest reverse communication + // query pair,fix,compute,dump for their requirements + // pair style can force reverse comm even if newton off + + maxexchange = BUFMIN + maxexchange_fix; + maxforward = MAX(size_forward,size_border); + maxreverse = size_reverse; + + if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward); + if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse); + + for (int i = 0; i < modify->nfix; i++) { + maxforward = MAX(maxforward,modify->fix[i]->comm_forward); + maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse); + } + + for (int i = 0; i < modify->ncompute; i++) { + maxforward = MAX(maxforward,modify->compute[i]->comm_forward); + maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse); + } + + for (int i = 0; i < output->ndump; i++) { + maxforward = MAX(maxforward,output->dump[i]->comm_forward); + maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse); + } + + if (force->newton == 0) maxreverse = 0; + if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off); + + // memory for multi-style communication + + if (mode == MULTI && multilo == NULL) { + allocate_multi(maxswap); + memory->create(cutghostmulti,atom->ntypes+1,3,"comm:cutghostmulti"); + } + if (mode == SINGLE && multilo) { + free_multi(); + memory->destroy(cutghostmulti); + } +} + +/* ---------------------------------------------------------------------- + setup 
spatial-decomposition communication patterns + function of neighbor cutoff(s) & cutghostuser & current box size + single mode sets slab boundaries (slablo,slabhi) based on max cutoff + multi mode sets type-dependent slab boundaries (multilo,multihi) +------------------------------------------------------------------------- */ + +void CommBrick::setup() +{ + // cutghost[] = max distance at which ghost atoms need to be acquired + // for orthogonal: + // cutghost is in box coords = neigh->cutghost in all 3 dims + // for triclinic: + // neigh->cutghost = distance between tilted planes in box coords + // cutghost is in lamda coords = distance between those planes + // for multi: + // cutghostmulti = same as cutghost, only for each atom type + + int i; + int ntypes = atom->ntypes; + double *prd,*sublo,*subhi; + + double cut = MAX(neighbor->cutneighmax,cutghostuser); + + if (triclinic == 0) { + prd = domain->prd; + sublo = domain->sublo; + subhi = domain->subhi; + cutghost[0] = cutghost[1] = cutghost[2] = cut; + + if (mode == MULTI) { + double *cuttype = neighbor->cuttype; + for (i = 1; i <= ntypes; i++) + cutghostmulti[i][0] = cutghostmulti[i][1] = cutghostmulti[i][2] = + cuttype[i]; + } + + } else { + prd = domain->prd_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + double *h_inv = domain->h_inv; + double length0,length1,length2; + length0 = sqrt(h_inv[0]*h_inv[0] + h_inv[5]*h_inv[5] + h_inv[4]*h_inv[4]); + cutghost[0] = cut * length0; + length1 = sqrt(h_inv[1]*h_inv[1] + h_inv[3]*h_inv[3]); + cutghost[1] = cut * length1; + length2 = h_inv[2]; + cutghost[2] = cut * length2; + + if (mode == MULTI) { + double *cuttype = neighbor->cuttype; + for (i = 1; i <= ntypes; i++) { + cutghostmulti[i][0] = cuttype[i] * length0; + cutghostmulti[i][1] = cuttype[i] * length1; + cutghostmulti[i][2] = cuttype[i] * length2; + } + } + } + + // recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost + // 0 = from left, 1 = from right + // do not cross non-periodic boundaries, need[2] = 0 for 2d + // sendneed[idim][0/1] = # of procs away I send atoms to + // 0 = to left, 1 = to right + // set equal to recvneed[idim][1/0] of neighbor proc + // maxneed[idim] = max procs away any proc recvs atoms in either direction + // uniform = 1 = uniform sized sub-domains: + // maxneed is directly computable from sub-domain size + // limit to procgrid-1 for non-PBC + // recvneed = maxneed except for procs near non-PBC + // sendneed = recvneed of neighbor on each side + // uniform = 0 = non-uniform sized sub-domains: + // compute recvneed via updown() which accounts for non-PBC + // sendneed = recvneed of neighbor on each side + // maxneed via Allreduce() of recvneed + + int *periodicity = domain->periodicity; + int left,right; + + if (uniform) { + maxneed[0] = static_cast (cutghost[0] * procgrid[0] / prd[0]) + 1; + maxneed[1] = static_cast (cutghost[1] * procgrid[1] / prd[1]) + 1; + maxneed[2] = static_cast (cutghost[2] * procgrid[2] / prd[2]) + 1; + if (domain->dimension == 2) maxneed[2] = 0; + if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1); + if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1); + if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1); + + if (!periodicity[0]) { + recvneed[0][0] = MIN(maxneed[0],myloc[0]); + recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1); + left = myloc[0] - 1; + if (left < 0) left = procgrid[0] - 1; + sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1); + right = myloc[0] + 1; + if (right == procgrid[0]) right 
= 0; + sendneed[0][1] = MIN(maxneed[0],right); + } else recvneed[0][0] = recvneed[0][1] = + sendneed[0][0] = sendneed[0][1] = maxneed[0]; + + if (!periodicity[1]) { + recvneed[1][0] = MIN(maxneed[1],myloc[1]); + recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1); + left = myloc[1] - 1; + if (left < 0) left = procgrid[1] - 1; + sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1); + right = myloc[1] + 1; + if (right == procgrid[1]) right = 0; + sendneed[1][1] = MIN(maxneed[1],right); + } else recvneed[1][0] = recvneed[1][1] = + sendneed[1][0] = sendneed[1][1] = maxneed[1]; + + if (!periodicity[2]) { + recvneed[2][0] = MIN(maxneed[2],myloc[2]); + recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1); + left = myloc[2] - 1; + if (left < 0) left = procgrid[2] - 1; + sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1); + right = myloc[2] + 1; + if (right == procgrid[2]) right = 0; + sendneed[2][1] = MIN(maxneed[2],right); + } else recvneed[2][0] = recvneed[2][1] = + sendneed[2][0] = sendneed[2][1] = maxneed[2]; + + } else { + recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit); + recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit); + left = myloc[0] - 1; + if (left < 0) left = procgrid[0] - 1; + sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit); + right = myloc[0] + 1; + if (right == procgrid[0]) right = 0; + sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit); + + recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit); + recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit); + left = myloc[1] - 1; + if (left < 0) left = procgrid[1] - 1; + sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit); + right = myloc[1] + 1; + if (right == procgrid[1]) right = 0; + sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit); + + if (domain->dimension == 3) { + recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit); + recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit); + left = myloc[2] - 1; + if (left < 0) left = procgrid[2] - 1; + sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit); + right = myloc[2] + 1; + if (right == procgrid[2]) right = 0; + sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit); + } else recvneed[2][0] = recvneed[2][1] = + sendneed[2][0] = sendneed[2][1] = 0; + + int all[6]; + MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world); + maxneed[0] = MAX(all[0],all[1]); + maxneed[1] = MAX(all[2],all[3]); + maxneed[2] = MAX(all[4],all[5]); + } + + // allocate comm memory + + nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]); + if (nswap > maxswap) grow_swap(nswap); + + // setup parameters for each exchange: + // sendproc = proc to send to at each swap + // recvproc = proc to recv from at each swap + // for mode SINGLE: + // slablo/slabhi = boundaries for slab of atoms to send at each swap + // use -BIG/midpt/BIG to insure all atoms included even if round-off occurs + // if round-off, atoms recvd across PBC can be < or > than subbox boundary + // note that borders() only loops over subset of atoms during each swap + // treat all as PBC here, non-PBC is handled in borders() via r/s need[][] + // for mode MULTI: + // multilo/multihi is same, with slablo/slabhi for each atom type + // pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary + // pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs + // for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords + // 1st part of if statement is 
sending to the west/south/down + // 2nd part of if statement is sending to the east/north/up + + int dim,ineed; + + int iswap = 0; + for (dim = 0; dim < 3; dim++) { + for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) { + pbc_flag[iswap] = 0; + pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] = + pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0; + + if (ineed % 2 == 0) { + sendproc[iswap] = procneigh[dim][0]; + recvproc[iswap] = procneigh[dim][1]; + if (mode == SINGLE) { + if (ineed < 2) slablo[iswap] = -BIG; + else slablo[iswap] = 0.5 * (sublo[dim] + subhi[dim]); + slabhi[iswap] = sublo[dim] + cutghost[dim]; + } else { + for (i = 1; i <= ntypes; i++) { + if (ineed < 2) multilo[iswap][i] = -BIG; + else multilo[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]); + multihi[iswap][i] = sublo[dim] + cutghostmulti[i][dim]; + } + } + if (myloc[dim] == 0) { + pbc_flag[iswap] = 1; + pbc[iswap][dim] = 1; + if (triclinic) { + if (dim == 1) pbc[iswap][5] = 1; + else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1; + } + } + + } else { + sendproc[iswap] = procneigh[dim][1]; + recvproc[iswap] = procneigh[dim][0]; + if (mode == SINGLE) { + slablo[iswap] = subhi[dim] - cutghost[dim]; + if (ineed < 2) slabhi[iswap] = BIG; + else slabhi[iswap] = 0.5 * (sublo[dim] + subhi[dim]); + } else { + for (i = 1; i <= ntypes; i++) { + multilo[iswap][i] = subhi[dim] - cutghostmulti[i][dim]; + if (ineed < 2) multihi[iswap][i] = BIG; + else multihi[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]); + } + } + if (myloc[dim] == procgrid[dim]-1) { + pbc_flag[iswap] = 1; + pbc[iswap][dim] = -1; + if (triclinic) { + if (dim == 1) pbc[iswap][5] = -1; + else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = -1; + } + } + } + + iswap++; + } + } +} + +/* ---------------------------------------------------------------------- + walk up/down the extent of nearby processors in dim and dir + loc = myloc of proc to start at + dir = 0/1 = walk to left/right + do not cross non-periodic boundaries + is not called for z dim in 2d + return how many procs away are needed to encompass cutghost away from loc +------------------------------------------------------------------------- */ + +int CommBrick::updown(int dim, int dir, int loc, + double prd, int periodicity, double *split) +{ + int index,count; + double frac,delta; + + if (dir == 0) { + frac = cutghost[dim]/prd; + index = loc - 1; + delta = 0.0; + count = 0; + while (delta < frac) { + if (index < 0) { + if (!periodicity) break; + index = procgrid[dim] - 1; + } + count++; + delta += split[index+1] - split[index]; + index--; + } + + } else { + frac = cutghost[dim]/prd; + index = loc + 1; + delta = 0.0; + count = 0; + while (delta < frac) { + if (index >= procgrid[dim]) { + if (!periodicity) break; + index = 0; + } + count++; + delta += split[index+1] - split[index]; + index++; + } + } + + return count; +} + +/* ---------------------------------------------------------------------- + forward communication of atom coords every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm(int dummy) +{ + int n; + MPI_Request request; + MPI_Status status; + AtomVec *avec = atom->avec; + double **x = atom->x; + double *buf; + + // exchange data with another proc + // if other proc is self, just copy + // if comm_x_only set, exchange or copy directly to x, don't unpack + + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap] != me) { + if (comm_x_only) { + if 
(size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; + else buf = NULL; + if (size_forward_recv[iswap]) + MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + n = avec->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (size_forward_recv[iswap]) MPI_Wait(&request,&status); + } else if (ghost_velocity) { + if (size_forward_recv[iswap]) + MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (size_forward_recv[iswap]) MPI_Wait(&request,&status); + avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); + } else { + if (size_forward_recv[iswap]) + MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + n = avec->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (size_forward_recv[iswap]) MPI_Wait(&request,&status); + avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); + } + + } else { + if (comm_x_only) { + if (sendnum[iswap]) + n = avec->pack_comm(sendnum[iswap],sendlist[iswap], + x[firstrecv[iswap]],pbc_flag[iswap], + pbc[iswap]); + } else if (ghost_velocity) { + n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); + } else { + n = avec->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); + } + } + } +} + +/* ---------------------------------------------------------------------- + reverse communication of forces on atoms every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm() +{ + int n; + MPI_Request request; + MPI_Status status; + AtomVec *avec = atom->avec; + double **f = atom->f; + double *buf; + + // exchange data with another proc + // if other proc is self, just copy + // if comm_f_only set, exchange or copy directly from f, don't pack + + for (int iswap = nswap-1; iswap >= 0; iswap--) { + if (sendproc[iswap] != me) { + if (comm_f_only) { + if (size_reverse_recv[iswap]) + MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; + else buf = NULL; + if (size_reverse_send[iswap]) + MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, + recvproc[iswap],0,world); + if (size_reverse_recv[iswap]) MPI_Wait(&request,&status); + } else { + if (size_reverse_recv[iswap]) + MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, + sendproc[iswap],0,world,&request); + n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); + if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); + if (size_reverse_recv[iswap]) MPI_Wait(&request,&status); + } + avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); + + } else { + if (comm_f_only) { + if (sendnum[iswap]) + avec->unpack_reverse(sendnum[iswap],sendlist[iswap], + f[firstrecv[iswap]]); + } else { + n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); + 
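// Both forward_comm() and reverse_comm() repeat one idiom per swap: post a
// non-blocking receive first, pack the listed atoms into a flat buffer,
// send it, wait, then unpack into consecutive slots starting at firstrecv.
// A minimal self-contained sketch of that pattern follows; pack_coords()
// and unpack_coords() are hypothetical stand-ins for the AtomVec
// pack_comm()/unpack_comm() calls, and zero-length messages are skipped
// just as in the loops above.

#include <mpi.h>

// gather 3 doubles per listed atom into a contiguous send buffer
static int pack_coords(int n, const int *list, double **x, double *buf)
{
  int m = 0;
  for (int i = 0; i < n; i++) {
    const double *xi = x[list[i]];
    buf[m++] = xi[0]; buf[m++] = xi[1]; buf[m++] = xi[2];
  }
  return m;
}

// scatter received coords into nrecv consecutive atoms starting at first
static void unpack_coords(int nrecv, int first, double **x, const double *buf)
{
  int m = 0;
  for (int i = first; i < first + nrecv; i++) {
    x[i][0] = buf[m++]; x[i][1] = buf[m++]; x[i][2] = buf[m++];
  }
}

// one swap with a neighbor pair: the receive is posted before the blocking
// send so the two partner ranks cannot deadlock on each other
static void swap_coords(double **x, int nsend, const int *sendlist,
                        int nrecv, int firstrecv, int sendproc, int recvproc,
                        double *buf_send, double *buf_recv, MPI_Comm comm)
{
  MPI_Request request;
  if (nrecv) MPI_Irecv(buf_recv, 3*nrecv, MPI_DOUBLE, recvproc, 0, comm, &request);
  int n = pack_coords(nsend, sendlist, x, buf_send);
  if (n) MPI_Send(buf_send, n, MPI_DOUBLE, sendproc, 0, comm);
  if (nrecv) {
    MPI_Wait(&request, MPI_STATUS_IGNORE);
    unpack_coords(nrecv, firstrecv, x, buf_recv);
  }
}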
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); + } + } + } +} + +/* ---------------------------------------------------------------------- + exchange: move atoms to correct processors + atoms exchanged with all 6 stencil neighbors + send out atoms that have left my box, receive ones entering my box + atoms will be lost if not inside some proc's box + can happen if atom moves outside of non-periodic bounary + or if atom moves more than one proc away + this routine called before every reneighboring + for triclinic, atoms must be in lamda coords (0-1) before exchange is called +------------------------------------------------------------------------- */ + +void CommBrick::exchange() +{ + int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal; + double lo,hi,value; + double **x; + double *sublo,*subhi,*buf; + MPI_Request request; + MPI_Status status; + AtomVec *avec = atom->avec; + + // clear global->local map for owned and ghost atoms + // b/c atoms migrate to new procs in exchange() and + // new ghosts are created in borders() + // map_set() is done at end of borders() + // clear ghost count and any ghost bonus data internal to AtomVec + + if (map_style) atom->map_clear(); + atom->nghost = 0; + atom->avec->clear_bonus(); + + // insure send buf is large enough for single atom + // fixes can change per-atom size requirement on-the-fly + + int bufextra_old = bufextra; + maxexchange = maxexchange_atom + maxexchange_fix; + bufextra = maxexchange + BUFEXTRA; + if (bufextra > bufextra_old) + memory->grow(buf_send,maxsend+bufextra,"comm:buf_send"); + + // subbox bounds for orthogonal or triclinic + + if (triclinic == 0) { + sublo = domain->sublo; + subhi = domain->subhi; + } else { + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + // loop over dimensions + + for (int dim = 0; dim < 3; dim++) { + + // fill buffer with atoms leaving my box, using < and >= + // when atom is deleted, fill it in with last atom + + x = atom->x; + lo = sublo[dim]; + hi = subhi[dim]; + nlocal = atom->nlocal; + i = nsend = 0; + + while (i < nlocal) { + if (x[i][dim] < lo || x[i][dim] >= hi) { + if (nsend > maxsend) grow_send(nsend,1); + nsend += avec->pack_exchange(i,&buf_send[nsend]); + avec->copy(nlocal-1,i,1); + nlocal--; + } else i++; + } + atom->nlocal = nlocal; + + // send/recv atoms in both directions + // if 1 proc in dimension, no send/recv, set recv buf to send buf + // if 2 procs in dimension, single send/recv + // if more than 2 procs in dimension, send/recv to both neighbors + + if (procgrid[dim] == 1) { + nrecv = nsend; + buf = buf_send; + + } else { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, + &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status); + nrecv = nrecv1; + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, + &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status); + nrecv += nrecv2; + } + if (nrecv > maxrecv) grow_recv(nrecv); + + MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,&status); + + if (procgrid[dim] > 2) { + MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,&status); + } + + buf = buf_recv; + } + + // check incoming atoms to see if they are in my box + // if so, add to my list + + m = 0; + while (m < nrecv) { + value = buf[m+dim+1]; + if (value >= lo && value < hi) m += avec->unpack_exchange(&buf[m]); + 
else m += static_cast (buf[m]); + } + } + + if (atom->firstgroupname) atom->first_reorder(); +} + +/* ---------------------------------------------------------------------- + borders: list nearby atoms to send to neighboring procs at every timestep + one list is created for every swap that will be made + as list is made, actually do swaps + this does equivalent of a communicate, so don't need to explicitly + call communicate routine on reneighboring timestep + this routine is called before every reneighboring + for triclinic, atoms must be in lamda coords (0-1) before borders is called +------------------------------------------------------------------------- */ + +void CommBrick::borders() +{ + int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax; + int nsend,nrecv,sendflag,nfirst,nlast,ngroup; + double lo,hi; + int *type; + double **x; + double *buf,*mlo,*mhi; + MPI_Request request; + MPI_Status status; + AtomVec *avec = atom->avec; + + // do swaps over all 3 dimensions + + iswap = 0; + smax = rmax = 0; + + for (dim = 0; dim < 3; dim++) { + nlast = 0; + twoneed = 2*maxneed[dim]; + for (ineed = 0; ineed < twoneed; ineed++) { + + // find atoms within slab boundaries lo/hi using <= and >= + // check atoms between nfirst and nlast + // for first swaps in a dim, check owned and ghost + // for later swaps in a dim, only check newly arrived ghosts + // store sent atom indices in list for use in future timesteps + + x = atom->x; + if (mode == SINGLE) { + lo = slablo[iswap]; + hi = slabhi[iswap]; + } else { + type = atom->type; + mlo = multilo[iswap]; + mhi = multihi[iswap]; + } + if (ineed % 2 == 0) { + nfirst = nlast; + nlast = atom->nlocal + atom->nghost; + } + + nsend = 0; + + // sendflag = 0 if I do not send on this swap + // sendneed test indicates receiver no longer requires data + // e.g. 
due to non-PBC or non-uniform sub-domains + + if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0; + else sendflag = 1; + + // find send atoms according to SINGLE vs MULTI + // all atoms eligible versus atoms in bordergroup + // only need to limit loop to bordergroup for first sends (ineed < 2) + // on these sends, break loop in two: owned (in group) and ghost + + if (sendflag) { + if (!bordergroup || ineed >= 2) { + if (mode == SINGLE) { + for (i = nfirst; i < nlast; i++) + if (x[i][dim] >= lo && x[i][dim] <= hi) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + } else { + for (i = nfirst; i < nlast; i++) { + itype = type[i]; + if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + } + } + + } else { + if (mode == SINGLE) { + ngroup = atom->nfirst; + for (i = 0; i < ngroup; i++) + if (x[i][dim] >= lo && x[i][dim] <= hi) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + for (i = atom->nlocal; i < nlast; i++) + if (x[i][dim] >= lo && x[i][dim] <= hi) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + } else { + ngroup = atom->nfirst; + for (i = 0; i < ngroup; i++) { + itype = type[i]; + if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + } + for (i = atom->nlocal; i < nlast; i++) { + itype = type[i]; + if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { + if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); + sendlist[iswap][nsend++] = i; + } + } + } + } + } + + // pack up list of border atoms + + if (nsend*size_border > maxsend) grow_send(nsend*size_border,0); + if (ghost_velocity) + n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send, + pbc_flag[iswap],pbc[iswap]); + else + n = avec->pack_border(nsend,sendlist[iswap],buf_send, + pbc_flag[iswap],pbc[iswap]); + + // swap atoms with other proc + // no MPI calls except SendRecv if nsend/nrecv = 0 + // put incoming ghosts at end of my atom arrays + // if swapping with self, simply copy, no messages + + if (sendproc[iswap] != me) { + MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0, + &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status); + if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border); + if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE, + recvproc[iswap],0,world,&request); + if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (nrecv) MPI_Wait(&request,&status); + buf = buf_recv; + } else { + nrecv = nsend; + buf = buf_send; + } + + // unpack buffer + + if (ghost_velocity) + avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); + else + avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf); + + // set all pointers & counters + + smax = MAX(smax,nsend); + rmax = MAX(rmax,nrecv); + sendnum[iswap] = nsend; + recvnum[iswap] = nrecv; + size_forward_recv[iswap] = nrecv*size_forward; + size_reverse_send[iswap] = nrecv*size_reverse; + size_reverse_recv[iswap] = nsend*size_reverse; + firstrecv[iswap] = atom->nlocal + atom->nghost; + atom->nghost += nrecv; + iswap++; + } + } + + // insure send/recv buffers are long enough for all forward & reverse comm + + int max = MAX(maxforward*smax,maxreverse*rmax); + if (max > maxsend) grow_send(max,0); + max = MAX(maxforward*rmax,maxreverse*smax); + if (max > maxrecv) grow_recv(max); + + // reset 
global->local map + + if (map_style) atom->map_set(); +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Pair +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_pair(Pair *pair) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + n = pair->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + pair->unpack_comm(recvnum[iswap],firstrecv[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Pair +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm_pair(Pair *pair) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = nswap-1; iswap >= 0; iswap--) { + + // pack buffer + + n = pair->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (sendnum[iswap]) + MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0, + world,&request); + if (recvnum[iswap]) + MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world); + if (sendnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + pair->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Fix + n = constant number of datums per atom +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_fix(Fix *fix) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + n = fix->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Fix + n = constant number of datums per atom +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm_fix(Fix *fix) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = nswap-1; iswap >= 0; iswap--) { + + // pack buffer + + n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send); + + // exchange with 
another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (sendnum[iswap]) + MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0, + world,&request); + if (recvnum[iswap]) + MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world); + if (sendnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Fix + n = total datums for all atoms, allows for variable number/atom +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_variable_fix(Fix *fix) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + n = fix->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Fix + n = total datums for all atoms, allows for variable number/atom +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm_variable_fix(Fix *fix) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = nswap-1; iswap >= 0; iswap--) { + + // pack buffer + + n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (sendnum[iswap]) + MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,sendproc[iswap],0, + world,&request); + if (recvnum[iswap]) + MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); + if (sendnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Compute +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_compute(Compute *compute) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + n = compute->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + compute->unpack_comm(recvnum[iswap],firstrecv[iswap],buf); + } +} + +/* 
---------------------------------------------------------------------- + reverse communication invoked by a Compute +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm_compute(Compute *compute) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = nswap-1; iswap >= 0; iswap--) { + + // pack buffer + + n = compute->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (sendnum[iswap]) + MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0, + world,&request); + if (recvnum[iswap]) + MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world); + if (sendnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + compute->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Dump +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_dump(Dump *dump) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + n = dump->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + dump->unpack_comm(recvnum[iswap],firstrecv[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Dump +------------------------------------------------------------------------- */ + +void CommBrick::reverse_comm_dump(Dump *dump) +{ + int iswap,n; + double *buf; + MPI_Request request; + MPI_Status status; + + for (iswap = nswap-1; iswap >= 0; iswap--) { + + // pack buffer + + n = dump->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send); + + // exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (sendnum[iswap]) + MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0, + world,&request); + if (recvnum[iswap]) + MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world); + if (sendnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + dump->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf); + } +} + +/* ---------------------------------------------------------------------- + forward communication of N values in array +------------------------------------------------------------------------- */ + +void CommBrick::forward_comm_array(int n, double **array) +{ + int i,j,k,m,iswap,last; + double *buf; + MPI_Request request; + MPI_Status status; + + // NOTE: check that buf_send and buf_recv are big enough + + for (iswap = 0; iswap < nswap; iswap++) { + + // pack buffer + + m = 0; + for (i = 0; i < sendnum[iswap]; i++) { + j = sendlist[iswap][i]; + for (k = 0; k < n; k++) + buf_send[m++] = array[j][k]; + } + + 
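// The loop above gathers n values per listed atom into buf_send; after the
// exchange, the mirror loop below scatters them into consecutive ghost rows
// starting at firstrecv.  A stand-alone sketch of that gather/scatter pair
// (pack_array()/unpack_array() are hypothetical names, not LAMMPS API).  As
// the NOTE above observes, the caller must ensure the buffers can hold
// n*sendnum and n*recvnum doubles for the largest swap.

// gather n columns of array for each atom named in list
static int pack_array(int nsend, const int *list, int n,
                      double **array, double *buf)
{
  int m = 0;
  for (int i = 0; i < nsend; i++) {
    const double *row = array[list[i]];
    for (int k = 0; k < n; k++) buf[m++] = row[k];
  }
  return m;
}

// scatter buf into nrecv consecutive rows starting at first
static void unpack_array(int nrecv, int first, int n,
                         double **array, const double *buf)
{
  int m = 0;
  for (int i = first; i < first + nrecv; i++)
    for (int k = 0; k < n; k++) array[i][k] = buf[m++];
}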
// exchange with another proc + // if self, set recv buffer to send buffer + + if (sendproc[iswap] != me) { + if (recvnum[iswap]) + MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0, + world,&request); + if (sendnum[iswap]) + MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world); + if (recvnum[iswap]) MPI_Wait(&request,&status); + buf = buf_recv; + } else buf = buf_send; + + // unpack buffer + + m = 0; + last = firstrecv[iswap] + recvnum[iswap]; + for (i = firstrecv[iswap]; i < last; i++) + for (k = 0; k < n; k++) + array[i][k] = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- + exchange info provided with all 6 stencil neighbors +------------------------------------------------------------------------- */ + +int CommBrick::exchange_variable(int n, double *inbuf, double *&outbuf) +{ + int nsend,nrecv,nrecv1,nrecv2; + MPI_Request request; + MPI_Status status; + + nrecv = n; + if (nrecv > maxrecv) grow_recv(nrecv); + memcpy(buf_recv,inbuf,nrecv*sizeof(double)); + + // loop over dimensions + + for (int dim = 0; dim < 3; dim++) { + + // no exchange if only one proc in a dimension + + if (procgrid[dim] == 1) continue; + + // send/recv info in both directions using same buf_recv + // if 2 procs in dimension, single send/recv + // if more than 2 procs in dimension, send/recv to both neighbors + + nsend = nrecv; + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, + &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status); + nrecv += nrecv1; + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, + &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status); + nrecv += nrecv2; + } else nrecv2 = 0; + + if (nrecv > maxrecv) grow_recv(nrecv); + + MPI_Irecv(&buf_recv[nsend],nrecv1,MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(buf_recv,nsend,MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,&status); + + if (procgrid[dim] > 2) { + MPI_Irecv(&buf_recv[nsend+nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(buf_recv,nsend,MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,&status); + } + } + + outbuf = buf_recv; + return nrecv; +} + +/* ---------------------------------------------------------------------- + realloc the size of the send buffer as needed with BUFFACTOR and bufextra + if flag = 1, realloc + if flag = 0, don't need to realloc with copy, just free/malloc +------------------------------------------------------------------------- */ + +void CommBrick::grow_send(int n, int flag) +{ + maxsend = static_cast (BUFFACTOR * n); + if (flag) + memory->grow(buf_send,maxsend+bufextra,"comm:buf_send"); + else { + memory->destroy(buf_send); + memory->create(buf_send,maxsend+bufextra,"comm:buf_send"); + } +} + +/* ---------------------------------------------------------------------- + free/malloc the size of the recv buffer as needed with BUFFACTOR +------------------------------------------------------------------------- */ + +void CommBrick::grow_recv(int n) +{ + maxrecv = static_cast (BUFFACTOR * n); + memory->destroy(buf_recv); + memory->create(buf_recv,maxrecv,"comm:buf_recv"); +} + +/* ---------------------------------------------------------------------- + realloc the size of the iswap sendlist as needed with BUFFACTOR +------------------------------------------------------------------------- */ + +void CommBrick::grow_list(int iswap, int n) +{ + maxsendlist[iswap] = static_cast (BUFFACTOR * n); + 
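// grow_send(), grow_recv(), and grow_list() all resize to BUFFACTOR (1.5x)
// the requested length rather than the exact size, so a stream of
// slightly-larger requests triggers only a logarithmic number of
// reallocations.  A minimal sketch of that policy, using the C allocator in
// place of LAMMPS's Memory class; grow_buffer() is a hypothetical helper
// whose flag semantics match grow_send():

#include <cstdlib>

#define BUFFACTOR 1.5

// resize buf to hold at least n doubles, with 1.5x headroom;
// flag = 1 preserves existing contents (realloc), flag = 0 does not
// need to, so a cheaper free + malloc is used instead
static void grow_buffer(double *&buf, int &maxsize, int n, int flag)
{
  maxsize = static_cast<int>(BUFFACTOR * n);
  if (flag) {
    buf = (double *) realloc(buf, maxsize * sizeof(double));
  } else {
    free(buf);
    buf = (double *) malloc(maxsize * sizeof(double));
  }
}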
memory->grow(sendlist[iswap],maxsendlist[iswap],"comm:sendlist[iswap]"); +} + +/* ---------------------------------------------------------------------- + realloc the buffers needed for swaps +------------------------------------------------------------------------- */ + +void CommBrick::grow_swap(int n) +{ + free_swap(); + allocate_swap(n); + if (mode == MULTI) { + free_multi(); + allocate_multi(n); + } + + sendlist = (int **) + memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist"); + memory->grow(maxsendlist,n,"comm:maxsendlist"); + for (int i = maxswap; i < n; i++) { + maxsendlist[i] = BUFMIN; + memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]"); + } + maxswap = n; +} + +/* ---------------------------------------------------------------------- + allocation of swap info +------------------------------------------------------------------------- */ + +void CommBrick::allocate_swap(int n) +{ + memory->create(sendnum,n,"comm:sendnum"); + memory->create(recvnum,n,"comm:recvnum"); + memory->create(sendproc,n,"comm:sendproc"); + memory->create(recvproc,n,"comm:recvproc"); + memory->create(size_forward_recv,n,"comm:size"); + memory->create(size_reverse_send,n,"comm:size"); + memory->create(size_reverse_recv,n,"comm:size"); + memory->create(slablo,n,"comm:slablo"); + memory->create(slabhi,n,"comm:slabhi"); + memory->create(firstrecv,n,"comm:firstrecv"); + memory->create(pbc_flag,n,"comm:pbc_flag"); + memory->create(pbc,n,6,"comm:pbc"); +} + +/* ---------------------------------------------------------------------- + allocation of multi-type swap info +------------------------------------------------------------------------- */ + +void CommBrick::allocate_multi(int n) +{ + multilo = memory->create(multilo,n,atom->ntypes+1,"comm:multilo"); + multihi = memory->create(multihi,n,atom->ntypes+1,"comm:multihi"); +} + +/* ---------------------------------------------------------------------- + free memory for swaps +------------------------------------------------------------------------- */ + +void CommBrick::free_swap() +{ + memory->destroy(sendnum); + memory->destroy(recvnum); + memory->destroy(sendproc); + memory->destroy(recvproc); + memory->destroy(size_forward_recv); + memory->destroy(size_reverse_send); + memory->destroy(size_reverse_recv); + memory->destroy(slablo); + memory->destroy(slabhi); + memory->destroy(firstrecv); + memory->destroy(pbc_flag); + memory->destroy(pbc); +} + +/* ---------------------------------------------------------------------- + free memory for multi-type swaps +------------------------------------------------------------------------- */ + +void CommBrick::free_multi() +{ + memory->destroy(multilo); + memory->destroy(multihi); +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint CommBrick::memory_usage() +{ + bigint bytes = 0; + bytes += nprocs * sizeof(int); // grid2proc + for (int i = 0; i < nswap; i++) + bytes += memory->usage(sendlist[i],maxsendlist[i]); + bytes += memory->usage(buf_send,maxsend+bufextra); + bytes += memory->usage(buf_recv,maxrecv); + return bytes; +} diff --git a/src/comm_brick.h b/src/comm_brick.h new file mode 100644 index 0000000000..20592accab --- /dev/null +++ b/src/comm_brick.h @@ -0,0 +1,164 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National 
Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_COMM_BRICK_H +#define LMP_COMM_BRICK_H + +#include "comm.h" + +namespace LAMMPS_NS { + +class CommBrick : public Comm { + public: + CommBrick(class LAMMPS *); + virtual ~CommBrick(); + + virtual void init(); + virtual void setup(); // setup 3d comm pattern + virtual void forward_comm(int dummy = 0); // forward comm of atom coords + virtual void reverse_comm(); // reverse comm of forces + virtual void exchange(); // move atoms to new procs + virtual void borders(); // setup list of atoms to comm + + virtual void forward_comm_pair(class Pair *); // forward comm from a Pair + virtual void reverse_comm_pair(class Pair *); // reverse comm from a Pair + virtual void forward_comm_fix(class Fix *); // forward comm from a Fix + virtual void reverse_comm_fix(class Fix *); // reverse comm from a Fix + virtual void forward_comm_variable_fix(class Fix *); // variable-size variant + virtual void reverse_comm_variable_fix(class Fix *); // variable-size variant + virtual void forward_comm_compute(class Compute *); // forward from a Compute + virtual void reverse_comm_compute(class Compute *); // reverse from a Compute + virtual void forward_comm_dump(class Dump *); // forward comm from a Dump + virtual void reverse_comm_dump(class Dump *); // reverse comm from a Dump + + void forward_comm_array(int, double **); // forward comm of array + int exchange_variable(int, double *, double *&); // exchange on neigh stencil + virtual bigint memory_usage(); + + protected: + int nswap; // # of swaps to perform = sum of maxneed + int recvneed[3][2]; // # of procs away I recv atoms from + int sendneed[3][2]; // # of procs away I send atoms to + int maxneed[3]; // max procs away any proc needs, per dim + int triclinic; // 0 if domain is orthog, 1 if triclinic + int maxswap; // max # of swaps memory is allocated for + int size_forward; // # of per-atom datums in forward comm + int size_reverse; // # of datums in reverse comm + int size_border; // # of datums in forward border comm + int *sendnum,*recvnum; // # of atoms to send/recv in each swap + int *sendproc,*recvproc; // proc to send/recv to/from at each swap + int *size_forward_recv; // # of values to recv in each forward comm + int *size_reverse_send; // # to send in each reverse comm + int *size_reverse_recv; // # to recv in each reverse comm + double *slablo,*slabhi; // bounds of slab to send at each swap + double **multilo,**multihi; // bounds of slabs for multi-type swap + double **cutghostmulti; // cutghost on a per-type basis + int *pbc_flag; // general flag for sending atoms thru PBC + int **pbc; // dimension flags for PBC adjustments + int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm + int map_style; // non-0 if global->local mapping is done + + int *firstrecv; // where to put 1st recv atom in each swap + int **sendlist; // list of atoms to send in each swap + int *maxsendlist; // max size of send list for each swap + + double *buf_send; // send buffer for all comm + double *buf_recv; // recv buffer for all comm + int maxsend,maxrecv; // current size of send/recv buffer + int 
maxforward,maxreverse; // max # of datums in forward/reverse comm + + int maxexchange; // max # of datums/atom in exchange comm + int bufextra; // extra space beyond maxsend in send buffer + + int updown(int, int, int, double, int, double *); + // compare cutoff to procs + virtual void grow_send(int, int); // reallocate send buffer + virtual void grow_recv(int); // free/allocate recv buffer + virtual void grow_list(int, int); // reallocate one sendlist + virtual void grow_swap(int); // grow swap and multi arrays + virtual void allocate_swap(int); // allocate swap arrays + virtual void allocate_multi(int); // allocate multi arrays + virtual void free_swap(); // free swap arrays + virtual void free_multi(); // free multi arrays +}; + +} + +#endif + +/* ERROR/WARNING messages: + +W: OMP_NUM_THREADS environment is not set. + +This environment variable must be set appropriately to use the +USER-OMP pacakge. + +E: Bad grid of processors + +The 3d grid of processors defined by the processors command does not +match the number of processors LAMMPS is being run on. + +E: Processor count in z must be 1 for 2d simulation + +Self-explanatory. + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Invalid group in communicate command + +Self-explanatory. + +E: Communicate group != atom_modify first group + +Self-explanatory. + +E: Invalid cutoff in communicate command + +Specified cutoff must be >= 0.0. + +E: Specified processors != physical processors + +The 3d grid of processors defined by the processors command does not +match the number of processors LAMMPS is being run on. + +E: Cannot use processors part command without using partitions + +See the command-line -partition switch. + +E: Invalid partitions in processors part command + +Valid partitions are numbered 1 to N and the sender and receiver +cannot be the same partition. + +E: Sending partition in processors part command is already a sender + +Cannot specify a partition to be a sender twice. + +E: Receiving partition in processors part command is already a receiver + +Cannot specify a partition to be a receiver twice. + +E: Processors grid numa and map style are incompatible + +Using numa for gstyle in the processors command requires using +cart for the map option. + +E: Processors part option and grid style are incompatible + +Cannot use gstyle numa or custom with the part option. + +*/ diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp new file mode 100644 index 0000000000..655e5a5d60 --- /dev/null +++ b/src/comm_tiled.cpp @@ -0,0 +1,210 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
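// Nearly everything in CommBrick above is declared virtual, which lets a
// derived class substitute its own buffer management or packing while
// reusing the brick communication pattern.  A hypothetical example (not
// part of LAMMPS) that only changes the send-buffer growth policy:

#include "comm_brick.h"

namespace LAMMPS_NS {

class CommBrickPadded : public CommBrick {
 public:
  CommBrickPadded(class LAMMPS *lmp) : CommBrick(lmp) {}

 protected:
  // round requests up to a multiple of 1024 doubles before delegating
  // to the base-class BUFFACTOR policy
  virtual void grow_send(int n, int flag)
  {
    int padded = ((n + 1023) / 1024 + 1) * 1024;
    CommBrick::grow_send(padded, flag);
  }
};

}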
+------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "comm_tiled.h" +#include "error.h" + +using namespace LAMMPS_NS; + +enum{SINGLE,MULTI}; // same as in Comm + +/* ---------------------------------------------------------------------- */ + +CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp) +{ + style = 1; + layout = 0; + + error->all(FLERR,"Comm_style tiled is not yet supported"); +} + +/* ---------------------------------------------------------------------- */ + +CommTiled::~CommTiled() +{ +} + +/* ---------------------------------------------------------------------- */ + +void CommTiled::init() +{ +} + +/* ---------------------------------------------------------------------- + setup spatial-decomposition communication patterns + function of neighbor cutoff(s) & cutghostuser & current box size + single mode sets slab boundaries (slablo,slabhi) based on max cutoff + multi mode sets type-dependent slab boundaries (multilo,multihi) +------------------------------------------------------------------------- */ + +void CommTiled::setup() +{ +} + +/* ---------------------------------------------------------------------- + forward communication of atom coords every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm(int dummy) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication of forces on atoms every timestep + other per-atom attributes may also be sent via pack/unpack routines +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm() +{ +} + +/* ---------------------------------------------------------------------- + exchange: move atoms to correct processors + atoms exchanged with all 6 stencil neighbors + send out atoms that have left my box, receive ones entering my box + atoms will be lost if not inside some proc's box + can happen if atom moves outside of non-periodic bounary + or if atom moves more than one proc away + this routine called before every reneighboring + for triclinic, atoms must be in lamda coords (0-1) before exchange is called +------------------------------------------------------------------------- */ + +void CommTiled::exchange() +{ +} + +/* ---------------------------------------------------------------------- + borders: list nearby atoms to send to neighboring procs at every timestep + one list is created for every swap that will be made + as list is made, actually do swaps + this does equivalent of a communicate, so don't need to explicitly + call communicate routine on reneighboring timestep + this routine is called before every reneighboring + for triclinic, atoms must be in lamda coords (0-1) before borders is called +------------------------------------------------------------------------- */ + +void CommTiled::borders() +{ +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Pair +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_pair(Pair *pair) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Pair +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm_pair(Pair *pair) +{ +} + +/* 
---------------------------------------------------------------------- + forward communication invoked by a Fix + n = constant number of datums per atom +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_fix(Fix *fix) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Fix + n = constant number of datums per atom +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm_fix(Fix *fix) +{ +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Fix + n = total datums for all atoms, allows for variable number/atom +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_variable_fix(Fix *fix) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Fix + n = total datums for all atoms, allows for variable number/atom +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm_variable_fix(Fix *fix) +{ +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Compute +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_compute(Compute *compute) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Compute +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm_compute(Compute *compute) +{ +} + +/* ---------------------------------------------------------------------- + forward communication invoked by a Dump +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_dump(Dump *dump) +{ +} + +/* ---------------------------------------------------------------------- + reverse communication invoked by a Dump +------------------------------------------------------------------------- */ + +void CommTiled::reverse_comm_dump(Dump *dump) +{ +} + +/* ---------------------------------------------------------------------- + forward communication of N values in array +------------------------------------------------------------------------- */ + +void CommTiled::forward_comm_array(int n, double **array) +{ +} + +/* ---------------------------------------------------------------------- + exchange info provided with all 6 stencil neighbors +------------------------------------------------------------------------- */ + +int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf) +{ +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint CommTiled::memory_usage() +{ + bigint bytes = 0; + return bytes; +} diff --git a/src/comm_tiled.h b/src/comm_tiled.h new file mode 100644 index 0000000000..139e829313 --- /dev/null +++ b/src/comm_tiled.h @@ -0,0 +1,55 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_COMM_TILED_H +#define LMP_COMM_TILED_H + +#include "comm.h" + +namespace LAMMPS_NS { + +class CommTiled : public Comm { + public: + CommTiled(class LAMMPS *); + virtual ~CommTiled(); + + void init(); + void setup(); // setup comm pattern + void forward_comm(int dummy = 0); // forward comm of atom coords + void reverse_comm(); // reverse comm of forces + void exchange(); // move atoms to new procs + void borders(); // setup list of atoms to comm + + void forward_comm_pair(class Pair *); // forward comm from a Pair + void reverse_comm_pair(class Pair *); // reverse comm from a Pair + void forward_comm_fix(class Fix *); // forward comm from a Fix + void reverse_comm_fix(class Fix *); // reverse comm from a Fix + void forward_comm_variable_fix(class Fix *); // variable-size variant + void reverse_comm_variable_fix(class Fix *); // variable-size variant + void forward_comm_compute(class Compute *); // forward from a Compute + void reverse_comm_compute(class Compute *); // reverse from a Compute + void forward_comm_dump(class Dump *); // forward comm from a Dump + void reverse_comm_dump(class Dump *); // reverse comm from a Dump + + void forward_comm_array(int, double **); // forward comm of array + int exchange_variable(int, double *, double *&); // exchange on neigh stencil + bigint memory_usage(); +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/
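// CommBrick::exchange_variable() above copies the caller's own values into
// the internal recv buffer first, then appends whatever arrives from the
// 6-way stencil, and returns the combined count; outbuf is left pointing at
// that internal buffer.  A hypothetical caller (assuming it holds a
// CommBrick pointer, or that the base Comm class exposes the same method)
// would use it like this:

#include "comm_brick.h"
using namespace LAMMPS_NS;

static double gossip(CommBrick *comm)
{
  double mine[4] = {0.0, 1.0, 2.0, 3.0};
  double *all = NULL;
  int ntotal = comm->exchange_variable(4, mine, all);

  // all[0..3] echo my own 4 values; the remaining ntotal-4 doubles came
  // from neighboring processors in the x, y, and z exchanges.
  // reduce or copy out immediately: all aliases the comm module's internal
  // buffer, which later communication calls may overwrite or reallocate.
  double sum = 0.0;
  for (int i = 0; i < ntotal; i++) sum += all[i];
  return sum;
}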