diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.16 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.16
deleted file mode 100644
index 20d3faaf7f..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.16
+++ /dev/null
@@ -1,92 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-
-units		lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-kspace_style scafacos ewald  0.001
-kspace_modify scafacos tolerance field
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver ewald ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49647271            0   0.49647271            0            0 
-      10      300  0.051135063  0.014582562   0.44286522   0.02180093   0.46466616            0 0.0043601861 
-      20      300   0.10210872  0.058693359   0.37869251  0.087746571   0.46643909            0  0.017549314 
-      30      300   0.15278506   0.13468789   0.26730177    0.2013584   0.46866017            0  0.040271679 
-      40      300   0.19430375   0.50949535  0.083356437   0.76169555   0.84505198            0   0.15233911 
-      50      300   0.23220921    1.1731116 -0.055261984    1.7538018    1.6985399            0   0.35076037 
-      60      300   0.27002859    1.3589639  -0.33351524     2.031651    1.6981358            0    0.4063302 
-      70      300   0.30781388    1.6482648  -0.76570045    2.4641559    1.6984554            0   0.49283118 
-      80      300   0.34566283    2.8640899   -2.4038488    4.2818144    1.8779656            0   0.85636288 
-      90      300   0.38424087    93.168442   -2.5911448    139.28682    136.69568            0    27.857364 
-     100      300   0.42331123    94.146897   -1.3480439    140.74961    139.40157            0    28.149922 
-Loop time of 0.423331 on 16 procs for 100 steps with 300 atoms
-
-Performance: 102047.913 tau/day, 236.222 timesteps/s
-99.2% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 2.5988e-05 | 3.6508e-05 | 5.5075e-05 |   0.0 |  0.01
-Kspace  | 0.41852    | 0.41925    | 0.41976    |   0.1 | 99.04
-Neigh   | 0.00023413 | 0.00056887 | 0.0012875  |   0.0 |  0.13
-Comm    | 0.0019519  | 0.0022772  | 0.0027158  |   0.5 |  0.54
-Output  | 0.00028276 | 0.00030752 | 0.0003624  |   0.0 |  0.07
-Modify  | 8.3685e-05 | 0.0001286  | 0.00018764 |   0.0 |  0.03
-Other   |            | 0.000758   |            |       |  0.18
-
-Nlocal:    18.75 ave 39 max 6 min
-Histogram: 6 1 1 0 1 2 2 1 1 1
-Nghost:    122.812 ave 195 max 63 min
-Histogram: 8 0 0 0 0 0 0 1 3 4
-Neighs:    160.625 ave 598 max 13 min
-Histogram: 8 2 1 1 1 0 0 2 0 1
-
-Total # of neighbors = 2570
-Ave neighs/atom = 8.56667
-Neighbor list builds = 23
-Dangerous builds = 16
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.8 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.8
deleted file mode 100644
index d26138522a..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.ewald.8
+++ /dev/null
@@ -1,92 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-
-units		lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-kspace_style scafacos ewald  0.001
-kspace_modify scafacos tolerance field
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver ewald ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49647271            0   0.49647271            0            0 
-      10      300  0.057411432  0.014718629   0.45088339   0.02200435   0.47288774            0   0.00440087 
-      20      300   0.11482716   0.05922597   0.38470912  0.088542825   0.47325194            0  0.017708565 
-      30      300   0.17278481   0.13587829   0.27058048   0.20313804   0.47371852            0  0.040627608 
-      40      300   0.23021507   0.51353118  0.088432648   0.76772911   0.85616176            0   0.15354582 
-      50      300   0.28812647    1.1760001 -0.058088247    1.7581201    1.7000319            0   0.35162403 
-      60      300   0.34651113    1.3627885  -0.33736672    2.0373688    1.7000021            0   0.40747376 
-      70      300   0.40509939    1.6529365  -0.77082139    2.4711401    1.7003187            0   0.49422802 
-      80      300   0.46342874    2.9569837   -2.4624654    4.4206907    1.9582253            0   0.88413814 
-      90      300   0.52329254    81.642726   -2.5370215    122.05588    119.51885            0    24.411175 
-     100      300   0.58335209    85.047974    -1.128107    127.14672    126.01861            0    25.429344 
-Loop time of 0.583369 on 8 procs for 100 steps with 300 atoms
-
-Performance: 74052.598 tau/day, 171.418 timesteps/s
-99.7% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 4.0531e-05 | 4.6492e-05 | 4.8876e-05 |   0.0 |  0.01
-Kspace  | 0.57805    | 0.5785     | 0.57893    |   0.0 | 99.17
-Neigh   | 0.00062275 | 0.00091892 | 0.0013313  |   0.0 |  0.16
-Comm    | 0.002604   | 0.0028289  | 0.0031538  |   0.3 |  0.48
-Output  | 0.0002265  | 0.0002434  | 0.00029039 |   0.0 |  0.04
-Modify  | 0.00016117 | 0.00017747 | 0.00019884 |   0.0 |  0.03
-Other   |            | 0.00065    |            |       |  0.11
-
-Nlocal:    37.5 ave 46 max 31 min
-Histogram: 2 0 0 2 1 0 2 0 0 1
-Nghost:    203.875 ave 212 max 192 min
-Histogram: 1 0 1 0 0 2 1 0 0 3
-Neighs:    321.625 ave 599 max 112 min
-Histogram: 1 2 0 1 1 0 1 1 0 1
-
-Total # of neighbors = 2573
-Ave neighs/atom = 8.57667
-Neighbor list builds = 23
-Dangerous builds = 16
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.16 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.16
deleted file mode 100644
index c9c1e0d155..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.16
+++ /dev/null
@@ -1,99 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-units	    lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-kspace_style scafacos fmm 1.0e-3
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
-      10      300  0.023007393  0.015455559   0.47335833   0.02310606   0.49646439            0  0.020399823 
-      20      300  0.045746088   0.06229069   0.40334177  0.093124582   0.49646635            0  0.032069642 
-      30      300  0.068123341   0.14310163   0.28254277   0.21393694   0.49647971            0   0.05220548 
-      40      300  0.090359211   0.52929788  0.089669015   0.79130033   0.88096934            0   0.16124903 
-      50      300   0.11304998    1.1963022 -0.082792461    1.7884718    1.7056794            0   0.35493462 
-      60      300   0.13585806    1.3928167  -0.37659239     2.082261    1.7056686            0   0.40389911 
-      70      300   0.15867376    1.7069009  -0.84571914    2.5518169    1.7060978            0   0.48217274 
-      80      300   0.18324137    15.358343    -3.368063    22.960722    19.592659            0    4.4798757 
-      90      300   0.20960689    42.280432   -2.1623864    63.209247     61.04686            0     12.56977 
-     100      300   0.23539281     41.48079  -0.89904529    62.013782    61.114736            0    12.372788 
-Loop time of 0.235411 on 16 procs for 100 steps with 300 atoms
-
-Performance: 183509.107 tau/day, 424.790 timesteps/s
-97.9% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 3.2425e-05 | 4.4718e-05 | 6.1274e-05 |   0.0 |  0.02
-Kspace  | 0.23097    | 0.23143    | 0.2318     |   0.1 | 98.31
-Neigh   | 0.00015116 | 0.00035347 | 0.00075746 |   0.0 |  0.15
-Comm    | 0.0020316  | 0.002282   | 0.0025339  |   0.3 |  0.97
-Output  | 0.00034404 | 0.00037053 | 0.00042701 |   0.0 |  0.16
-Modify  | 9.3937e-05 | 0.00014532 | 0.00018811 |   0.0 |  0.06
-Other   |            | 0.0007878  |            |       |  0.33
-
-Nlocal:    18.75 ave 36 max 6 min
-Histogram: 4 3 1 0 0 1 2 1 2 2
-Nghost:    127 ave 196 max 71 min
-Histogram: 8 0 0 0 0 0 0 1 6 1
-Neighs:    153.688 ave 491 max 10 min
-Histogram: 8 1 1 1 1 1 0 0 0 3
-
-Total # of neighbors = 2459
-Ave neighs/atom = 8.19667
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.2 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.2
deleted file mode 100644
index 1a9bc77146..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.2
+++ /dev/null
@@ -1,99 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-units	    lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-kspace_style scafacos fmm 1.0e-3
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.354 | 3.354 | 3.355 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
-      10      300  0.038181543  0.015455559   0.47335833   0.02310606   0.49646439            0  0.020399823 
-      20      300  0.076276302   0.06229069   0.40334177  0.093124582   0.49646635            0  0.032069642 
-      30      300   0.11437607   0.14310163   0.28254277   0.21393694   0.49647971            0   0.05220548 
-      40      300   0.15244293   0.52929788  0.089669015   0.79130033   0.88096934            0   0.16124903 
-      50      300   0.19081283    1.1963022 -0.082792461    1.7884718    1.7056794            0   0.35493462 
-      60      300   0.22923493    1.3928167  -0.37659239     2.082261    1.7056686            0   0.40389911 
-      70      300   0.26754427    1.7069009  -0.84571914    2.5518169    1.7060978            0   0.48217274 
-      80      300   0.30721259    15.358343    -3.368063    22.960722    19.592659            0    4.4798757 
-      90      300   0.34865618    42.280432   -2.1623864    63.209247     61.04686            0     12.56977 
-     100      300   0.39100981     41.48079  -0.89904529    62.013782    61.114736            0    12.372788 
-Loop time of 0.391022 on 2 procs for 100 steps with 300 atoms
-
-Performance: 110479.760 tau/day, 255.740 timesteps/s
-99.6% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 8.0109e-05 | 8.1539e-05 | 8.297e-05  |   0.0 |  0.02
-Kspace  | 0.38534    | 0.38582    | 0.3863     |   0.1 | 98.67
-Neigh   | 0.0014851  | 0.0019699  | 0.0024548  |   1.1 |  0.50
-Comm    | 0.0019314  | 0.0020101  | 0.0020888  |   0.2 |  0.51
-Output  | 0.00014496 | 0.00017297 | 0.00020099 |   0.0 |  0.04
-Modify  | 0.0005033  | 0.00052273 | 0.00054216 |   0.0 |  0.13
-Other   |            | 0.0004461  |            |       |  0.11
-
-Nlocal:    150 ave 159 max 141 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Nghost:    392 ave 395 max 389 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Neighs:    1229.5 ave 1773 max 686 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-
-Total # of neighbors = 2459
-Ave neighs/atom = 8.19667
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.4 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.4
deleted file mode 100644
index eae3f2b840..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.4
+++ /dev/null
@@ -1,99 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-units	    lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  1 by 2 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-kspace_style scafacos fmm 1.0e-3
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
-      10      300  0.029414415  0.015455559   0.47335833   0.02310606   0.49646439            0  0.020399823 
-      20      300  0.058616877   0.06229069   0.40334177  0.093124582   0.49646635            0  0.032069642 
-      30      300  0.087769508   0.14310163   0.28254277   0.21393694   0.49647971            0   0.05220548 
-      40      300    0.1168611   0.52929788  0.089669015   0.79130033   0.88096934            0   0.16124903 
-      50      300   0.14482284    1.1963022 -0.082792461    1.7884718    1.7056794            0   0.35493462 
-      60      300   0.17198443    1.3928167  -0.37659239     2.082261    1.7056686            0   0.40389911 
-      70      300   0.19868851    1.7069009  -0.84571914    2.5518169    1.7060978            0   0.48217274 
-      80      300   0.22835517    15.358343    -3.368063    22.960722    19.592659            0    4.4798757 
-      90      300   0.26023602    42.280432   -2.1623864    63.209247     61.04686            0     12.56977 
-     100      300   0.29043221     41.48079  -0.89904529    62.013782    61.114736            0    12.372788 
-Loop time of 0.290448 on 4 procs for 100 steps with 300 atoms
-
-Performance: 148735.741 tau/day, 344.296 timesteps/s
-99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 5.9605e-05 | 6.187e-05  | 6.4135e-05 |   0.0 |  0.02
-Kspace  | 0.28551    | 0.28584    | 0.28604    |   0.0 | 98.41
-Neigh   | 0.00077796 | 0.0010615  | 0.0013225  |   0.7 |  0.37
-Comm    | 0.002372   | 0.0024325  | 0.002497   |   0.1 |  0.84
-Output  | 0.00025368 | 0.0002659  | 0.00029516 |   0.0 |  0.09
-Modify  | 0.00030279 | 0.00031865 | 0.00033021 |   0.0 |  0.11
-Other   |            | 0.0004706  |            |       |  0.16
-
-Nlocal:    75 ave 81 max 70 min
-Histogram: 2 0 0 0 0 0 0 1 0 1
-Nghost:    282.5 ave 290 max 274 min
-Histogram: 1 0 0 1 0 0 0 0 1 1
-Neighs:    614.75 ave 981 max 285 min
-Histogram: 1 1 0 0 0 0 0 1 0 1
-
-Total # of neighbors = 2459
-Ave neighs/atom = 8.19667
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.8 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.8
deleted file mode 100644
index feb7e2a5a6..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.8
+++ /dev/null
@@ -1,99 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-units	    lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-kspace_style scafacos fmm 1.0e-3
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
-      10      300  0.026465416  0.015455559   0.47335833   0.02310606   0.49646439            0  0.020399823 
-      20      300  0.057377338   0.06229069   0.40334177  0.093124582   0.49646635            0  0.032069642 
-      30      300  0.088356495   0.14310163   0.28254277   0.21393694   0.49647971            0   0.05220548 
-      40      300   0.11900806   0.52929788  0.089669015   0.79130033   0.88096934            0   0.16124903 
-      50      300   0.15157914    1.1963022 -0.082792461    1.7884718    1.7056794            0   0.35493462 
-      60      300   0.18608141    1.3928167  -0.37659239     2.082261    1.7056686            0   0.40389911 
-      70      300   0.21956491    1.7069009  -0.84571914    2.5518169    1.7060978            0   0.48217274 
-      80      300   0.24269128    15.358343    -3.368063    22.960722    19.592659            0    4.4798757 
-      90      300   0.26847005    42.280432   -2.1623864    63.209247     61.04686            0     12.56977 
-     100      300   0.29283834     41.48079  -0.89904529    62.013782    61.114736            0    12.372788 
-Loop time of 0.292855 on 8 procs for 100 steps with 300 atoms
-
-Performance: 147513.337 tau/day, 341.466 timesteps/s
-98.4% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 4.7207e-05 | 5.5045e-05 | 6.9618e-05 |   0.0 |  0.02
-Kspace  | 0.28739    | 0.28773    | 0.2881     |   0.0 | 98.25
-Neigh   | 0.00040698 | 0.00060901 | 0.00082922 |   0.0 |  0.21
-Comm    | 0.0029533  | 0.0031788  | 0.0034056  |   0.3 |  1.09
-Output  | 0.00029063 | 0.00030866 | 0.00035119 |   0.0 |  0.11
-Modify  | 0.00018978 | 0.00022188 | 0.00026703 |   0.0 |  0.08
-Other   |            | 0.0007486  |            |       |  0.26
-
-Nlocal:    37.5 ave 45 max 31 min
-Histogram: 1 1 1 1 1 0 1 0 1 1
-Nghost:    200 ave 209 max 189 min
-Histogram: 1 0 0 0 1 4 0 0 0 2
-Neighs:    307.375 ave 514 max 115 min
-Histogram: 2 1 0 1 1 0 0 0 1 2
-
-Total # of neighbors = 2459
-Ave neighs/atom = 8.19667
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.16 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.16
deleted file mode 100644
index d6e7cbea37..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.16
+++ /dev/null
@@ -1,92 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-
-units		lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-kspace_style scafacos p2nfft  0.001
-kspace_modify scafacos tolerance field
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49683273            0   0.49683273            0  0.016561091 
-      10      300  0.015678644  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
-      20      300  0.031283855  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
-      30      300  0.046878099   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
-      40      300  0.062416077   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
-      50      300  0.078029871    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
-      60      300  0.093806505    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
-      70      300    0.1096344    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
-      80      300   0.12532592    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
-      90      300   0.14175463    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
-     100      300   0.15838337    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
-Loop time of 0.158406 on 16 procs for 100 steps with 300 atoms
-
-Performance: 272716.448 tau/day, 631.288 timesteps/s
-99.4% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 2.718e-05  | 3.7491e-05 | 5.6744e-05 |   0.0 |  0.02
-Kspace  | 0.15435    | 0.15482    | 0.15516    |   0.1 | 97.74
-Neigh   | 0.00014806 | 0.0003508  | 0.00074744 |   0.0 |  0.22
-Comm    | 0.0016866  | 0.0019967  | 0.0023787  |   0.5 |  1.26
-Output  | 0.00027871 | 0.00033027 | 0.00038028 |   0.0 |  0.21
-Modify  | 8.0347e-05 | 0.00011933 | 0.00016522 |   0.0 |  0.08
-Other   |            | 0.0007506  |            |       |  0.47
-
-Nlocal:    18.75 ave 33 max 6 min
-Histogram: 2 6 0 0 0 0 2 1 2 3
-Nghost:    128.875 ave 198 max 71 min
-Histogram: 7 1 0 0 0 0 0 1 5 2
-Neighs:    153.812 ave 490 max 14 min
-Histogram: 8 0 3 0 1 1 0 0 1 2
-
-Total # of neighbors = 2461
-Ave neighs/atom = 8.20333
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.2 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.2
deleted file mode 100644
index fb863797f3..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.2
+++ /dev/null
@@ -1,92 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-
-units		lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-kspace_style scafacos p2nfft  0.001
-kspace_modify scafacos tolerance field
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.354 | 3.354 | 3.355 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49683273            0   0.49683273            0  0.016561091 
-      10      300  0.044083834  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
-      20      300  0.088379145  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
-      30      300   0.13264704   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
-      40      300   0.17687225   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
-      50      300   0.22116137    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
-      60      300   0.26515126    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
-      70      300   0.30891085    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
-      80      300   0.35292292    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
-      90      300   0.39845228    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
-     100      300   0.44492316    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
-Loop time of 0.444937 on 2 procs for 100 steps with 300 atoms
-
-Performance: 97092.373 tau/day, 224.751 timesteps/s
-100.0% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 7.7248e-05 | 7.844e-05  | 7.9632e-05 |   0.0 |  0.02
-Kspace  | 0.43932    | 0.43979    | 0.44026    |   0.1 | 98.84
-Neigh   | 0.0014915  | 0.0019662  | 0.0024409  |   1.1 |  0.44
-Comm    | 0.0019331  | 0.0019941  | 0.0020552  |   0.1 |  0.45
-Output  | 0.00013781 | 0.00016308 | 0.00018835 |   0.0 |  0.04
-Modify  | 0.00050378 | 0.00050449 | 0.00050521 |   0.0 |  0.11
-Other   |            | 0.0004425  |            |       |  0.10
-
-Nlocal:    150 ave 157 max 143 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Nghost:    399 ave 402 max 396 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Neighs:    1230.5 ave 1756 max 705 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-
-Total # of neighbors = 2461
-Ave neighs/atom = 8.20333
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.8 b/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.8
deleted file mode 100644
index a6f9228673..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.8
+++ /dev/null
@@ -1,92 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-
-units		lj
-atom_style  charge
-
-read_data data.cloud_wall
-  orthogonal box = (0 0 0) to (10 10 10)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  300 atoms
-
-velocity	all set 0.0 0.0 0.0 mom no
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix     1 all nve
-kspace_style scafacos p2nfft  0.001
-kspace_modify scafacos tolerance field
-
-timestep	0.005
-
-thermo_style custom step atoms cpu temp pe ke etotal ecoul press
-
-run_style verlet
-
-#dump simple all custom 1000 id x y z vx vy vz
-#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
-#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
-#dump_modify dmpvtk pad 7
-
-thermo 10
-run	100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
-Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
-       0      300            0            0   0.49683273            0   0.49683273            0  0.016561091 
-      10      300   0.01961565  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
-      20      300  0.039346695  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
-      30      300  0.059037447   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
-      40      300  0.078732729   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
-      50      300  0.098586798    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
-      60      300   0.11857247    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
-      70      300    0.1385541    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
-      80      300   0.15850091    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
-      90      300   0.17892075    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
-     100      300   0.19964767    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
-Loop time of 0.199664 on 8 procs for 100 steps with 300 atoms
-
-Performance: 216363.074 tau/day, 500.840 timesteps/s
-99.4% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 4.5061e-05 | 4.7535e-05 | 5.3167e-05 |   0.0 |  0.02
-Kspace  | 0.19551    | 0.19584    | 0.19611    |   0.0 | 98.08
-Neigh   | 0.00041366 | 0.00060952 | 0.00082064 |   0.0 |  0.31
-Comm    | 0.0021496  | 0.0022282  | 0.0024025  |   0.2 |  1.12
-Output  | 0.0002346  | 0.00024167 | 0.00027847 |   0.0 |  0.12
-Modify  | 0.00016665 | 0.00017652 | 0.0001924  |   0.0 |  0.09
-Other   |            | 0.0005245  |            |       |  0.26
-
-Nlocal:    37.5 ave 42 max 33 min
-Histogram: 2 1 0 1 0 0 1 0 1 2
-Nghost:    202.25 ave 212 max 194 min
-Histogram: 1 0 2 1 0 2 0 1 0 1
-Neighs:    307.625 ave 505 max 129 min
-Histogram: 3 0 0 1 1 0 0 0 1 2
-
-Total # of neighbors = 2461
-Ave neighs/atom = 8.20333
-Neighbor list builds = 15
-Dangerous builds = 0
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.16 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.16
deleted file mode 100644
index 2fc46da3cd..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.16
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 4 MPI processor grid
-  4096 atoms
-  Time spent = 0.000462294 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos ewald 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver ewald ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.501 | 3.501 | 3.501 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475938            0   0.50185691         -nan 
-      10     1.500011    -1.747529            0   0.50193816         -nan 
-      20    1.5000023   -1.7475152            0   0.50193898         -nan 
-      30    1.4999308    -1.747404            0   0.50194285         -nan 
-      40    1.4997722   -1.7471622            0   0.50194686         -nan 
-      50    1.4995835    -1.746878            0   0.50194808         -nan 
-      60    1.4996054   -1.7469114            0   0.50194749         -nan 
-      70    1.5004341   -1.7481558            0   0.50194592         -nan 
-      80    1.5033218   -1.7524875            0   0.50194458         -nan 
-      90    1.5108306   -1.7637462            0   0.50194636         -nan 
-     100    1.5292479   -1.7913449            0   0.50196695         -nan 
-Loop time of 80.2777 on 16 procs for 100 steps with 4096 atoms
-
-Performance: 538.132 tau/day, 1.246 timesteps/s
-99.8% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0003705  | 0.00039807 | 0.00048542 |   0.0 |  0.00
-Kspace  | 80.262     | 80.263     | 80.264     |   0.0 | 99.98
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.010191   | 0.011419   | 0.012416   |   0.6 |  0.01
-Output  | 0.00028253 | 0.00033158 | 0.0004065  |   0.0 |  0.00
-Modify  | 0.00082541 | 0.0008464  | 0.00087833 |   0.0 |  0.00
-Other   |            | 0.001511   |            |       |  0.00
-
-Nlocal:    256 ave 256 max 256 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Nghost:    2816 ave 2816 max 2816 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Neighs:    32768 ave 32768 max 32768 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:01:22
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.2 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.2
deleted file mode 100644
index b6bf41de24..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.2
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  1 by 1 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.000344753 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos ewald 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver ewald ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.574 | 4.574 | 4.574 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475938            0   0.50185691     11.99707 
-      10     1.500011    -1.747529            0   0.50193816    11.997158 
-      20    1.5000023   -1.7475152            0   0.50193898    11.997089 
-      30    1.4999308    -1.747404            0   0.50194285    11.996517 
-      40    1.4997722   -1.7471622            0   0.50194686    11.995248 
-      50    1.4995835    -1.746878            0   0.50194808    11.993739 
-      60    1.4996054   -1.7469114            0   0.50194749    11.993914 
-      70    1.5004341   -1.7481558            0   0.50194592    12.000543 
-      80    1.5033218   -1.7524875            0   0.50194458    12.023638 
-      90    1.5108306   -1.7637462            0   0.50194636    12.083694 
-     100    1.5292479   -1.7913449            0   0.50196695    12.230996 
-Loop time of 566.796 on 2 procs for 100 steps with 4096 atoms
-
-Performance: 76.218 tau/day, 0.176 timesteps/s
-100.0% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0010231  | 0.0010413  | 0.0010595  |   0.1 |  0.00
-Kspace  | 566.77     | 566.77     | 566.77     |   0.0 | 99.99
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.019707   | 0.01982    | 0.019932   |   0.1 |  0.00
-Output  | 0.0002656  | 0.00029266 | 0.00031972 |   0.0 |  0.00
-Modify  | 0.0055575  | 0.0055707  | 0.0055838  |   0.0 |  0.00
-Other   |            | 0.002497   |            |       |  0.00
-
-Nlocal:    2048 ave 2048 max 2048 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Nghost:    7168 ave 7168 max 7168 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Neighs:    262144 ave 262144 max 262144 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:09:38
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.8 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.8
deleted file mode 100644
index 4a1a5b91ed..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.8
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.000232935 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos ewald 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver ewald ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.508 | 3.508 | 3.508 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475938            0   0.50185691         -nan 
-      10     1.500011    -1.747529            0   0.50193816         -nan 
-      20    1.5000023   -1.7475152            0   0.50193898         -nan 
-      30    1.4999308    -1.747404            0   0.50194285         -nan 
-      40    1.4997722   -1.7471622            0   0.50194686         -nan 
-      50    1.4995835    -1.746878            0   0.50194808         -nan 
-      60    1.4996054   -1.7469114            0   0.50194749         -nan 
-      70    1.5004341   -1.7481558            0   0.50194592         -nan 
-      80    1.5033218   -1.7524875            0   0.50194458         -nan 
-      90    1.5108306   -1.7637462            0   0.50194636         -nan 
-     100    1.5292479   -1.7913449            0   0.50196695         -nan 
-Loop time of 154.44 on 8 procs for 100 steps with 4096 atoms
-
-Performance: 279.720 tau/day, 0.647 timesteps/s
-99.9% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00049257 | 0.00051311 | 0.00059295 |   0.0 |  0.00
-Kspace  | 154.42     | 154.42     | 154.42     |   0.0 | 99.99
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.012076   | 0.013177   | 0.014308   |   0.8 |  0.01
-Output  | 0.00025177 | 0.00028065 | 0.00030136 |   0.0 |  0.00
-Modify  | 0.0015776  | 0.0017182  | 0.0018268  |   0.2 |  0.00
-Other   |            | 0.001309   |            |       |  0.00
-
-Nlocal:    512 ave 512 max 512 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Nghost:    3584 ave 3584 max 3584 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Neighs:    65536 ave 65536 max 65536 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:02:38
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.16 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.16
deleted file mode 100644
index 4bd0c3d4f3..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.16
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 4 MPI processor grid
-  4096 atoms
-  Time spent = 0.000400543 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos fmm 0.001
-kspace_modify    scafacos tolerance energy
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.501 | 3.501 | 3.501 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475646            0   0.50188608     10.44368 
-      10    1.5000016   -1.7475671            0   0.50188602     10.44369 
-      20    1.4999827   -1.7475388            0   0.50188592    10.443564 
-      30    1.4999016   -1.7474173            0    0.5018858    10.443023 
-      40    1.4997356   -1.7471685            0   0.50188572    10.441917 
-      50    1.4995414   -1.7468771            0    0.5018858    10.440623 
-      60    1.4995587   -1.7469027            0   0.50188622    10.440739 
-      70    1.5003837   -1.7481389            0   0.50188727    10.446238 
-      80    1.5032684   -1.7524625            0   0.50188958    10.465466 
-      90    1.5107749    -1.763714            0   0.50189507    10.515502 
-     100      1.52919    -1.791306            0   0.50191895    10.638261 
-Loop time of 4.23774 on 16 procs for 100 steps with 4096 atoms
-
-Performance: 10194.102 tau/day, 23.597 timesteps/s
-99.6% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00038028 | 0.00040729 | 0.00046206 |   0.0 |  0.01
-Kspace  | 4.2206     | 4.2211     | 4.2216     |   0.0 | 99.61
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.011439   | 0.012491   | 0.013172   |   0.4 |  0.29
-Output  | 0.00042915 | 0.000489   | 0.00061274 |   0.0 |  0.01
-Modify  | 0.00093102 | 0.00099151 | 0.0010982  |   0.0 |  0.02
-Other   |            | 0.002255   |            |       |  0.05
-
-Nlocal:    256 ave 256 max 256 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Nghost:    2816 ave 2816 max 2816 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Neighs:    32768 ave 32768 max 32768 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:06
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.2 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.2
deleted file mode 100644
index 50dcc99393..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.2
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  1 by 1 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.0003407 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos fmm 0.001
-kspace_modify    scafacos tolerance energy
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.574 | 4.574 | 4.574 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475646            0   0.50188608     10.44368 
-      10    1.5000016   -1.7475671            0   0.50188602     10.44369 
-      20    1.4999827   -1.7475388            0   0.50188592    10.443564 
-      30    1.4999016   -1.7474173            0    0.5018858    10.443023 
-      40    1.4997356   -1.7471685            0   0.50188572    10.441917 
-      50    1.4995414   -1.7468771            0    0.5018858    10.440623 
-      60    1.4995587   -1.7469027            0   0.50188622    10.440739 
-      70    1.5003837   -1.7481389            0   0.50188727    10.446238 
-      80    1.5032684   -1.7524625            0   0.50188958    10.465466 
-      90    1.5107749    -1.763714            0   0.50189507    10.515502 
-     100      1.52919    -1.791306            0   0.50191895    10.638261 
-Loop time of 17.9401 on 2 procs for 100 steps with 4096 atoms
-
-Performance: 2408.014 tau/day, 5.574 timesteps/s
-99.9% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0010042  | 0.0010235  | 0.0010428  |   0.1 |  0.01
-Kspace  | 17.912     | 17.912     | 17.912     |   0.0 | 99.84
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.018252   | 0.018528   | 0.018804   |   0.2 |  0.10
-Output  | 0.00034094 | 0.00035989 | 0.00037885 |   0.0 |  0.00
-Modify  | 0.0055602  | 0.0056567  | 0.0057533  |   0.1 |  0.03
-Other   |            | 0.002716   |            |       |  0.02
-
-Nlocal:    2048 ave 2048 max 2048 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Nghost:    7168 ave 7168 max 7168 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Neighs:    262144 ave 262144 max 262144 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:19
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.8 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.8
deleted file mode 100644
index 02a5528c59..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.8
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.000236988 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos fmm 0.001
-kspace_modify    scafacos tolerance energy
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.508 | 3.508 | 3.508 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475646            0   0.50188608     10.44368 
-      10    1.5000016   -1.7475671            0   0.50188602     10.44369 
-      20    1.4999827   -1.7475388            0   0.50188592    10.443564 
-      30    1.4999016   -1.7474173            0    0.5018858    10.443023 
-      40    1.4997356   -1.7471685            0   0.50188572    10.441917 
-      50    1.4995414   -1.7468771            0    0.5018858    10.440623 
-      60    1.4995587   -1.7469027            0   0.50188622    10.440739 
-      70    1.5003837   -1.7481389            0   0.50188727    10.446238 
-      80    1.5032684   -1.7524625            0   0.50188958    10.465466 
-      90    1.5107749    -1.763714            0   0.50189507    10.515502 
-     100      1.52919    -1.791306            0   0.50191895    10.638261 
-Loop time of 5.96037 on 8 procs for 100 steps with 4096 atoms
-
-Performance: 7247.876 tau/day, 16.777 timesteps/s
-99.8% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00049591 | 0.0005368  | 0.00056005 |   0.0 |  0.01
-Kspace  | 5.94       | 5.941      | 5.9419     |   0.0 | 99.68
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.013702   | 0.014631   | 0.015768   |   0.6 |  0.25
-Output  | 0.00044751 | 0.00048846 | 0.00058961 |   0.0 |  0.01
-Modify  | 0.0016675  | 0.0017205  | 0.0017893  |   0.1 |  0.03
-Other   |            | 0.001971   |            |       |  0.03
-
-Nlocal:    512 ave 512 max 512 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Nghost:    3584 ave 3584 max 3584 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Neighs:    65536 ave 65536 max 65536 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:07
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.16 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.16
deleted file mode 100644
index 6e6e8f6e94..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.16
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 4 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 4 MPI processor grid
-  4096 atoms
-  Time spent = 0.000361443 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.501 | 3.501 | 3.501 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7477245            0   0.50172614    10.443537 
-      10    1.5000176   -1.7475898            0   0.50188725    10.443798 
-      20    1.5000161   -1.7475262            0   0.50194874    10.443843 
-      30    1.4999486   -1.7474019            0   0.50197176    10.443413 
-      40    1.4997889   -1.7471525            0   0.50198161    10.442357 
-      50    1.4995945   -1.7468614            0   0.50198122    10.441061 
-      60     1.499609   -1.7468813            0   0.50198309     10.44116 
-      70    1.5004314   -1.7481179            0   0.50197962    10.446638 
-      80    1.5033149   -1.7524495            0   0.50197233     10.46585 
-      90    1.5108219   -1.7637095            0   0.50197005    10.515883 
-     100     1.529239   -1.7913105            0     0.501988    10.638649 
-Loop time of 1.56685 on 16 procs for 100 steps with 4096 atoms
-
-Performance: 27571.239 tau/day, 63.822 timesteps/s
-99.8% CPU use with 16 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00036407 | 0.00040755 | 0.00047517 |   0.0 |  0.03
-Kspace  | 1.5521     | 1.553      | 1.5536     |   0.0 | 99.12
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.009537   | 0.010175   | 0.011894   |   0.6 |  0.65
-Output  | 0.000319   | 0.00039139 | 0.00052881 |   0.0 |  0.02
-Modify  | 0.00086999 | 0.00097834 | 0.0010362  |   0.0 |  0.06
-Other   |            | 0.001859   |            |       |  0.12
-
-Nlocal:    256 ave 256 max 256 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Nghost:    2816 ave 2816 max 2816 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-Neighs:    32768 ave 32768 max 32768 min
-Histogram: 16 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.2 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.2
deleted file mode 100644
index 72a53c48ff..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.2
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  1 by 1 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.0003438 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.574 | 4.574 | 4.574 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7477245            0   0.50172614    10.443537 
-      10    1.5000176   -1.7475898            0   0.50188725    10.443798 
-      20    1.5000161   -1.7475262            0   0.50194874    10.443843 
-      30    1.4999486   -1.7474019            0   0.50197176    10.443413 
-      40    1.4997889   -1.7471525            0   0.50198161    10.442357 
-      50    1.4995945   -1.7468614            0   0.50198122    10.441061 
-      60     1.499609   -1.7468813            0   0.50198309     10.44116 
-      70    1.5004314   -1.7481179            0   0.50197962    10.446638 
-      80    1.5033149   -1.7524495            0   0.50197233     10.46585 
-      90    1.5108219   -1.7637095            0   0.50197005    10.515883 
-     100     1.529239   -1.7913105            0     0.501988    10.638649 
-Loop time of 9.38943 on 2 procs for 100 steps with 4096 atoms
-
-Performance: 4600.920 tau/day, 10.650 timesteps/s
-99.9% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0010064  | 0.0010065  | 0.0010066  |   0.0 |  0.01
-Kspace  | 9.3602     | 9.3603     | 9.3604     |   0.0 | 99.69
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.019444   | 0.01968    | 0.019916   |   0.2 |  0.21
-Output  | 0.00033355 | 0.00035357 | 0.0003736  |   0.0 |  0.00
-Modify  | 0.0055819  | 0.0056176  | 0.0056534  |   0.0 |  0.06
-Other   |            | 0.002495   |            |       |  0.03
-
-Nlocal:    2048 ave 2048 max 2048 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Nghost:    7168 ave 7168 max 7168 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-Neighs:    262144 ave 262144 max 262144 min
-Histogram: 2 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:11
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.8 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.8
deleted file mode 100644
index b5e889dfc1..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.8
+++ /dev/null
@@ -1,102 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.NaCl
-  orthogonal box = (0 0 0) to (1 1 1)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  8 atoms
-
-replicate 8 8 8
-  orthogonal box = (0 0 0) to (8 8 8)
-  2 by 2 by 2 MPI processor grid
-  4096 atoms
-  Time spent = 0.000324488 secs
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify    scafacos tolerance field
-
-timestep	0.005
-thermo          10
-
-run		100
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 8 8 8
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.508 | 3.508 | 3.508 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7477245            0   0.50172614    10.443537 
-      10    1.5000176   -1.7475898            0   0.50188725    10.443798 
-      20    1.5000161   -1.7475262            0   0.50194874    10.443843 
-      30    1.4999486   -1.7474019            0   0.50197176    10.443413 
-      40    1.4997889   -1.7471525            0   0.50198161    10.442357 
-      50    1.4995945   -1.7468614            0   0.50198122    10.441061 
-      60     1.499609   -1.7468813            0   0.50198309     10.44116 
-      70    1.5004314   -1.7481179            0   0.50197962    10.446638 
-      80    1.5033149   -1.7524495            0   0.50197233     10.46585 
-      90    1.5108219   -1.7637095            0   0.50197005    10.515883 
-     100     1.529239   -1.7913105            0     0.501988    10.638649 
-Loop time of 2.88506 on 8 procs for 100 steps with 4096 atoms
-
-Performance: 14973.700 tau/day, 34.661 timesteps/s
-99.6% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.000489   | 0.00051507 | 0.00052857 |   0.0 |  0.02
-Kspace  | 2.8657     | 2.866      | 2.8664     |   0.0 | 99.34
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.014354   | 0.014851   | 0.015097   |   0.2 |  0.51
-Output  | 0.00037169 | 0.00042769 | 0.00054169 |   0.0 |  0.01
-Modify  | 0.0015774  | 0.0016578  | 0.0018044  |   0.2 |  0.06
-Other   |            | 0.001645   |            |       |  0.06
-
-Nlocal:    512 ave 512 max 512 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Nghost:    3584 ave 3584 max 3584 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-Neighs:    65536 ave 65536 max 65536 min
-Histogram: 8 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 524288
-Ave neighs/atom = 128
-Neighbor list builds = 0
-Dangerous builds = 0
-Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p3m.2 b/examples/USER/scafacos/log.08Aug18.scafacos.g++.p3m.2
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.2 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.2
deleted file mode 100644
index 1ffab0ca6a..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.2
+++ /dev/null
@@ -1,105 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos direct 0.001
-
-timestep	0.005
-thermo          1
-run		20
-Setting up ScaFaCoS with solver direct ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777512            0    272.51604   0.17462195 
-       5    286.36222    -4.382053            0    424.73173   0.26918926 
-       6    481.42206   -4.3095567            0     717.1014   0.45274088 
-       7    488.59167   -3.8685194            0     728.2861   0.45956866 
-       8    497.85287   -3.0417966            0    742.99073   0.46838116 
-       9    499.61615    -3.419003            0     745.2558   0.46983345 
-      10    502.63684   -2.8360961            0    750.36521   0.47280809 
-      11     504.4846   -2.7628105            0    753.20736   0.47462793 
-      12    506.54485   -2.8460356            0    756.21142   0.47651441 
-      13    508.27211    -2.730935            0    758.91482   0.47813752 
-      14    510.57045   -2.6094877            0    762.48033   0.48031431 
-      15    513.14798   -2.7150827            0    766.23717   0.48275229 
-      16    515.78124   -2.3961811            0    770.50201   0.48526333 
-      17    515.70265   -2.2982683            0    770.48215   0.48526617 
-      18     515.7081   -2.1515983            0    770.63699   0.48530393 
-      19    515.74906   -2.0581436            0    770.79182   0.48530977 
-      20    515.70883   -1.8922577            0    770.89742   0.48527105 
-Loop time of 0.284007 on 2 procs for 20 steps with 1000 atoms
-
-Performance: 30421.778 tau/day, 70.421 timesteps/s
-99.1% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00022578 | 0.00022626 | 0.00022674 |   0.0 |  0.08
-Kspace  | 0.18253    | 0.20503    | 0.22752    |   5.0 | 72.19
-Neigh   | 0.05363    | 0.076239   | 0.098848   |   8.2 | 26.84
-Comm    | 0.0014737  | 0.0016443  | 0.0018148  |   0.4 |  0.58
-Output  | 0.000247   | 0.00032353 | 0.00040007 |   0.0 |  0.11
-Modify  | 0.00029159 | 0.00029731 | 0.00030303 |   0.0 |  0.10
-Other   |            | 0.0002506  |            |       |  0.09
-
-Nlocal:    500 ave 516 max 484 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Nghost:    456.5 ave 475 max 438 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Neighs:    123908 ave 172139 max 75678 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-
-Total # of neighbors = 247817
-Ave neighs/atom = 247.817
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.8 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.8
deleted file mode 100644
index e72ade73b5..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.8
+++ /dev/null
@@ -1,105 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos direct 0.001
-
-timestep	0.005
-thermo          1
-run		20
-Setting up ScaFaCoS with solver direct ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777512            0    272.51604   0.17462195 
-       5    286.36222    -4.382053            0    424.73173   0.26918926 
-       6    481.42206   -4.3095567            0     717.1014   0.45274088 
-       7    488.59167   -3.8685194            0     728.2861   0.45956866 
-       8    497.85287   -3.0417966            0    742.99073   0.46838116 
-       9    499.61615    -3.419003            0     745.2558   0.46983345 
-      10    502.63684   -2.8360961            0    750.36521   0.47280809 
-      11     504.4846   -2.7628105            0    753.20736   0.47462793 
-      12    506.54485   -2.8460356            0    756.21142   0.47651441 
-      13    508.27211    -2.730935            0    758.91482   0.47813752 
-      14    510.57045   -2.6094877            0    762.48033   0.48031431 
-      15    513.14798   -2.7150827            0    766.23717   0.48275229 
-      16    515.78124   -2.3961811            0    770.50201   0.48526333 
-      17    515.70265   -2.2982683            0    770.48215   0.48526617 
-      18     515.7081   -2.1515983            0    770.63699   0.48530393 
-      19    515.74906   -2.0581436            0    770.79182   0.48530977 
-      20    515.70883   -1.8922577            0    770.89742   0.48527105 
-Loop time of 0.0883947 on 8 procs for 20 steps with 1000 atoms
-
-Performance: 97743.448 tau/day, 226.258 timesteps/s
-99.2% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0002284  | 0.00024167 | 0.00029922 |   0.0 |  0.27
-Kspace  | 0.055725   | 0.063153   | 0.071883   |   2.4 | 71.44
-Neigh   | 0.012251   | 0.021348   | 0.029026   |   4.3 | 24.15
-Comm    | 0.0025573  | 0.0029825  | 0.0034359  |   0.5 |  3.37
-Output  | 0.00034451 | 0.00044149 | 0.00057721 |   0.0 |  0.50
-Modify  | 7.8917e-05 | 8.437e-05  | 8.9407e-05 |   0.0 |  0.10
-Other   |            | 0.0001439  |            |       |  0.16
-
-Nlocal:    125 ave 133 max 113 min
-Histogram: 2 0 0 0 0 1 1 0 2 2
-Nghost:    773.625 ave 788 max 764 min
-Histogram: 1 1 2 1 1 0 0 0 1 1
-Neighs:    30977.1 ave 50690 max 10447 min
-Histogram: 1 1 1 0 1 1 0 0 2 1
-
-Total # of neighbors = 247817
-Ave neighs/atom = 247.817
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.2 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.2
deleted file mode 100644
index e8e614004d..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.2
+++ /dev/null
@@ -1,109 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos fmm 0.001
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-thermo          1
-
-run		20
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417141            0    1.6235786 0.0015676581 
-       1    18.780412   -10.770009            0    17.372438  0.016719188 
-       2    65.294131   -11.084501            0    86.758754   0.06035827 
-       3    121.92555   -7.0612033            0    175.64423    0.1140457 
-       4    185.71165   -5.8781334            0    272.41077   0.17455524 
-       5    286.28339   -4.3800108            0    424.61565   0.26911306 
-       6    481.28097   -4.3052012            0    716.89433   0.45262045 
-       7    487.26022   -3.8672741            0    726.29216   0.45830216 
-       8    493.65478   -3.0242687            0    736.71742   0.46443761 
-       9    495.66203   -3.4336343            0    739.31592   0.46613014 
-      10    498.41831   -2.8837072            0    743.99613   0.46887706 
-      11    499.20944   -2.7724783            0    745.29287   0.46966875 
-      12    500.97345   -2.8281484            0    747.88057   0.47126462 
-      13    507.46412   -2.7752775            0    757.65971   0.47728761 
-      14    525.35729   -2.5749814            0    784.67292   0.49422171 
-      15     563.9578   -2.9982381            0    842.09253   0.53043696 
-      16    645.47602   -2.5519203            0    964.69389   0.60730795 
-      17    647.09276   -2.2568468            0    967.41166   0.60891914 
-      18    647.12596   -2.2791003            0    967.43915   0.60900309 
-      19    647.24862   -2.2495226            0    967.65253   0.60908339 
-      20    647.51175   -2.0239179            0    968.27244   0.60932598 
-Loop time of 0.701186 on 2 procs for 20 steps with 1000 atoms
-
-Performance: 12321.981 tau/day, 28.523 timesteps/s
-99.7% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00022388 | 0.00022912 | 0.00023437 |   0.0 |  0.03
-Kspace  | 0.60189    | 0.62405    | 0.64621    |   2.8 | 89.00
-Neigh   | 0.051681   | 0.073973   | 0.096265   |   8.2 | 10.55
-Comm    | 0.0016983  | 0.0018919  | 0.0020854  |   0.4 |  0.27
-Output  | 0.00034356 | 0.00044572 | 0.00054789 |   0.0 |  0.06
-Modify  | 0.00031281 | 0.0003171  | 0.00032139 |   0.0 |  0.05
-Other   |            | 0.0002786  |            |       |  0.04
-
-Nlocal:    500 ave 509 max 491 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Nghost:    455.5 ave 467 max 444 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Neighs:    122171 ave 171834 max 72508 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-
-Total # of neighbors = 244342
-Ave neighs/atom = 244.342
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.8 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.8
deleted file mode 100644
index 22d1140813..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.8
+++ /dev/null
@@ -1,109 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos fmm 0.001
-kspace_modify scafacos tolerance energy_rel
-kspace_modify scafacos fmm_tuning 1
-ScaFaCoS setting fmm inhomogen tuning ...
-
-timestep	0.005
-thermo          1
-
-run		20
-Setting up ScaFaCoS with solver fmm ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417141            0    1.6235786 0.0015676581 
-       1    18.780412   -10.770009            0    17.372438  0.016719188 
-       2    65.294131   -11.084501            0    86.758754   0.06035827 
-       3    121.92555   -7.0612033            0    175.64423    0.1140457 
-       4    185.71165   -5.8781334            0    272.41077   0.17455524 
-       5    286.28339   -4.3800108            0    424.61565   0.26911306 
-       6    481.28097   -4.3052012            0    716.89433   0.45262045 
-       7    487.26022   -3.8672741            0    726.29216   0.45830216 
-       8    493.65478   -3.0242687            0    736.71742   0.46443761 
-       9    495.66203   -3.4336343            0    739.31592   0.46613014 
-      10    498.41831   -2.8837072            0    743.99613   0.46887706 
-      11    499.20944   -2.7724783            0    745.29287   0.46966875 
-      12    500.97345   -2.8281484            0    747.88057   0.47126462 
-      13    507.46412   -2.7752775            0    757.65971   0.47728761 
-      14    525.35729   -2.5749814            0    784.67292   0.49422171 
-      15     563.9578   -2.9982381            0    842.09253   0.53043696 
-      16    645.47602   -2.5519203            0    964.69389   0.60730795 
-      17    647.09276   -2.2568468            0    967.41166   0.60891914 
-      18    647.12596   -2.2791003            0    967.43915   0.60900309 
-      19    647.24862   -2.2495226            0    967.65253   0.60908339 
-      20    647.51175   -2.0239179            0    968.27244   0.60932598 
-Loop time of 0.569395 on 8 procs for 20 steps with 1000 atoms
-
-Performance: 15174.000 tau/day, 35.125 timesteps/s
-99.3% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00021982 | 0.00023353 | 0.0002408  |   0.0 |  0.04
-Kspace  | 0.53679    | 0.54466    | 0.55292    |   0.8 | 95.66
-Neigh   | 0.011844   | 0.02033    | 0.028357   |   4.2 |  3.57
-Comm    | 0.0028894  | 0.0031579  | 0.0034704  |   0.4 |  0.55
-Output  | 0.0005579  | 0.00067073 | 0.0008719  |   0.0 |  0.12
-Modify  | 0.0001018  | 0.00011405 | 0.00012612 |   0.0 |  0.02
-Other   |            | 0.0002268  |            |       |  0.04
-
-Nlocal:    125 ave 137 max 111 min
-Histogram: 1 1 0 0 0 2 2 1 0 1
-Nghost:    768.875 ave 788 max 761 min
-Histogram: 4 0 2 0 0 0 1 0 0 1
-Neighs:    30542.8 ave 48077 max 10011 min
-Histogram: 1 1 1 0 1 1 0 0 0 3
-
-Total # of neighbors = 244342
-Ave neighs/atom = 244.342
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.2 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.2
deleted file mode 100644
index bd7e7fee1f..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.2
+++ /dev/null
@@ -1,107 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  1 by 1 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify scafacos tolerance potential
-
-timestep	0.005
-thermo          1
-
-run		20
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777511            0    272.51603   0.17462194 
-       5    286.36221   -4.3820531            0    424.73172   0.26918925 
-       6    481.42203   -4.3095567            0    717.10136   0.45274086 
-       7    488.59165   -3.8685193            0    728.28607   0.45956865 
-       8    497.85288   -3.0417938            0    742.99075   0.46838117 
-       9    499.61619   -3.4190063            0    745.25585   0.46983349 
-      10    502.63691   -2.8360951            0    750.36531   0.47280815 
-      11     504.4847   -2.7628089            0    753.20751   0.47462802 
-      12    506.54494   -2.8460319            0    756.21157    0.4765145 
-      13     508.2722   -2.7309328            0    758.91497   0.47813761 
-      14    510.57053   -2.6094792            0    762.48045   0.48031438 
-      15    513.14804   -2.7150819            0    766.23726   0.48275234 
-      16    515.78127   -2.3961749            0    770.50206   0.48526336 
-      17    515.70267   -2.2982581            0    770.48219   0.48526619 
-      18    515.70813   -2.1516075            0    770.63702   0.48530395 
-      19    515.74908   -2.0581483            0    770.79185   0.48530979 
-      20    515.70881    -1.892235            0    770.89742   0.48527104 
-Loop time of 0.701267 on 2 procs for 20 steps with 1000 atoms
-
-Performance: 12320.557 tau/day, 28.520 timesteps/s
-99.6% CPU use with 2 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0002265  | 0.00022769 | 0.00022888 |   0.0 |  0.03
-Kspace  | 0.60195    | 0.62374    | 0.64554    |   2.8 | 88.95
-Neigh   | 0.05268    | 0.074592   | 0.096504   |   8.0 | 10.64
-Comm    | 0.0015199  | 0.0016934  | 0.0018668  |   0.4 |  0.24
-Output  | 0.00031519 | 0.00041544 | 0.0005157  |   0.0 |  0.06
-Modify  | 0.00029492 | 0.00030565 | 0.00031638 |   0.0 |  0.04
-Other   |            | 0.000288   |            |       |  0.04
-
-Nlocal:    500 ave 516 max 484 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Nghost:    456.5 ave 475 max 438 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-Neighs:    123908 ave 172139 max 75678 min
-Histogram: 1 0 0 0 0 0 0 0 0 1
-
-Total # of neighbors = 247817
-Ave neighs/atom = 247.817
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.4 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.4
deleted file mode 100644
index 8a7e591f9a..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.4
+++ /dev/null
@@ -1,107 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  1 by 2 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify scafacos tolerance potential
-
-timestep	0.005
-thermo          1
-
-run		20
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777511            0    272.51603   0.17462194 
-       5    286.36221   -4.3820531            0    424.73172   0.26918925 
-       6    481.42203   -4.3095567            0    717.10136   0.45274086 
-       7    488.59165   -3.8685193            0    728.28607   0.45956865 
-       8    497.85288   -3.0417938            0    742.99075   0.46838117 
-       9    499.61619   -3.4190063            0    745.25585   0.46983349 
-      10    502.63691   -2.8360951            0    750.36531   0.47280815 
-      11     504.4847   -2.7628089            0    753.20751   0.47462802 
-      12    506.54494   -2.8460319            0    756.21157    0.4765145 
-      13     508.2722   -2.7309328            0    758.91497   0.47813761 
-      14    510.57053   -2.6094792            0    762.48045   0.48031438 
-      15    513.14804   -2.7150819            0    766.23726   0.48275234 
-      16    515.78127   -2.3961749            0    770.50206   0.48526336 
-      17    515.70267   -2.2982581            0    770.48219   0.48526619 
-      18    515.70813   -2.1516075            0    770.63702   0.48530395 
-      19    515.74908   -2.0581483            0    770.79185   0.48530979 
-      20    515.70881    -1.892235            0    770.89742   0.48527104 
-Loop time of 0.427495 on 4 procs for 20 steps with 1000 atoms
-
-Performance: 20210.785 tau/day, 46.784 timesteps/s
-99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0002327  | 0.00023341 | 0.00023437 |   0.0 |  0.05
-Kspace  | 0.36897    | 0.38411    | 0.39988    |   1.9 | 89.85
-Neigh   | 0.023831   | 0.039796   | 0.055124   |   6.1 |  9.31
-Comm    | 0.0022776  | 0.0025444  | 0.0028152  |   0.4 |  0.60
-Output  | 0.00033784 | 0.0004344  | 0.00057077 |   0.0 |  0.10
-Modify  | 0.00016117 | 0.00016713 | 0.00017095 |   0.0 |  0.04
-Other   |            | 0.0002093  |            |       |  0.05
-
-Nlocal:    250 ave 259 max 238 min
-Histogram: 1 0 0 1 0 0 0 0 0 2
-Nghost:    672.25 ave 683 max 663 min
-Histogram: 2 0 0 0 0 0 0 0 1 1
-Neighs:    61954.2 ave 97157 max 25016 min
-Histogram: 1 0 0 1 0 0 1 0 0 1
-
-Total # of neighbors = 247817
-Ave neighs/atom = 247.817
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.8 b/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.8
deleted file mode 100644
index 13922c09ac..0000000000
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.8
+++ /dev/null
@@ -1,107 +0,0 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
-  using 1 OpenMP thread(s) per MPI task
-# Point dipoles in a 2d box
-
-units		lj
-atom_style  charge
-
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
-  2 by 2 by 2 MPI processor grid
-  reading atoms ...
-  1000 atoms
-change_box all boundary f f f
-
-velocity	all create 1.5 49893
-
-neighbor	1.0 bin
-neigh_modify	delay 0
-
-fix             1 all nve
-
-# LAMMPS computes pairwise and long-range Coulombics
-
-#pair_style      coul/long 3.0
-#pair_coeff      * *
-#kspace_style    pppm 1.0e-3
-
-# Scafacos computes entire long-range Coulombics
-# use dummy pair style to perform atom sorting
-
-pair_style	zero 1.0
-pair_coeff	* *
-
-#fix		2 all scafacos p3m tolerance field 0.001
-
-kspace_style    scafacos p2nfft 0.001
-kspace_modify scafacos tolerance potential
-
-timestep	0.005
-thermo          1
-
-run		20
-Setting up ScaFaCoS with solver p2nfft ...
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/atomonly/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777511            0    272.51603   0.17462194 
-       5    286.36221   -4.3820531            0    424.73172   0.26918925 
-       6    481.42203   -4.3095567            0    717.10136   0.45274086 
-       7    488.59165   -3.8685193            0    728.28607   0.45956865 
-       8    497.85288   -3.0417938            0    742.99075   0.46838117 
-       9    499.61619   -3.4190063            0    745.25585   0.46983349 
-      10    502.63691   -2.8360951            0    750.36531   0.47280815 
-      11     504.4847   -2.7628089            0    753.20751   0.47462802 
-      12    506.54494   -2.8460319            0    756.21157    0.4765145 
-      13     508.2722   -2.7309328            0    758.91497   0.47813761 
-      14    510.57053   -2.6094792            0    762.48045   0.48031438 
-      15    513.14804   -2.7150819            0    766.23726   0.48275234 
-      16    515.78127   -2.3961749            0    770.50206   0.48526336 
-      17    515.70267   -2.2982581            0    770.48219   0.48526619 
-      18    515.70813   -2.1516075            0    770.63702   0.48530395 
-      19    515.74908   -2.0581483            0    770.79185   0.48530979 
-      20    515.70881    -1.892235            0    770.89742   0.48527104 
-Loop time of 0.242145 on 8 procs for 20 steps with 1000 atoms
-
-Performance: 35681.038 tau/day, 82.595 timesteps/s
-99.2% CPU use with 8 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.0002315  | 0.00023672 | 0.00024652 |   0.0 |  0.10
-Kspace  | 0.20915    | 0.21666    | 0.22564    |   1.3 | 89.48
-Neigh   | 0.012218   | 0.021341   | 0.029026   |   4.3 |  8.81
-Comm    | 0.0028954  | 0.0031248  | 0.0033553  |   0.3 |  1.29
-Output  | 0.00039291 | 0.00049406 | 0.00066066 |   0.0 |  0.20
-Modify  | 8.7976e-05 | 9.2953e-05 | 9.7752e-05 |   0.0 |  0.04
-Other   |            | 0.0001938  |            |       |  0.08
-
-Nlocal:    125 ave 133 max 113 min
-Histogram: 2 0 0 0 0 1 1 0 2 2
-Nghost:    773.625 ave 788 max 764 min
-Histogram: 1 1 2 1 1 0 0 0 1 1
-Neighs:    30977.1 ave 50690 max 10447 min
-Histogram: 1 1 1 0 1 1 0 0 2 1
-
-Total # of neighbors = 247817
-Ave neighs/atom = 247.817
-Neighbor list builds = 19
-Dangerous builds = 18
-Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.1 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.1
new file mode 100644
index 0000000000..e0b10de95e
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.1
@@ -0,0 +1,92 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos ewald  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul press
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver ewald ...
+WARNING: Virial computation for Ewald not available (src/USER-SCAFACOS/scafacos.cpp:107)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
+       0      300            0            0   0.49647271            0   0.49647271            0            0 
+      10      300   0.13361073  0.015454603   0.47336798  0.023104631   0.49647261            0 0.0046209262 
+      20      300   0.26624846  0.062286838   0.40335451  0.093118823   0.49647334            0  0.018623765 
+      30      300   0.39882493   0.14309258    0.2825546   0.21392341   0.49647801            0  0.042784682 
+      40      300   0.53216863   0.52823732   0.08958779   0.78971479   0.87930258            0   0.15794296 
+      50      300   0.66626191    1.1912156 -0.082808834    1.7808674    1.6980585            0   0.35617347 
+      60      300   0.80045819    1.3877079  -0.37658402    2.0746234    1.6980393            0   0.41492467 
+      70      300   0.93514919    1.7017671  -0.84569366    2.5441418    1.6984482            0   0.50882837 
+      80      300    1.0722892    15.128456    -3.370646    22.617042    19.246396            0    4.5234083 
+      90      300    1.2106726    41.994547   -2.2024419    62.781847    60.579406            0    12.556369 
+     100      300    1.3514247    41.352114  -0.92449496    61.821411    60.896916            0    12.364282 
+Loop time of 1.35144 on 1 procs for 100 steps with 300 atoms
+
+Performance: 31966.000 tau/day, 73.995 timesteps/s
+99.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.718e-05  | 2.718e-05  | 2.718e-05  |   0.0 |  0.00
+Kspace  | 1.3486     | 1.3486     | 1.3486     |   0.0 | 99.79
+Neigh   | 0.0019956  | 0.0019956  | 0.0019956  |   0.0 |  0.15
+Comm    | 0.00027394 | 0.00027394 | 0.00027394 |   0.0 |  0.02
+Output  | 0.00013924 | 0.00013924 | 0.00013924 |   0.0 |  0.01
+Modify  | 0.00023341 | 0.00023341 | 0.00023341 |   0.0 |  0.02
+Other   |            | 0.000195   |            |       |  0.01
+
+Nlocal:    300 ave 300 max 300 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    374 ave 374 max 374 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2465 ave 2465 max 2465 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2465
+Ave neighs/atom = 8.21667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.4
new file mode 100644
index 0000000000..f881ed003a
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.ewald.g++.4
@@ -0,0 +1,92 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos ewald  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul press
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver ewald ...
+WARNING: Virial computation for Ewald not available (src/USER-SCAFACOS/scafacos.cpp:107)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
+       0      300            0            0   0.49647271            0   0.49647271            0            0 
+      10      300   0.04107213  0.015454603   0.47336798  0.023104631   0.49647261            0 0.0046209262 
+      20      300  0.083310604  0.062286838   0.40335451  0.093118823   0.49647334            0  0.018623765 
+      30      300   0.12387085   0.14309258    0.2825546   0.21392341   0.49647801            0  0.042784682 
+      40      300    0.1672492   0.52823732   0.08958779   0.78971479   0.87930258            0   0.15794296 
+      50      300   0.21049809    1.1912156 -0.082808834    1.7808674    1.6980585            0   0.35617347 
+      60      300   0.25216699    1.3877079  -0.37658402    2.0746234    1.6980393            0   0.41492467 
+      70      300   0.29355645    1.7017671  -0.84569366    2.5441418    1.6984482            0   0.50882837 
+      80      300   0.33713698    15.128456    -3.370646    22.617042    19.246396            0    4.5234083 
+      90      300   0.38109112    41.994547   -2.2024419    62.781847    60.579406            0    12.556369 
+     100      300    0.4267664    41.352114  -0.92449496    61.821411    60.896916            0    12.364282 
+Loop time of 0.426857 on 4 procs for 100 steps with 300 atoms
+
+Performance: 101204.784 tau/day, 234.270 timesteps/s
+97.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.6689e-05 | 1.8835e-05 | 2.2173e-05 |   0.0 |  0.00
+Kspace  | 0.42368    | 0.42384    | 0.42405    |   0.0 | 99.29
+Neigh   | 0.00040579 | 0.00056726 | 0.00075126 |   0.0 |  0.13
+Comm    | 0.0010395  | 0.0010909  | 0.001189   |   0.2 |  0.26
+Output  | 0.00015545 | 0.00034326 | 0.00090313 |   0.0 |  0.08
+Modify  | 9.8705e-05 | 0.00010544 | 0.00011539 |   0.0 |  0.02
+Other   |            | 0.0008868  |            |       |  0.21
+
+Nlocal:    75 ave 81 max 70 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Nghost:    282.5 ave 290 max 274 min
+Histogram: 1 0 0 1 0 0 0 0 1 1
+Neighs:    616.25 ave 983 max 283 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+
+Total # of neighbors = 2465
+Ave neighs/atom = 8.21667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.1 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.1
similarity index 51%
rename from examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.1
index 714ce85a5f..da0c698187 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.fmm.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.1
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 units	    lj
 atom_style  charge
@@ -59,41 +58,41 @@ Neighbor list info ...
 Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
 Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
        0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
-      10      300  0.063865185  0.015455559   0.47335833   0.02310606   0.49646439            0  0.020399823 
-      20      300   0.12760854   0.06229069   0.40334177  0.093124582   0.49646635            0  0.032069642 
-      30      300   0.19143319   0.14310163   0.28254277   0.21393694   0.49647971            0   0.05220548 
-      40      300   0.25553131   0.52929788  0.089669015   0.79130033   0.88096934            0   0.16124903 
-      50      300   0.31961966    1.1963022 -0.082792461    1.7884718    1.7056794            0   0.35493462 
-      60      300   0.38388991    1.3928167  -0.37659239     2.082261    1.7056686            0   0.40389911 
-      70      300   0.44797421    1.7069009  -0.84571914    2.5518169    1.7060978            0   0.48217274 
-      80      300   0.50961447    15.358343    -3.368063    22.960722    19.592659            0    4.4798757 
-      90      300   0.57181501    42.280432   -2.1623864    63.209247     61.04686            0     12.56977 
-     100      300   0.63501096     41.48079  -0.89904529    62.013782    61.114736            0    12.372788 
-Loop time of 0.635022 on 1 procs for 100 steps with 300 atoms
+      10      300  0.025602102  0.015455672   0.47336221  0.023106229   0.49646844            0  0.020399986 
+      20      300  0.051156282  0.062291145   0.40334518  0.093125262   0.49647044            0  0.032069892 
+      30      300  0.076699495   0.14310262   0.28254543   0.21393842   0.49648385            0  0.052205865 
+      40      300   0.10232902   0.52932294  0.089676201   0.79133779   0.88101399            0   0.16125676 
+      50      300   0.12800455    1.1962334 -0.082794531     1.788369    1.7055744            0   0.35491397 
+      60      300    0.1537931    1.3927481  -0.37659486    2.0821585    1.7055636            0   0.40387853 
+      70      300   0.17947531    1.7068337  -0.84572368    2.5517165    1.7059928            0    0.4821525 
+      80      300   0.20517826    15.509932    -3.369527    23.187348    19.817821            0    4.5251521 
+      90      300   0.23049045     42.26862   -2.0785273    63.191586    61.113059            0    12.569033 
+     100      300   0.25625205    41.459415  -0.88546313    61.981825    61.096362            0     12.36685 
+Loop time of 0.256263 on 1 procs for 100 steps with 300 atoms
 
-Performance: 68029.122 tau/day, 157.475 timesteps/s
-99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 168576.488 tau/day, 390.223 timesteps/s
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 9.0837e-05 | 9.0837e-05 | 9.0837e-05 |   0.0 |  0.01
-Kspace  | 0.62877    | 0.62877    | 0.62877    |   0.0 | 99.01
-Neigh   | 0.0035319  | 0.0035319  | 0.0035319  |   0.0 |  0.56
-Comm    | 0.0010211  | 0.0010211  | 0.0010211  |   0.0 |  0.16
-Output  | 0.00014758 | 0.00014758 | 0.00014758 |   0.0 |  0.02
-Modify  | 0.0010428  | 0.0010428  | 0.0010428  |   0.0 |  0.16
-Other   |            | 0.0004218  |            |       |  0.07
+Pair    | 2.7895e-05 | 2.7895e-05 | 2.7895e-05 |   0.0 |  0.01
+Kspace  | 0.25335    | 0.25335    | 0.25335    |   0.0 | 98.86
+Neigh   | 0.0020251  | 0.0020251  | 0.0020251  |   0.0 |  0.79
+Comm    | 0.00027776 | 0.00027776 | 0.00027776 |   0.0 |  0.11
+Output  | 0.00014305 | 0.00014305 | 0.00014305 |   0.0 |  0.06
+Modify  | 0.0002346  | 0.0002346  | 0.0002346  |   0.0 |  0.09
+Other   |            | 0.0002046  |            |       |  0.08
 
 Nlocal:    300 ave 300 max 300 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    374 ave 374 max 374 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    2459 ave 2459 max 2459 min
+Neighs:    2461 ave 2461 max 2461 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 
-Total # of neighbors = 2459
-Ave neighs/atom = 8.19667
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
 Neighbor list builds = 15
 Dangerous builds = 0
 Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.4
new file mode 100644
index 0000000000..30fa988863
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.fmm.g++.4
@@ -0,0 +1,98 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul press
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
+       0      300            0            0   0.49646402            0   0.49646402            0  0.016548801 
+      10      300   0.01548481  0.015455672   0.47336221  0.023106229   0.49646844            0  0.020399986 
+      20      300  0.031042576  0.062291145   0.40334518  0.093125262   0.49647044            0  0.032069892 
+      30      300  0.044115782   0.14310262   0.28254543   0.21393842   0.49648385            0  0.052205865 
+      40      300  0.059348583   0.52932294  0.089676201   0.79133779   0.88101399            0   0.16125676 
+      50      300  0.071757555    1.1962334 -0.082794531     1.788369    1.7055744            0   0.35491397 
+      60      300  0.088143587    1.3927481  -0.37659486    2.0821585    1.7055636            0   0.40387853 
+      70      300  0.099860907    1.7068337  -0.84572368    2.5517165    1.7059928            0    0.4821525 
+      80      300   0.11159992    15.509932    -3.369527    23.187348    19.817821            0    4.5251521 
+      90      300   0.12841201     42.26862   -2.0785273    63.191586    61.113059            0    12.569033 
+     100      300   0.14122367    41.459415  -0.88546313    61.981825    61.096362            0     12.36685 
+Loop time of 0.141343 on 4 procs for 100 steps with 300 atoms
+
+Performance: 305639.349 tau/day, 707.498 timesteps/s
+94.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.5988e-05 | 2.9087e-05 | 3.2663e-05 |   0.0 |  0.02
+Kspace  | 0.13587    | 0.13743    | 0.13808    |   0.2 | 97.23
+Neigh   | 0.00041199 | 0.00057679 | 0.00076771 |   0.0 |  0.41
+Comm    | 0.001039   | 0.0015332  | 0.0029936  |   2.2 |  1.08
+Output  | 0.00017786 | 0.00044322 | 0.0012352  |   0.0 |  0.31
+Modify  | 0.00017333 | 0.00018805 | 0.00020623 |   0.0 |  0.13
+Other   |            | 0.001146   |            |       |  0.81
+
+Nlocal:    75 ave 80 max 71 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Nghost:    282.5 ave 290 max 275 min
+Histogram: 1 0 1 0 0 0 0 0 1 1
+Neighs:    615.25 ave 970 max 292 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.1 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.1
similarity index 64%
rename from examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.1
index f47f24edee..10c2fa166b 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.1
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 
 units		lj
@@ -52,31 +51,31 @@ Neighbor list info ...
 Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
 Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
        0      300            0            0   0.49683273            0   0.49683273            0  0.016561091 
-      10      300  0.071435928  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
-      20      300   0.14302707  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
-      30      300   0.21480989   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
-      40      300   0.28638172   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
-      50      300   0.35810781    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
-      60      300   0.42993116    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
-      70      300   0.50181961    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
-      80      300   0.57404566    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
-      90      300   0.64724708    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
-     100      300   0.72128963    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
-Loop time of 0.721302 on 1 procs for 100 steps with 300 atoms
+      10      300  0.028267145  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
+      20      300  0.056742668  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
+      30      300  0.085426331   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
+      40      300   0.11415625   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
+      50      300   0.14363861    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
+      60      300   0.17313194    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
+      70      300   0.20299363    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
+      80      300   0.23256087    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
+      90      300   0.26286936    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
+     100      300   0.29372239    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
+Loop time of 0.293738 on 1 procs for 100 steps with 300 atoms
 
-Performance: 59891.733 tau/day, 138.638 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 147070.013 tau/day, 340.440 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 8.893e-05  | 8.893e-05  | 8.893e-05  |   0.0 |  0.01
-Kspace  | 0.71502    | 0.71502    | 0.71502    |   0.0 | 99.13
-Neigh   | 0.0035415  | 0.0035415  | 0.0035415  |   0.0 |  0.49
-Comm    | 0.001024   | 0.001024   | 0.001024   |   0.0 |  0.14
-Output  | 0.00015044 | 0.00015044 | 0.00015044 |   0.0 |  0.02
-Modify  | 0.0010409  | 0.0010409  | 0.0010409  |   0.0 |  0.14
-Other   |            | 0.0004385  |            |       |  0.06
+Pair    | 4.3869e-05 | 4.3869e-05 | 4.3869e-05 |   0.0 |  0.01
+Kspace  | 0.29081    | 0.29081    | 0.29081    |   0.0 | 99.00
+Neigh   | 0.0019989  | 0.0019989  | 0.0019989  |   0.0 |  0.68
+Comm    | 0.0003159  | 0.0003159  | 0.0003159  |   0.0 |  0.11
+Output  | 0.00016809 | 0.00016809 | 0.00016809 |   0.0 |  0.06
+Modify  | 0.00021863 | 0.00021863 | 0.00021863 |   0.0 |  0.07
+Other   |            | 0.0001831  |            |       |  0.06
 
 Nlocal:    300 ave 300 max 300 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.4 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.4
similarity index 64%
rename from examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.4
index d5a07e0324..aa828ed159 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.cw.g++.p2nfft.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p2nfft.g++.4
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 
 units		lj
@@ -52,31 +51,31 @@ Neighbor list info ...
 Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
 Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
        0      300            0            0   0.49683273            0   0.49683273            0  0.016561091 
-      10      300   0.02743125  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
-      20      300   0.05494833  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
-      30      300  0.082517862   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
-      40      300   0.11015558   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
-      50      300   0.13790298    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
-      60      300    0.1660006    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
-      70      300    0.1937964    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
-      80      300   0.22181106    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
-      90      300   0.25105524    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
-     100      300   0.28086019    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
-Loop time of 0.280875 on 4 procs for 100 steps with 300 atoms
+      10      300  0.013789177  0.015479312   0.47369009  0.023141571   0.49683166            0  0.020417984 
+      20      300  0.025422812  0.062386358   0.40356181  0.093267605   0.49682941            0  0.032105581 
+      30      300  0.039611578   0.14331637    0.2825636   0.21425798   0.49682157            0  0.052270382 
+      40      300  0.051841736   0.53041843  0.089505208   0.79297556   0.88248077            0   0.16157862 
+      50      300   0.07456398    1.1948397 -0.083317439    1.7862853    1.7029679            0   0.35447982 
+      60      300  0.087505341    1.3915614  -0.37745551    2.0803842    1.7029287            0   0.40349499 
+      70      300   0.10280418    1.7061978  -0.84746071    2.5507657     1.703305            0   0.48190445 
+      80      300    0.1173315    20.692093     -3.32971     30.93468     27.60497            0    6.0759456 
+      90      300   0.13260174    48.999403   -2.1632167    73.254107    71.090891            0    14.578714 
+     100      300   0.14577007    51.199785  -0.81127924    76.543678    75.732399            0    15.281693 
+Loop time of 0.145873 on 4 procs for 100 steps with 300 atoms
 
-Performance: 153805.254 tau/day, 356.031 timesteps/s
-99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 296148.120 tau/day, 685.528 timesteps/s
+92.8% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 5.6744e-05 | 6.0022e-05 | 6.4135e-05 |   0.0 |  0.02
-Kspace  | 0.27651    | 0.27682    | 0.27714    |   0.0 | 98.56
-Neigh   | 0.00079465 | 0.001082   | 0.0014107  |   0.8 |  0.39
-Comm    | 0.0019372  | 0.002014   | 0.0020835  |   0.1 |  0.72
-Output  | 0.00018406 | 0.00019914 | 0.00023413 |   0.0 |  0.07
-Modify  | 0.0002749  | 0.00028563 | 0.00029325 |   0.0 |  0.10
-Other   |            | 0.0004173  |            |       |  0.15
+Pair    | 2.3842e-05 | 2.5034e-05 | 2.7657e-05 |   0.0 |  0.02
+Kspace  | 0.14221    | 0.14239    | 0.14264    |   0.0 | 97.61
+Neigh   | 0.00041676 | 0.0005784  | 0.00076866 |   0.0 |  0.40
+Comm    | 0.0013564  | 0.0014941  | 0.0016375  |   0.3 |  1.02
+Output  | 0.00017166 | 0.00033867 | 0.00083518 |   0.0 |  0.23
+Modify  | 0.00012016 | 0.00012261 | 0.0001266  |   0.0 |  0.08
+Other   |            | 0.0009243  |            |       |  0.63
 
 Nlocal:    75 ave 81 max 69 min
 Histogram: 1 0 0 0 1 1 0 0 0 1
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.1 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.1
new file mode 100644
index 0000000000..9410529284
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.1
@@ -0,0 +1,92 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p3m  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul press
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
+       0      300            0            0   0.49619864            0   0.49619864            0            0 
+      10      300  0.018853664  0.015458704   0.47310664  0.023110763    0.4962174            0 0.0046221526 
+      20      300  0.037713289  0.062303961   0.40312435  0.093144421   0.49626877            0  0.018628884 
+      30      300  0.055995226   0.14313464   0.28239193   0.21398628   0.49637821            0  0.042797257 
+      40      300  0.074442625   0.53306643  0.089950137   0.79693431   0.88688444            0   0.15938686 
+      50      300  0.093066454    1.1925037  -0.08272797    1.7827931    1.7000651            0   0.35655861 
+      60      300   0.11158967    1.3890617  -0.37645444    2.0766472    1.7001927            0   0.41532944 
+      70      300    0.1300416    1.7033725  -0.84587122    2.5465418    1.7006706            0   0.50930837 
+      80      300   0.14844203    15.906218   -3.3586154    23.779796    20.421181            0    4.7559592 
+      90      300    0.1676445    42.749883   -2.2769116    63.911076    61.634164            0    12.782215 
+     100      300    0.1866703     42.25523  -0.84978755    63.171569    62.321782            0    12.634314 
+Loop time of 0.186683 on 1 procs for 100 steps with 300 atoms
+
+Performance: 231408.103 tau/day, 535.667 timesteps/s
+99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.7657e-05 | 2.7657e-05 | 2.7657e-05 |   0.0 |  0.01
+Kspace  | 0.18368    | 0.18368    | 0.18368    |   0.0 | 98.39
+Neigh   | 0.0020967  | 0.0020967  | 0.0020967  |   0.0 |  1.12
+Comm    | 0.00027132 | 0.00027132 | 0.00027132 |   0.0 |  0.15
+Output  | 0.00013709 | 0.00013709 | 0.00013709 |   0.0 |  0.07
+Modify  | 0.0002718  | 0.0002718  | 0.0002718  |   0.0 |  0.15
+Other   |            | 0.0001974  |            |       |  0.11
+
+Nlocal:    300 ave 300 max 300 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    374 ave 374 max 374 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2478 ave 2478 max 2478 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2478
+Ave neighs/atom = 8.26
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.4
new file mode 100644
index 0000000000..2dc5c07b2b
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.cw.p3m.g++.4
@@ -0,0 +1,92 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p3m  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul press
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul Press 
+       0      300            0            0   0.49601855            0   0.49601855            0            0 
+      10      300 0.0080001354  0.015443077   0.47292202    0.0230874   0.49600942            0   0.00461748 
+      20      300  0.018443823  0.062241297   0.40294626  0.093050739     0.495997            0  0.018610148 
+      30      300  0.029161692   0.14299426   0.28226416   0.21377642   0.49604058            0  0.042755283 
+      40      300  0.037320137   0.53371718  0.089969593   0.79790718   0.88787677            0   0.15958144 
+      50      300  0.045557499     1.195874 -0.082570725    1.7878316    1.7052608            0   0.35756631 
+      60      300  0.054044962    1.3923357  -0.37594466    2.0815419    1.7055972            0   0.41630838 
+      70      300  0.064656734    1.7063617  -0.84481819    2.5510108    1.7061926            0   0.51020216 
+      80      300  0.073246241    13.884845   -3.3804644    20.757843    17.377379            0    4.1515687 
+      90      300  0.081598997    40.561354   -2.1801816    60.639224    58.459042            0    12.127845 
+     100      300  0.092384577    39.972453  -0.95590215    59.758818    58.802916            0    11.951764 
+Loop time of 0.0924569 on 4 procs for 100 steps with 300 atoms
+
+Performance: 467244.853 tau/day, 1081.585 timesteps/s
+96.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.812e-05  | 1.955e-05  | 2.1935e-05 |   0.0 |  0.02
+Kspace  | 0.087202   | 0.087385   | 0.087595   |   0.1 | 94.51
+Neigh   | 0.00039053 | 0.00056016 | 0.00074887 |   0.0 |  0.61
+Comm    | 0.00095391 | 0.001252   | 0.001538   |   0.6 |  1.35
+Output  | 0.00015235 | 0.00026172 | 0.00058818 |   0.0 |  0.28
+Modify  | 9.7275e-05 | 0.00010461 | 0.00011683 |   0.0 |  0.11
+Other   |            | 0.002874   |            |       |  3.11
+
+Nlocal:    75 ave 81 max 70 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Nghost:    284 ave 290 max 277 min
+Histogram: 1 0 0 1 0 0 0 1 0 1
+Neighs:    619.25 ave 986 max 285 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+
+Total # of neighbors = 2477
+Ave neighs/atom = 8.25667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.1 b/examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.1
similarity index 79%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.1
index 13212ede0c..e6cd7c506f 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.1
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 1 by 1 MPI processor grid
   4096 atoms
-  Time spent = 0.000498772 secs
+  Time spent = 0.000287294 secs
 
 velocity	all create 1.5 49893
 
@@ -47,6 +46,7 @@ thermo          10
 
 run		100
 Setting up ScaFaCoS with solver ewald ...
+WARNING: Virial computation for Ewald not available (src/USER-SCAFACOS/scafacos.cpp:107)
 Neighbor list info ...
   update every 1 steps, delay 0 steps, check yes
   max neighbors/atom: 2000, page size: 100000
@@ -72,21 +72,21 @@ Step Temp E_pair E_mol TotEng Press
       80    1.5033218   -1.7524875            0   0.50194458    12.023638 
       90    1.5108306   -1.7637462            0   0.50194636    12.083694 
      100    1.5292479   -1.7913449            0   0.50196695    12.230996 
-Loop time of 1121.22 on 1 procs for 100 steps with 4096 atoms
+Loop time of 427.03 on 1 procs for 100 steps with 4096 atoms
 
-Performance: 38.530 tau/day, 0.089 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 101.164 tau/day, 0.234 timesteps/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.0015197  | 0.0015197  | 0.0015197  |   0.0 |  0.00
-Kspace  | 1121.2     | 1121.2     | 1121.2     |   0.0 |100.00
+Pair    | 0.0003655  | 0.0003655  | 0.0003655  |   0.0 |  0.00
+Kspace  | 427.02     | 427.02     | 427.02     |   0.0 |100.00
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.013699   | 0.013699   | 0.013699   |   0.0 |  0.00
-Output  | 0.00038314 | 0.00038314 | 0.00038314 |   0.0 |  0.00
-Modify  | 0.011126   | 0.011126   | 0.011126   |   0.0 |  0.00
-Other   |            | 0.00418    |            |       |  0.00
+Comm    | 0.0047293  | 0.0047293  | 0.0047293  |   0.0 |  0.00
+Output  | 0.00025916 | 0.00025916 | 0.00025916 |   0.0 |  0.00
+Modify  | 0.0029993  | 0.0029993  | 0.0029993  |   0.0 |  0.00
+Other   |            | 0.00181    |            |       |  0.00
 
 Nlocal:    4096 ave 4096 max 4096 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -99,4 +99,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:18:57
+Total wall time: 0:07:10
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.4 b/examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.4
similarity index 76%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.4
index 26c1953afa..39e0387491 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.ewald.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.ewald.g++.4
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 2 by 2 MPI processor grid
   4096 atoms
-  Time spent = 0.000261068 secs
+  Time spent = 0.000125408 secs
 
 velocity	all create 1.5 49893
 
@@ -47,6 +46,7 @@ thermo          10
 
 run		100
 Setting up ScaFaCoS with solver ewald ...
+WARNING: Virial computation for Ewald not available (src/USER-SCAFACOS/scafacos.cpp:107)
 Neighbor list info ...
   update every 1 steps, delay 0 steps, check yes
   max neighbors/atom: 2000, page size: 100000
@@ -61,32 +61,32 @@ Neighbor list info ...
       bin: standard
 Per MPI rank memory allocation (min/avg/max) = 4.008 | 4.008 | 4.008 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5   -1.7475938            0   0.50185691         -nan 
-      10     1.500011    -1.747529            0   0.50193816         -nan 
-      20    1.5000023   -1.7475152            0   0.50193898         -nan 
-      30    1.4999308    -1.747404            0   0.50194285         -nan 
-      40    1.4997722   -1.7471622            0   0.50194686         -nan 
-      50    1.4995835    -1.746878            0   0.50194808         -nan 
-      60    1.4996054   -1.7469114            0   0.50194749         -nan 
-      70    1.5004341   -1.7481558            0   0.50194592         -nan 
-      80    1.5033218   -1.7524875            0   0.50194458         -nan 
-      90    1.5108306   -1.7637462            0   0.50194636         -nan 
-     100    1.5292479   -1.7913449            0   0.50196695         -nan 
-Loop time of 295.996 on 4 procs for 100 steps with 4096 atoms
+       0          1.5   -1.7475938            0   0.50185691     11.99707 
+      10     1.500011    -1.747529            0   0.50193816    11.997158 
+      20    1.5000023   -1.7475152            0   0.50193898    11.997089 
+      30    1.4999308    -1.747404            0   0.50194285    11.996517 
+      40    1.4997722   -1.7471622            0   0.50194686    11.995248 
+      50    1.4995835    -1.746878            0   0.50194808    11.993739 
+      60    1.4996054   -1.7469114            0   0.50194749    11.993914 
+      70    1.5004341   -1.7481558            0   0.50194592    12.000543 
+      80    1.5033218   -1.7524875            0   0.50194458    12.023638 
+      90    1.5108306   -1.7637462            0   0.50194636    12.083694 
+     100    1.5292479   -1.7913449            0   0.50196695    12.230996 
+Loop time of 113.519 on 4 procs for 100 steps with 4096 atoms
 
-Performance: 145.948 tau/day, 0.338 timesteps/s
-99.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 380.555 tau/day, 0.881 timesteps/s
+98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00071096 | 0.00071985 | 0.00072813 |   0.0 |  0.00
-Kspace  | 295.98     | 295.98     | 295.98     |   0.0 | 99.99
+Pair    | 0.00022793 | 0.00023127 | 0.00023508 |   0.0 |  0.00
+Kspace  | 113.51     | 113.51     | 113.51     |   0.0 | 99.99
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.013666   | 0.013736   | 0.013795   |   0.0 |  0.00
-Output  | 0.00023484 | 0.00025135 | 0.00029254 |   0.0 |  0.00
-Modify  | 0.0029099  | 0.002973   | 0.0030224  |   0.1 |  0.00
-Other   |            | 0.001821   |            |       |  0.00
+Comm    | 0.0084581  | 0.0085486  | 0.0086188  |   0.1 |  0.01
+Output  | 0.00019598 | 0.00046694 | 0.0012727  |   0.0 |  0.00
+Modify  | 0.0016849  | 0.0017331  | 0.0017881  |   0.1 |  0.00
+Other   |            | 0.002307   |            |       |  0.00
 
 Nlocal:    1024 ave 1024 max 1024 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
@@ -99,4 +99,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:05:02
+Total wall time: 0:01:54
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.1 b/examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.1
similarity index 79%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.1
index 598585ae95..b9ae654659 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.1
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 1 by 1 MPI processor grid
   4096 atoms
-  Time spent = 0.000518799 secs
+  Time spent = 0.000326395 secs
 
 velocity	all create 1.5 49893
 
@@ -72,21 +71,21 @@ Step Temp E_pair E_mol TotEng Press
       80    1.5032684   -1.7524625            0   0.50188958    10.465466 
       90    1.5107749    -1.763714            0   0.50189507    10.515502 
      100      1.52919    -1.791306            0   0.50191895    10.638261 
-Loop time of 34.7058 on 1 procs for 100 steps with 4096 atoms
+Loop time of 9.55733 on 1 procs for 100 steps with 4096 atoms
 
-Performance: 1244.749 tau/day, 2.881 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 4520.093 tau/day, 10.463 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.0015228  | 0.0015228  | 0.0015228  |   0.0 |  0.00
-Kspace  | 34.675     | 34.675     | 34.675     |   0.0 | 99.91
+Pair    | 0.0003767  | 0.0003767  | 0.0003767  |   0.0 |  0.00
+Kspace  | 9.5472     | 9.5472     | 9.5472     |   0.0 | 99.89
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.013741   | 0.013741   | 0.013741   |   0.0 |  0.04
-Output  | 0.00041246 | 0.00041246 | 0.00041246 |   0.0 |  0.00
-Modify  | 0.01107    | 0.01107    | 0.01107    |   0.0 |  0.03
-Other   |            | 0.004232   |            |       |  0.01
+Comm    | 0.0047688  | 0.0047688  | 0.0047688  |   0.0 |  0.05
+Output  | 0.00027132 | 0.00027132 | 0.00027132 |   0.0 |  0.00
+Modify  | 0.0029824  | 0.0029824  | 0.0029824  |   0.0 |  0.03
+Other   |            | 0.001692   |            |       |  0.02
 
 Nlocal:    4096 ave 4096 max 4096 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -99,4 +98,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:00:35
+Total wall time: 0:00:10
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.4 b/examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.4
similarity index 79%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.4
index 27fdfcedcb..7ee8d3d068 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.fmm.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.fmm.g++.4
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 2 by 2 MPI processor grid
   4096 atoms
-  Time spent = 0.000270367 secs
+  Time spent = 0.00015521 secs
 
 velocity	all create 1.5 49893
 
@@ -72,21 +71,21 @@ Step Temp E_pair E_mol TotEng Press
       80    1.5032684   -1.7524625            0   0.50188958    10.465466 
       90    1.5107749    -1.763714            0   0.50189507    10.515502 
      100      1.52919    -1.791306            0   0.50191895    10.638261 
-Loop time of 10.0781 on 4 procs for 100 steps with 4096 atoms
+Loop time of 3.05974 on 4 procs for 100 steps with 4096 atoms
 
-Performance: 4286.533 tau/day, 9.923 timesteps/s
-99.9% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 14118.863 tau/day, 32.683 timesteps/s
+98.6% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00071096 | 0.00073177 | 0.00075269 |   0.0 |  0.01
-Kspace  | 10.056     | 10.057     | 10.057     |   0.0 | 99.79
+Pair    | 0.00021482 | 0.0002175  | 0.00022054 |   0.0 |  0.01
+Kspace  | 3.0468     | 3.0468     | 3.0468     |   0.0 | 99.58
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.01492    | 0.015036   | 0.015207   |   0.1 |  0.15
-Output  | 0.00036311 | 0.00039428 | 0.00046515 |   0.0 |  0.00
-Modify  | 0.002944   | 0.0030704  | 0.0033708  |   0.3 |  0.03
-Other   |            | 0.002214   |            |       |  0.02
+Comm    | 0.0081758  | 0.0082486  | 0.0083146  |   0.1 |  0.27
+Output  | 0.00019073 | 0.00049388 | 0.0013943  |   0.0 |  0.02
+Modify  | 0.001507   | 0.0015851  | 0.0017498  |   0.2 |  0.05
+Other   |            | 0.002385   |            |       |  0.08
 
 Nlocal:    1024 ave 1024 max 1024 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
@@ -99,4 +98,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:00:11
+Total wall time: 0:00:04
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.g++.1 b/examples/USER/scafacos/log.27Nov18.scafacos.g++.1
new file mode 100644
index 0000000000..6b36f0ba91
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.g++.1
@@ -0,0 +1,102 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (1 1 1)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (8 8 8)
+  1 by 1 by 1 MPI processor grid
+  4096 atoms
+  Time spent = 0.000253677 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p3m 0.001
+#kspace_style    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 8 8 8
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 5.813 | 5.813 | 5.813 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5   -1.7475752            0    0.5018755     11.99707 
+      10    1.5000018   -1.7475779            0   0.50187548    11.997085 
+      20    1.4999833   -1.7475525            0    0.5018731    11.996936 
+      30    1.4999006   -1.7474414            0    0.5018603    11.996276 
+      40      1.49973   -1.7471989            0   0.50184695    11.994911 
+      50    1.4995292   -1.7469064            0   0.50183822    11.993305 
+      60      1.49954   -1.7469273            0   0.50183355    11.993391 
+      70    1.5003599   -1.7481583            0   0.50183215    11.999949 
+      80    1.5032409    -1.752478            0   0.50183276    12.022991 
+      90    1.5107445   -1.7637257            0   0.50183781    12.083005 
+     100    1.5291568   -1.7913136            0   0.50186158    12.230268 
+Loop time of 2.98552 on 1 procs for 100 steps with 4096 atoms
+
+Performance: 14469.821 tau/day, 33.495 timesteps/s
+98.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0003686  | 0.0003686  | 0.0003686  |   0.0 |  0.01
+Kspace  | 2.9749     | 2.9749     | 2.9749     |   0.0 | 99.65
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0049963  | 0.0049963  | 0.0049963  |   0.0 |  0.17
+Output  | 0.00027728 | 0.00027728 | 0.00027728 |   0.0 |  0.01
+Modify  | 0.0031269  | 0.0031269  | 0.0031269  |   0.0 |  0.10
+Other   |            | 0.001828   |            |       |  0.06
+
+Nlocal:    4096 ave 4096 max 4096 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    9728 ave 9728 max 9728 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    524288 ave 524288 max 524288 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 524288
+Ave neighs/atom = 128
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.g++.4
new file mode 100644
index 0000000000..23f55fc5a6
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.g++.4
@@ -0,0 +1,102 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (1 1 1)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (8 8 8)
+  1 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.00012064 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p3m 0.001
+#kspace_style    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 8 8 8
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.008 | 4.008 | 4.008 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5   -1.7475814            0   0.50186926     11.99707 
+      10    1.5000019   -1.7475841            0   0.50186935    11.997085 
+      20    1.4999837    -1.747556            0   0.50187017     11.99694 
+      30    1.4999022   -1.7474323            0   0.50187181    11.996288 
+      40    1.4997327   -1.7471824            0   0.50186744    11.994932 
+      50    1.4995318   -1.7468908            0   0.50185778    11.993326 
+      60     1.499542   -1.7469166            0    0.5018472    11.993407 
+      70    1.5003608   -1.7481509            0   0.50184084    11.999956 
+      80    1.5032406   -1.7524717            0    0.5018387    12.022989 
+      90    1.5107432   -1.7637208            0    0.5018408    12.082995 
+     100    1.5291549   -1.7913103            0   0.50186206    12.230252 
+Loop time of 1.04979 on 4 procs for 100 steps with 4096 atoms
+
+Performance: 41150.904 tau/day, 95.257 timesteps/s
+96.5% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00021386 | 0.00022691 | 0.00023532 |   0.0 |  0.02
+Kspace  | 1.0351     | 1.0353     | 1.0354     |   0.0 | 98.61
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0092776  | 0.0094286  | 0.0095558  |   0.1 |  0.90
+Output  | 0.00019121 | 0.00050998 | 0.0014408  |   0.0 |  0.05
+Modify  | 0.0019102  | 0.0019451  | 0.0019991  |   0.1 |  0.19
+Other   |            | 0.002431   |            |       |  0.23
+
+Nlocal:    1024 ave 1024 max 1024 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    5120 ave 5120 max 5120 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    131072 ave 131072 max 131072 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 524288
+Ave neighs/atom = 128
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.1 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.1
similarity index 69%
rename from examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.1
index b0a47e4f98..2d3ca0d588 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.1
@@ -1,13 +1,12 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
 units		lj
 atom_style  charge
 
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
   1 by 1 by 1 MPI processor grid
   reading atoms ...
   1000 atoms
@@ -45,51 +44,51 @@ Neighbor list info ...
   max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 2
   ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
+  binsize = 1, bins = 103 103 103
   1 neighbor lists, perpetual/occasional/extra = 1 0 0
   (1) pair zero, perpetual
       attributes: half, newton on
       pair build: half/bin/atomonly/newton
       stencil: half/bin/3d/newton
       bin: standard
-Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 10.44 | 10.44 | 10.44 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777512            0    272.51604   0.17462195 
-       5    286.36222    -4.382053            0    424.73173   0.26918926 
-       6    481.42206   -4.3095567            0     717.1014   0.45274088 
-       7    488.59167   -3.8685194            0     728.2861   0.45956866 
-       8    497.85287   -3.0417966            0    742.99073   0.46838116 
-       9    499.61615    -3.419003            0     745.2558   0.46983345 
-      10    502.63684   -2.8360961            0    750.36521   0.47280809 
-      11     504.4846   -2.7628105            0    753.20736   0.47462793 
-      12    506.54485   -2.8460356            0    756.21142   0.47651441 
-      13    508.27211    -2.730935            0    758.91482   0.47813752 
-      14    510.57045   -2.6094877            0    762.48033   0.48031431 
-      15    513.14798   -2.7150827            0    766.23717   0.48275229 
-      16    515.78124   -2.3961811            0    770.50201   0.48526333 
-      17    515.70265   -2.2982683            0    770.48215   0.48526617 
-      18     515.7081   -2.1515983            0    770.63699   0.48530393 
-      19    515.74906   -2.0581436            0    770.79182   0.48530977 
-      20    515.70883   -1.8922577            0    770.89742   0.48527105 
-Loop time of 0.465839 on 1 procs for 20 steps with 1000 atoms
+       0          1.5  -0.62417787            0    1.6235721 0.0015226607 
+       1    18.780041   -10.770002            0    17.371889   0.01623671 
+       2    65.289192   -11.084705            0    86.751149  0.058612772 
+       3    121.92987   -7.0625759            0    175.64933   0.11076005 
+       4    185.78164   -5.8777512            0    272.51604   0.16958509 
+       5    286.36222    -4.382053            0    424.73173   0.26142467 
+       6    481.42206   -4.3095567            0     717.1014   0.43968187 
+       7    488.59167   -3.8685194            0     728.2861    0.4463127 
+       8    497.85287   -3.0417966            0    742.99073   0.45487101 
+       9    499.61615    -3.419003            0     745.2558   0.45628141 
+      10    502.63684   -2.8360961            0    750.36521   0.45917024 
+      11     504.4846   -2.7628105            0    753.20736   0.46093759 
+      12    506.54485   -2.8460356            0    756.21142   0.46276966 
+      13    508.27211    -2.730935            0    758.91482   0.46434596 
+      14    510.57045   -2.6094877            0    762.48033   0.46645996 
+      15    513.14798   -2.7150827            0    766.23717   0.46882762 
+      16    515.78124   -2.3961811            0    770.50201   0.47126623 
+      17    515.70265   -2.2982683            0    770.48215   0.47126898 
+      18     515.7081   -2.1515983            0    770.63699   0.47130565 
+      19    515.74906   -2.0581436            0    770.79182   0.47131132 
+      20    515.70883   -1.8922577            0    770.89742   0.47127372 
+Loop time of 0.139386 on 1 procs for 20 steps with 1000 atoms
 
-Performance: 18547.165 tau/day, 42.933 timesteps/s
-99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 61985.954 tau/day, 143.486 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00021982 | 0.00021982 | 0.00021982 |   0.0 |  0.05
-Kspace  | 0.3218     | 0.3218     | 0.3218     |   0.0 | 69.08
-Neigh   | 0.14249    | 0.14249    | 0.14249    |   0.0 | 30.59
-Comm    | 0.00014853 | 0.00014853 | 0.00014853 |   0.0 |  0.03
-Output  | 0.00026131 | 0.00026131 | 0.00026131 |   0.0 |  0.06
-Modify  | 0.00055146 | 0.00055146 | 0.00055146 |   0.0 |  0.12
-Other   |            | 0.0003715  |            |       |  0.08
+Pair    | 5.4359e-05 | 5.4359e-05 | 5.4359e-05 |   0.0 |  0.04
+Kspace  | 0.1018     | 0.1018     | 0.1018     |   0.0 | 73.03
+Neigh   | 0.037074   | 0.037074   | 0.037074   |   0.0 | 26.60
+Comm    | 4.6492e-05 | 4.6492e-05 | 4.6492e-05 |   0.0 |  0.03
+Output  | 0.00016332 | 0.00016332 | 0.00016332 |   0.0 |  0.12
+Modify  | 0.000139   | 0.000139   | 0.000139   |   0.0 |  0.10
+Other   |            | 0.0001132  |            |       |  0.08
 
 Nlocal:    1000 ave 1000 max 1000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.4 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.4
similarity index 64%
rename from examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.4
index 1235c0a3cd..085958d351 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.direct.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.direct.g++.4
@@ -1,13 +1,12 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
 units		lj
 atom_style  charge
 
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
   1 by 2 by 2 MPI processor grid
   reading atoms ...
   1000 atoms
@@ -45,58 +44,58 @@ Neighbor list info ...
   max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 2
   ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
+  binsize = 1, bins = 103 103 103
   1 neighbor lists, perpetual/occasional/extra = 1 0 0
   (1) pair zero, perpetual
       attributes: half, newton on
       pair build: half/bin/atomonly/newton
       stencil: half/bin/3d/newton
       bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 4.882 | 5.463 | 7.161 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777512            0    272.51604   0.17462195 
-       5    286.36222    -4.382053            0    424.73173   0.26918926 
-       6    481.42206   -4.3095567            0     717.1014   0.45274088 
-       7    488.59167   -3.8685194            0     728.2861   0.45956866 
-       8    497.85287   -3.0417966            0    742.99073   0.46838116 
-       9    499.61615    -3.419003            0     745.2558   0.46983345 
-      10    502.63684   -2.8360961            0    750.36521   0.47280809 
-      11     504.4846   -2.7628105            0    753.20736   0.47462793 
-      12    506.54485   -2.8460356            0    756.21142   0.47651441 
-      13    508.27211    -2.730935            0    758.91482   0.47813752 
-      14    510.57045   -2.6094877            0    762.48033   0.48031431 
-      15    513.14798   -2.7150827            0    766.23717   0.48275229 
-      16    515.78124   -2.3961811            0    770.50201   0.48526333 
-      17    515.70265   -2.2982683            0    770.48215   0.48526617 
-      18     515.7081   -2.1515983            0    770.63699   0.48530393 
-      19    515.74906   -2.0581436            0    770.79182   0.48530977 
-      20    515.70883   -1.8922577            0    770.89742   0.48527105 
-Loop time of 0.161335 on 4 procs for 20 steps with 1000 atoms
+       0          1.5  -0.62417787            0    1.6235721 0.0015226607 
+       1    18.780041   -10.770002            0    17.371889   0.01623671 
+       2    65.289192   -11.084705            0    86.751149  0.058612772 
+       3    121.92987   -7.0625759            0    175.64933   0.11076005 
+       4    185.78164   -5.8777512            0    272.51604   0.16958509 
+       5    286.36222    -4.382053            0    424.73173   0.26142467 
+       6    481.42206   -4.3095567            0     717.1014   0.43968187 
+       7    488.59167   -3.8685194            0     728.2861    0.4463127 
+       8    497.85287   -3.0417966            0    742.99073   0.45487101 
+       9    499.61615    -3.419003            0     745.2558   0.45628141 
+      10    502.63684   -2.8360961            0    750.36521   0.45917024 
+      11     504.4846   -2.7628105            0    753.20736   0.46093759 
+      12    506.54485   -2.8460356            0    756.21142   0.46276966 
+      13    508.27211    -2.730935            0    758.91482   0.46434596 
+      14    510.57045   -2.6094877            0    762.48033   0.46645996 
+      15    513.14798   -2.7150827            0    766.23717   0.46882762 
+      16    515.78124   -2.3961811            0    770.50201   0.47126623 
+      17    515.70265   -2.2982683            0    770.48215   0.47126898 
+      18     515.7081   -2.1515983            0    770.63699   0.47130565 
+      19    515.74906   -2.0581436            0    770.79182   0.47131132 
+      20    515.70883   -1.8922577            0    770.89742   0.47127372 
+Loop time of 0.100977 on 4 procs for 20 steps with 1000 atoms
 
-Performance: 53553.228 tau/day, 123.966 timesteps/s
-99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 85564.440 tau/day, 198.066 timesteps/s
+95.2% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00022721 | 0.00023353 | 0.000247   |   0.0 |  0.14
-Kspace  | 0.10295    | 0.11808    | 0.13377    |   3.5 | 73.19
-Neigh   | 0.023849   | 0.039717   | 0.055031   |   6.1 | 24.62
-Comm    | 0.0023148  | 0.0025774  | 0.0028391  |   0.4 |  1.60
-Output  | 0.00029063 | 0.00038403 | 0.00050664 |   0.0 |  0.24
-Modify  | 0.00015664 | 0.00015944 | 0.00016165 |   0.0 |  0.10
-Other   |            | 0.0001805  |            |       |  0.11
+Pair    | 5.1975e-05 | 5.3406e-05 | 5.4836e-05 |   0.0 |  0.05
+Kspace  | 0.07448    | 0.084484   | 0.08939    |   2.0 | 83.67
+Neigh   | 0.006777   | 0.012519   | 0.023358   |   5.7 | 12.40
+Comm    | 0.0011938  | 0.0020879  | 0.0029798  |   1.8 |  2.07
+Output  | 0.00024247 | 0.00065064 | 0.0016594  |   0.0 |  0.64
+Modify  | 1.812e-05  | 5.3406e-05 | 0.00014162 |   0.0 |  0.05
+Other   |            | 0.001127   |            |       |  1.12
 
-Nlocal:    250 ave 259 max 238 min
-Histogram: 1 0 0 1 0 0 0 0 0 2
-Nghost:    672.25 ave 683 max 663 min
-Histogram: 2 0 0 0 0 0 0 0 1 1
-Neighs:    61954.2 ave 97157 max 25016 min
-Histogram: 1 0 0 1 0 0 1 0 0 1
+Nlocal:    250 ave 518 max 78 min
+Histogram: 1 0 1 1 0 0 0 0 0 1
+Nghost:    659 ave 791 max 430 min
+Histogram: 1 0 0 0 0 0 0 1 1 1
+Neighs:    61954.2 ave 115962 max 19730 min
+Histogram: 1 0 1 0 0 1 0 0 0 1
 
 Total # of neighbors = 247817
 Ave neighs/atom = 247.817
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.1 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.1
similarity index 65%
rename from examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.1
index 4bef247e26..94ac525119 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.1
@@ -1,13 +1,12 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
 units		lj
 atom_style  charge
 
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
   1 by 1 by 1 MPI processor grid
   reading atoms ...
   1000 atoms
@@ -49,51 +48,51 @@ Neighbor list info ...
   max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 2
   ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
+  binsize = 1, bins = 103 103 103
   1 neighbor lists, perpetual/occasional/extra = 1 0 0
   (1) pair zero, perpetual
       attributes: half, newton on
       pair build: half/bin/atomonly/newton
       stencil: half/bin/3d/newton
       bin: standard
-Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 10.44 | 10.44 | 10.44 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417141            0    1.6235786 0.0015676581 
-       1    18.780412   -10.770009            0    17.372438  0.016719188 
-       2    65.294131   -11.084501            0    86.758754   0.06035827 
-       3    121.92555   -7.0612033            0    175.64423    0.1140457 
-       4    185.71165   -5.8781334            0    272.41077   0.17455524 
-       5    286.28339   -4.3800108            0    424.61565   0.26911306 
-       6    481.28097   -4.3052012            0    716.89433   0.45262045 
-       7    487.26022   -3.8672741            0    726.29216   0.45830216 
-       8    493.65478   -3.0242687            0    736.71742   0.46443761 
-       9    495.66203   -3.4336343            0    739.31592   0.46613014 
-      10    498.41831   -2.8837072            0    743.99613   0.46887706 
-      11    499.20944   -2.7724783            0    745.29287   0.46966875 
-      12    500.97345   -2.8281484            0    747.88057   0.47126462 
-      13    507.46412   -2.7752775            0    757.65971   0.47728761 
-      14    525.35729   -2.5749814            0    784.67292   0.49422171 
-      15     563.9578   -2.9982381            0    842.09253   0.53043696 
-      16    645.47602   -2.5519203            0    964.69389   0.60730795 
-      17    647.09276   -2.2568468            0    967.41166   0.60891914 
-      18    647.12596   -2.2791003            0    967.43915   0.60900309 
-      19    647.24862   -2.2495226            0    967.65253   0.60908339 
-      20    647.51175   -2.0239179            0    968.27244   0.60932598 
-Loop time of 0.797289 on 1 procs for 20 steps with 1000 atoms
+       0          1.5  -0.62417141            0    1.6235786   0.00152244 
+       1    18.780412   -10.770009            0    17.372438  0.016236934 
+       2    65.294131   -11.084501            0    86.758754  0.058617275 
+       3    121.92555   -7.0612033            0    175.64423   0.11075612 
+       4    185.71165   -5.8781334            0    272.41077   0.16952031 
+       5    286.28339   -4.3800108            0    424.61565   0.26135067 
+       6    481.28097   -4.3052012            0    716.89433   0.43956491 
+       7    487.26022   -3.8672741            0    726.29216   0.44508273 
+       8    493.65478   -3.0242687            0    736.71742   0.45104121 
+       9    495.66203   -3.4336343            0    739.31592   0.45268492 
+      10    498.41831   -2.8837072            0    743.99613    0.4553526 
+      11    499.20944   -2.7724783            0    745.29287   0.45612146 
+      12    500.97345   -2.8281484            0    747.88057    0.4576713 
+      13    507.46412   -2.7752775            0    757.65971   0.46352056 
+      14    525.35729   -2.5749814            0    784.67292   0.47996621 
+      15     563.9578   -2.9982381            0    842.09253   0.51513685 
+      16    645.47602   -2.5519203            0    964.69389   0.58979054 
+      17    647.09276   -2.2568468            0    967.41166   0.59135526 
+      18    647.12596   -2.2791003            0    967.43915   0.59143679 
+      19    647.24862   -2.2495226            0    967.65253   0.59151478 
+      20    647.51175   -2.0239179            0    968.27244   0.59175037 
+Loop time of 0.345449 on 1 procs for 20 steps with 1000 atoms
 
-Performance: 10836.721 tau/day, 25.085 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 25010.947 tau/day, 57.896 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00022364 | 0.00022364 | 0.00022364 |   0.0 |  0.03
-Kspace  | 0.6524     | 0.6524     | 0.6524     |   0.0 | 81.83
-Neigh   | 0.14312    | 0.14312    | 0.14312    |   0.0 | 17.95
-Comm    | 0.00020337 | 0.00020337 | 0.00020337 |   0.0 |  0.03
-Output  | 0.00036621 | 0.00036621 | 0.00036621 |   0.0 |  0.05
-Modify  | 0.00058126 | 0.00058126 | 0.00058126 |   0.0 |  0.07
-Other   |            | 0.0003934  |            |       |  0.05
+Pair    | 6.485e-05  | 6.485e-05  | 6.485e-05  |   0.0 |  0.02
+Kspace  | 0.30702    | 0.30702    | 0.30702    |   0.0 | 88.88
+Neigh   | 0.037661   | 0.037661   | 0.037661   |   0.0 | 10.90
+Comm    | 5.4598e-05 | 5.4598e-05 | 5.4598e-05 |   0.0 |  0.02
+Output  | 0.00032878 | 0.00032878 | 0.00032878 |   0.0 |  0.10
+Modify  | 0.00016141 | 0.00016141 | 0.00016141 |   0.0 |  0.05
+Other   |            | 0.0001564  |            |       |  0.05
 
 Nlocal:    1000 ave 1000 max 1000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -106,4 +105,4 @@ Total # of neighbors = 244342
 Ave neighs/atom = 244.342
 Neighbor list builds = 19
 Dangerous builds = 18
-Total wall time: 0:00:01
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.4 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.4
similarity index 60%
rename from examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.4
index e3f5bf44c9..79137934ab 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.fmm.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.fmm.g++.4
@@ -1,13 +1,12 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
 units		lj
 atom_style  charge
 
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
   1 by 2 by 2 MPI processor grid
   reading atoms ...
   1000 atoms
@@ -49,58 +48,58 @@ Neighbor list info ...
   max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 2
   ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
+  binsize = 1, bins = 103 103 103
   1 neighbor lists, perpetual/occasional/extra = 1 0 0
   (1) pair zero, perpetual
       attributes: half, newton on
       pair build: half/bin/atomonly/newton
       stencil: half/bin/3d/newton
       bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 4.882 | 5.463 | 7.161 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417141            0    1.6235786 0.0015676581 
-       1    18.780412   -10.770009            0    17.372438  0.016719188 
-       2    65.294131   -11.084501            0    86.758754   0.06035827 
-       3    121.92555   -7.0612033            0    175.64423    0.1140457 
-       4    185.71165   -5.8781334            0    272.41077   0.17455524 
-       5    286.28339   -4.3800108            0    424.61565   0.26911306 
-       6    481.28097   -4.3052012            0    716.89433   0.45262045 
-       7    487.26022   -3.8672741            0    726.29216   0.45830216 
-       8    493.65478   -3.0242687            0    736.71742   0.46443761 
-       9    495.66203   -3.4336343            0    739.31592   0.46613014 
-      10    498.41831   -2.8837072            0    743.99613   0.46887706 
-      11    499.20944   -2.7724783            0    745.29287   0.46966875 
-      12    500.97345   -2.8281484            0    747.88057   0.47126462 
-      13    507.46412   -2.7752775            0    757.65971   0.47728761 
-      14    525.35729   -2.5749814            0    784.67292   0.49422171 
-      15     563.9578   -2.9982381            0    842.09253   0.53043696 
-      16    645.47602   -2.5519203            0    964.69389   0.60730795 
-      17    647.09276   -2.2568468            0    967.41166   0.60891914 
-      18    647.12596   -2.2791003            0    967.43915   0.60900309 
-      19    647.24862   -2.2495226            0    967.65253   0.60908339 
-      20    647.51175   -2.0239179            0    968.27244   0.60932598 
-Loop time of 0.666895 on 4 procs for 20 steps with 1000 atoms
+       0          1.5  -0.62417141            0    1.6235786   0.00152244 
+       1    18.780412   -10.770009            0    17.372438  0.016236934 
+       2    65.294131   -11.084501            0    86.758754  0.058617275 
+       3    121.92555   -7.0612033            0    175.64423   0.11075612 
+       4    185.71165   -5.8781334            0    272.41077   0.16952031 
+       5    286.28339   -4.3800108            0    424.61565   0.26135067 
+       6    481.28097   -4.3052012            0    716.89433   0.43956491 
+       7    487.26022   -3.8672741            0    726.29216   0.44508273 
+       8    493.65478   -3.0242687            0    736.71742   0.45104121 
+       9    495.66203   -3.4336343            0    739.31592   0.45268492 
+      10    498.41831   -2.8837072            0    743.99613    0.4553526 
+      11    499.20944   -2.7724783            0    745.29287   0.45612146 
+      12    500.97345   -2.8281484            0    747.88057    0.4576713 
+      13    507.46412   -2.7752775            0    757.65971   0.46352056 
+      14    525.35729   -2.5749814            0    784.67292   0.47996621 
+      15     563.9578   -2.9982381            0    842.09253   0.51513685 
+      16    645.47602   -2.5519203            0    964.69389   0.58979054 
+      17    647.09276   -2.2568468            0    967.41166   0.59135526 
+      18    647.12596   -2.2791003            0    967.43915   0.59143679 
+      19    647.24862   -2.2495226            0    967.65253   0.59151478 
+      20    647.51175   -2.0239179            0    968.27244   0.59175037 
+Loop time of 0.357618 on 4 procs for 20 steps with 1000 atoms
 
-Performance: 12955.555 tau/day, 29.990 timesteps/s
-99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 24159.843 tau/day, 55.926 timesteps/s
+96.2% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.0002284  | 0.00024879 | 0.00025725 |   0.0 |  0.04
-Kspace  | 0.6085     | 0.62278    | 0.6386     |   1.6 | 93.38
-Neigh   | 0.023998   | 0.040044   | 0.054552   |   6.2 |  6.00
-Comm    | 0.0025489  | 0.0028656  | 0.0031898  |   0.4 |  0.43
-Output  | 0.0004077  | 0.00053912 | 0.00071406 |   0.0 |  0.08
-Modify  | 0.00017953 | 0.00018525 | 0.00020218 |   0.0 |  0.03
-Other   |            | 0.0002366  |            |       |  0.04
+Pair    | 5.9605e-05 | 6.2883e-05 | 6.8426e-05 |   0.0 |  0.02
+Kspace  | 0.32759    | 0.3385     | 0.34421    |   1.1 | 94.65
+Neigh   | 0.0090706  | 0.014676   | 0.025457   |   5.3 |  4.10
+Comm    | 0.0013905  | 0.0015741  | 0.0017447  |   0.3 |  0.44
+Output  | 0.00034785 | 0.00099462 | 0.0026579  |   0.0 |  0.28
+Modify  | 5.2929e-05 | 8.3923e-05 | 0.00015783 |   0.0 |  0.02
+Other   |            | 0.001731   |            |       |  0.48
 
-Nlocal:    250 ave 259 max 240 min
-Histogram: 1 0 0 0 0 2 0 0 0 1
-Nghost:    668.75 ave 679 max 657 min
-Histogram: 1 0 0 0 0 1 1 0 0 1
-Neighs:    61085.5 ave 95363 max 24964 min
-Histogram: 1 0 0 1 0 0 0 1 0 1
+Nlocal:    250 ave 512 max 84 min
+Histogram: 1 0 2 0 0 0 0 0 0 1
+Nghost:    655.75 ave 784 max 433 min
+Histogram: 1 0 0 0 0 0 0 1 1 1
+Neighs:    61085.5 ave 111012 max 21779 min
+Histogram: 1 0 1 0 0 1 0 0 0 1
 
 Total # of neighbors = 244342
 Ave neighs/atom = 244.342
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.1 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.1
similarity index 52%
rename from examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.1
index f8a85c6c6b..9fa1677017 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.hsph.g++.p2nfft.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.1
@@ -1,13 +1,12 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
 units		lj
 atom_style  charge
 
-read_data data.hammersley_shphere
-  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
   1 by 1 by 1 MPI processor grid
   reading atoms ...
   1000 atoms
@@ -47,51 +46,51 @@ Neighbor list info ...
   max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 2
   ghost atom cutoff = 2
-  binsize = 1, bins = 102 102 102
+  binsize = 1, bins = 103 103 103
   1 neighbor lists, perpetual/occasional/extra = 1 0 0
   (1) pair zero, perpetual
       attributes: half, newton on
       pair build: half/bin/atomonly/newton
       stencil: half/bin/3d/newton
       bin: standard
-Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 10.44 | 10.44 | 10.44 Mbytes
 Step Temp E_pair E_mol TotEng Press 
-       0          1.5  -0.62417787            0    1.6235721 0.0015678854 
-       1    18.780041   -10.770002            0    17.371889  0.016718957 
-       2    65.289192   -11.084705            0    86.751149  0.060353634 
-       3    121.92987   -7.0625759            0    175.64933   0.11404974 
-       4    185.78164   -5.8777511            0    272.51603   0.17462194 
-       5    286.36221   -4.3820531            0    424.73172   0.26918925 
-       6    481.42203   -4.3095567            0    717.10136   0.45274086 
-       7    488.59165   -3.8685193            0    728.28607   0.45956865 
-       8    497.85288   -3.0417938            0    742.99075   0.46838117 
-       9    499.61619   -3.4190063            0    745.25585   0.46983349 
-      10    502.63691   -2.8360951            0    750.36531   0.47280815 
-      11     504.4847   -2.7628089            0    753.20751   0.47462802 
-      12    506.54494   -2.8460319            0    756.21157    0.4765145 
-      13     508.2722   -2.7309328            0    758.91497   0.47813761 
-      14    510.57053   -2.6094792            0    762.48045   0.48031438 
-      15    513.14804   -2.7150819            0    766.23726   0.48275234 
-      16    515.78127   -2.3961749            0    770.50206   0.48526336 
-      17    515.70267   -2.2982581            0    770.48219   0.48526619 
-      18    515.70813   -2.1516075            0    770.63702   0.48530395 
-      19    515.74908   -2.0581483            0    770.79185   0.48530979 
-      20    515.70881    -1.892235            0    770.89742   0.48527104 
-Loop time of 1.06008 on 1 procs for 20 steps with 1000 atoms
+       0          1.5  -0.62417787            0    1.6235721 0.0015226607 
+       1    18.780041   -10.770002            0    17.371889   0.01623671 
+       2    65.289192   -11.084705            0    86.751149  0.058612772 
+       3    121.92987   -7.0625759            0    175.64933   0.11076005 
+       4    185.78164   -5.8777511            0    272.51603   0.16958509 
+       5    286.36221   -4.3820531            0    424.73172   0.26142466 
+       6    481.42203   -4.3095567            0    717.10136   0.43968184 
+       7    488.59165   -3.8685193            0    728.28608   0.44631269 
+       8    497.85288   -3.0417938            0    742.99075   0.45487102 
+       9    499.61619   -3.4190062            0    745.25585   0.45628145 
+      10    502.63691   -2.8360951            0    750.36531   0.45917031 
+      11     504.4847    -2.762809            0    753.20751   0.46093768 
+      12    506.54494    -2.846032            0    756.21156   0.46276975 
+      13     508.2722   -2.7309328            0    758.91496   0.46434604 
+      14    510.57052   -2.6094795            0    762.48045   0.46646002 
+      15    513.14804    -2.715082            0    766.23725   0.46882767 
+      16    515.78127    -2.396175            0    770.50206   0.47126626 
+      17    515.70267   -2.2982584            0    770.48219   0.47126901 
+      18    515.70813   -2.1516073            0    770.63702   0.47130567 
+      19    515.74908   -2.0581482            0    770.79185   0.47131134 
+      20    515.70881   -1.8922354            0    770.89742   0.47127371 
+Loop time of 0.313936 on 1 procs for 20 steps with 1000 atoms
 
-Performance: 8150.306 tau/day, 18.866 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 27521.533 tau/day, 63.707 timesteps/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00022078 | 0.00022078 | 0.00022078 |   0.0 |  0.02
-Kspace  | 0.91611    | 0.91611    | 0.91611    |   0.0 | 86.42
-Neigh   | 0.14232    | 0.14232    | 0.14232    |   0.0 | 13.43
-Comm    | 0.00015092 | 0.00015092 | 0.00015092 |   0.0 |  0.01
-Output  | 0.00033736 | 0.00033736 | 0.00033736 |   0.0 |  0.03
-Modify  | 0.00056243 | 0.00056243 | 0.00056243 |   0.0 |  0.05
-Other   |            | 0.0003803  |            |       |  0.04
+Pair    | 5.7697e-05 | 5.7697e-05 | 5.7697e-05 |   0.0 |  0.02
+Kspace  | 0.27613    | 0.27613    | 0.27613    |   0.0 | 87.96
+Neigh   | 0.037193   | 0.037193   | 0.037193   |   0.0 | 11.85
+Comm    | 4.9591e-05 | 4.9591e-05 | 4.9591e-05 |   0.0 |  0.02
+Output  | 0.00022483 | 0.00022483 | 0.00022483 |   0.0 |  0.07
+Modify  | 0.00014615 | 0.00014615 | 0.00014615 |   0.0 |  0.05
+Other   |            | 0.0001338  |            |       |  0.04
 
 Nlocal:    1000 ave 1000 max 1000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -104,4 +103,4 @@ Total # of neighbors = 247817
 Ave neighs/atom = 247.817
 Neighbor list builds = 19
 Dangerous builds = 18
-Total wall time: 0:00:01
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.4
new file mode 100644
index 0000000000..a41a3d9685
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.hsph.p2nfft.g++.4
@@ -0,0 +1,106 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_sphere
+  orthogonal box = (-51.5 -51.5 -51.5) to (51.5 51.5 51.5)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 103 103 103
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.882 | 5.463 | 7.161 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721 0.0015226607 
+       1    18.780041   -10.770002            0    17.371889   0.01623671 
+       2    65.289192   -11.084705            0    86.751149  0.058612772 
+       3    121.92987   -7.0625759            0    175.64933   0.11076005 
+       4    185.78164   -5.8777511            0    272.51603   0.16958509 
+       5    286.36221   -4.3820531            0    424.73172   0.26142466 
+       6    481.42203   -4.3095567            0    717.10136   0.43968184 
+       7    488.59165   -3.8685193            0    728.28608   0.44631269 
+       8    497.85288   -3.0417938            0    742.99075   0.45487102 
+       9    499.61619   -3.4190062            0    745.25585   0.45628145 
+      10    502.63691   -2.8360951            0    750.36531   0.45917031 
+      11     504.4847    -2.762809            0    753.20751   0.46093768 
+      12    506.54494    -2.846032            0    756.21156   0.46276975 
+      13     508.2722   -2.7309328            0    758.91496   0.46434604 
+      14    510.57052   -2.6094795            0    762.48045   0.46646002 
+      15    513.14804    -2.715082            0    766.23725   0.46882767 
+      16    515.78127    -2.396175            0    770.50206   0.47126626 
+      17    515.70267   -2.2982584            0    770.48219   0.47126901 
+      18    515.70813   -2.1516073            0    770.63702   0.47130567 
+      19    515.74908   -2.0581482            0    770.79185   0.47131134 
+      20    515.70881   -1.8922354            0    770.89742   0.47127371 
+Loop time of 0.283172 on 4 procs for 20 steps with 1000 atoms
+
+Performance: 30511.451 tau/day, 70.628 timesteps/s
+94.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.8413e-05 | 6.2227e-05 | 6.8665e-05 |   0.0 |  0.02
+Kspace  | 0.2548     | 0.26559    | 0.27139    |   1.2 | 93.79
+Neigh   | 0.0083673  | 0.014062   | 0.024744   |   5.3 |  4.97
+Comm    | 0.0013778  | 0.0014804  | 0.0015941  |   0.2 |  0.52
+Output  | 0.00029898 | 0.00072974 | 0.0017858  |   0.0 |  0.26
+Modify  | 3.6001e-05 | 6.3002e-05 | 0.00013065 |   0.0 |  0.02
+Other   |            | 0.001181   |            |       |  0.42
+
+Nlocal:    250 ave 518 max 78 min
+Histogram: 1 0 1 1 0 0 0 0 0 1
+Nghost:    659 ave 791 max 430 min
+Histogram: 1 0 0 0 0 0 0 1 1 1
+Neighs:    61954.2 ave 115962 max 19730 min
+Histogram: 1 0 1 0 0 1 0 0 0 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.1 b/examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.1
similarity index 80%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.1
rename to examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.1
index 1876a0e52b..2993de6329 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.1
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.1
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 1 by 1 MPI processor grid
   4096 atoms
-  Time spent = 0.00049448 secs
+  Time spent = 0.000288725 secs
 
 velocity	all create 1.5 49893
 
@@ -72,21 +71,21 @@ Step Temp E_pair E_mol TotEng Press
       80    1.5033149   -1.7524495            0   0.50197233     10.46585 
       90    1.5108219   -1.7637095            0   0.50197005    10.515883 
      100     1.529239   -1.7913105            0     0.501988    10.638649 
-Loop time of 18.1113 on 1 procs for 100 steps with 4096 atoms
+Loop time of 7.17417 on 1 procs for 100 steps with 4096 atoms
 
-Performance: 2385.257 tau/day, 5.521 timesteps/s
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+Performance: 6021.602 tau/day, 13.939 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.0014985  | 0.0014985  | 0.0014985  |   0.0 |  0.01
-Kspace  | 18.079     | 18.079     | 18.079     |   0.0 | 99.82
+Pair    | 0.00037169 | 0.00037169 | 0.00037169 |   0.0 |  0.01
+Kspace  | 7.1639     | 7.1639     | 7.1639     |   0.0 | 99.86
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.014229   | 0.014229   | 0.014229   |   0.0 |  0.08
-Output  | 0.0004642  | 0.0004642  | 0.0004642  |   0.0 |  0.00
-Modify  | 0.011227   | 0.011227   | 0.011227   |   0.0 |  0.06
-Other   |            | 0.004455   |            |       |  0.02
+Comm    | 0.0048451  | 0.0048451  | 0.0048451  |   0.0 |  0.07
+Output  | 0.00030565 | 0.00030565 | 0.00030565 |   0.0 |  0.00
+Modify  | 0.0029933  | 0.0029933  | 0.0029933  |   0.0 |  0.04
+Other   |            | 0.001761   |            |       |  0.02
 
 Nlocal:    4096 ave 4096 max 4096 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -99,4 +98,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:00:21
+Total wall time: 0:00:07
diff --git a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.4 b/examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.4
similarity index 79%
rename from examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.4
rename to examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.4
index bf4494df8b..19db90d822 100644
--- a/examples/USER/scafacos/log.08Aug18.scafacos.g++.p2nfft.4
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.p2nfft.g++.4
@@ -1,5 +1,4 @@
-LAMMPS (2 Aug 2018)
-OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+LAMMPS (27 Nov 2018)
   using 1 OpenMP thread(s) per MPI task
 # Point dipoles in a 2d box
 
@@ -16,7 +15,7 @@ replicate 8 8 8
   orthogonal box = (0 0 0) to (8 8 8)
   1 by 2 by 2 MPI processor grid
   4096 atoms
-  Time spent = 0.000260592 secs
+  Time spent = 0.000186443 secs
 
 velocity	all create 1.5 49893
 
@@ -72,21 +71,21 @@ Step Temp E_pair E_mol TotEng Press
       80    1.5033149   -1.7524495            0   0.50197233     10.46585 
       90    1.5108219   -1.7637095            0   0.50197005    10.515883 
      100     1.529239   -1.7913105            0     0.501988    10.638649 
-Loop time of 5.09997 on 4 procs for 100 steps with 4096 atoms
+Loop time of 2.28914 on 4 procs for 100 steps with 4096 atoms
 
-Performance: 8470.643 tau/day, 19.608 timesteps/s
-99.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+Performance: 18871.750 tau/day, 43.685 timesteps/s
+98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.00069928 | 0.00071001 | 0.00073647 |   0.0 |  0.01
-Kspace  | 5.0795     | 5.0796     | 5.0797     |   0.0 | 99.60
+Pair    | 0.00019193 | 0.00020188 | 0.00020981 |   0.0 |  0.01
+Kspace  | 2.2771     | 2.2771     | 2.2772     |   0.0 | 99.48
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.014101   | 0.014216   | 0.014331   |   0.1 |  0.28
-Output  | 0.00030541 | 0.00033581 | 0.00039625 |   0.0 |  0.01
-Modify  | 0.0030217  | 0.0030621  | 0.0030868  |   0.0 |  0.06
-Other   |            | 0.002036   |            |       |  0.04
+Comm    | 0.0075378  | 0.007587   | 0.0076075  |   0.0 |  0.33
+Output  | 0.00019574 | 0.00051248 | 0.0014374  |   0.0 |  0.02
+Modify  | 0.0014074  | 0.0014608  | 0.0014906  |   0.1 |  0.06
+Other   |            | 0.002247   |            |       |  0.10
 
 Nlocal:    1024 ave 1024 max 1024 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
@@ -99,4 +98,4 @@ Total # of neighbors = 524288
 Ave neighs/atom = 128
 Neighbor list builds = 0
 Dangerous builds = 0
-Total wall time: 0:00:05
+Total wall time: 0:00:02
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.1 b/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.1
new file mode 100644
index 0000000000..92e85e072e
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.1
@@ -0,0 +1,102 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (1 1 1)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (8 8 8)
+  1 by 1 by 1 MPI processor grid
+  4096 atoms
+  Time spent = 0.000295162 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p3m 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 8 8 8
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 5.813 | 5.813 | 5.813 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5   -1.7475754            0    0.5018753     11.99707 
+      10    1.5000018   -1.7475781            0   0.50187528    11.997085 
+      20    1.4999833   -1.7475525            0   0.50187314    11.996937 
+      30    1.4999008   -1.7474408            0   0.50186112    11.996277 
+      40    1.4997303   -1.7471984            0    0.5018478    11.994913 
+      50    1.4995294   -1.7469059            0   0.50183901    11.993306 
+      60    1.4995402   -1.7469269            0   0.50183416    11.993392 
+      70      1.50036   -1.7481582            0   0.50183238     11.99995 
+      80    1.5032409   -1.7524778            0   0.50183303    12.022991 
+      90    1.5107445   -1.7637255            0   0.50183801    12.083005 
+     100    1.5291568   -1.7913136            0   0.50186158    12.230268 
+Loop time of 2.98343 on 1 procs for 100 steps with 4096 atoms
+
+Performance: 14479.957 tau/day, 33.518 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00035572 | 0.00035572 | 0.00035572 |   0.0 |  0.01
+Kspace  | 2.9729     | 2.9729     | 2.9729     |   0.0 | 99.65
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.005022   | 0.005022   | 0.005022   |   0.0 |  0.17
+Output  | 0.00027037 | 0.00027037 | 0.00027037 |   0.0 |  0.01
+Modify  | 0.003046   | 0.003046   | 0.003046   |   0.0 |  0.10
+Other   |            | 0.00182    |            |       |  0.06
+
+Nlocal:    4096 ave 4096 max 4096 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    9728 ave 9728 max 9728 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    524288 ave 524288 max 524288 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 524288
+Ave neighs/atom = 128
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.4 b/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.4
new file mode 100644
index 0000000000..18199eca16
--- /dev/null
+++ b/examples/USER/scafacos/log.27Nov18.scafacos.p3m.g++.4
@@ -0,0 +1,102 @@
+LAMMPS (27 Nov 2018)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (1 1 1)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (8 8 8)
+  1 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000118017 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p3m 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p3m ...
+WARNING: Virial computation for P3M not available (src/USER-SCAFACOS/scafacos.cpp:104)
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 8 8 8
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.008 | 4.008 | 4.008 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5   -1.7475797            0   0.50187096     11.99707 
+      10    1.5000019   -1.7475825            0   0.50187094    11.997085 
+      20    1.4999837   -1.7475551            0   0.50187104     11.99694 
+      30    1.4999024   -1.7474348            0   0.50186949    11.996289 
+      40    1.4997328    -1.747187            0   0.50186299    11.994934 
+      50     1.499532   -1.7468958            0   0.50185295    11.993327 
+      60    1.4995421     -1.74692            0   0.50184401    11.993408 
+      70    1.5003611   -1.7481536            0    0.5018387    11.999959 
+      80    1.5032412   -1.7524745            0   0.50183679    12.022994 
+      90     1.510744    -1.763723            0   0.50183979    12.083002 
+     100    1.5291558   -1.7913123            0    0.5018614     12.23026 
+Loop time of 1.10005 on 4 procs for 100 steps with 4096 atoms
+
+Performance: 39271.030 tau/day, 90.905 timesteps/s
+96.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0002172  | 0.00022924 | 0.0002377  |   0.0 |  0.02
+Kspace  | 1.0848     | 1.0849     | 1.085      |   0.0 | 98.62
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.010176   | 0.01062    | 0.01088    |   0.3 |  0.97
+Output  | 0.00019169 | 0.0003345  | 0.00075459 |   0.0 |  0.03
+Modify  | 0.001878   | 0.0018976  | 0.0019398  |   0.1 |  0.17
+Other   |            | 0.002097   |            |       |  0.19
+
+Nlocal:    1024 ave 1024 max 1024 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    5120 ave 5120 max 5120 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    131072 ave 131072 max 131072 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 524288
+Ave neighs/atom = 128
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:01