Merge branch 'master' into improper-tester

2021-02-24 17:08:34 -05:00
parent 8d0d7f4f55 e916c5d852
commit c45658f1c7
3831 changed files with 197950 additions and 849272 deletions
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@ -108,7 +108,7 @@ For bug reports, the next step is that one of the core LAMMPS developers will se
 For submitting pull requests, there is a [detailed tutorial](https://lammps.sandia.gov/doc/Howto_github.html) in the LAMMPS manual. Thus only a brief breakdown of the steps is presented here. Please note that the LAMMPS developers are still reviewing and trying to improve the process. If you are unsure about something, do not hesitate to post a question on the lammps-users mailing list or contact one of the core LAMMPS developers.
 Immediately after the submission, the LAMMPS continuous integration server at ci.lammps.org will download your submitted branch and perform a simple compilation test, i.e. will test whether your submitted code can be compiled under various conditions. It will also do a check on whether your included documentation translates cleanly. Whether these tests are successful or fail will be recorded. If a test fails, please inspect the corresponding output on the CI server and take the necessary steps, if needed, so that the code can compile cleanly again. The test will be re-run each time the pull request is updated with a push to the remote branch on GitHub.
-Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assesment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
+Next a LAMMPS core developer will self-assign and do an overall technical assessment of the submission. If you are not yet registered as a LAMMPS collaborator, you will receive an invitation for that. As part of the assessment, the pull request will be categorized with labels. There are two special labels: `needs_work` (indicates that work from the submitter of the pull request is needed) and `work_in_progress` (indicates, that the assigned LAMMPS developer will make changes, if not done by the contributor who made the submit). 
 You may also receive comments and suggestions on the overall submission or specific details and on occasion specific requests for changes as part of the review. If permitted, also additional changes may be pushed into your pull request branch or a pull request may be filed in your LAMMPS fork on GitHub to include those changes.
 The LAMMPS developer may then decide to assign the pull request to another developer (e.g. when that developer is more knowledgeable about the submitted feature or enhancement or has written the modified code). It may also happen, that additional developers are requested to provide a review and approve the changes. For submissions, that may change the general behavior of LAMMPS, or where a possibility of unwanted side effects exists, additional tests may be requested by the assigned developer.
 If the assigned developer is satisfied and considers the submission ready for inclusion into LAMMPS, the pull request will receive approvals and be merged into the master branch by one of the core LAMMPS developers. After the pull request is merged, you may delete the feature branch used for the pull request in your personal LAMMPS fork.
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@ -0,0 +1,47 @@
 # GitHub action to run static code analysis on C++ and Python code
 name: "CodeQL Code Analysis"
 on:
  push:
    branches: [master]
 jobs:
  analyze:
    name: Analyze
    if: ${{ github.repository == 'lammps/lammps' }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        language: ['cpp', 'python']
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        fetch-depth: 2
    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.x'
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v1
      with:
        languages: ${{ matrix.language }}
    - name: Create Build Environment
      run: cmake -E make_directory ${{github.workspace}}/build
    - name: Building LAMMPS via CMake
      if: ${{ matrix.language == 'cpp' }}
      shell: bash
      working-directory: ${{github.workspace}}/build
      run: |
        cmake -C $GITHUB_WORKSPACE/cmake/presets/most.cmake $GITHUB_WORKSPACE/cmake
        cmake --build . --parallel 2
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v1
--- a/.github/workflows/unittest-macos.yml
+++ b/.github/workflows/unittest-macos.yml
@ -0,0 +1,34 @@
 # GitHub action to build LAMMPS on MacOS and run unit tests
 name: "Unittest for MacOS"
 on:
  push:
    branches: [master]
 jobs:
  build:
    name: MacOS Unit Test
    if: ${{ github.repository == 'lammps/lammps' }}
    runs-on: macos-latest
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        fetch-depth: 2
    - name: Create Build Environment
      run: cmake -E make_directory ${{github.workspace}}/build
    - name: Building LAMMPS via CMake
      shell: bash
      working-directory: ${{github.workspace}}/build
      run: |
        cmake -C $GITHUB_WORKSPACE/cmake/presets/most.cmake $GITHUB_WORKSPACE/cmake \
              -DENABLE_TESTING=ON -DBUILD_SHARED_LIBS=ON -DLAMMPS_EXCEPTIONS=ON
        cmake --build . --parallel 2
    - name: Run Tests
      working-directory: ${{github.workspace}}/build
      shell: bash
      run: ctest -V
--- a/6
+++ b/6
@ -37,14 +37,14 @@ tools                      pre- and post-processing tools
 Point your browser at any of these files to get started:
-https://lammps.sandia.gov/doc/Manual.html         LAMMPS user manual
+https://lammps.sandia.gov/doc/Manual.html         LAMMPS manual
 https://lammps.sandia.gov/doc/Intro.html          high-level introduction
 https://lammps.sandia.gov/doc/Build.html          how to build LAMMPS
 https://lammps.sandia.gov/doc/Run_head.html       how to run LAMMPS
 https://lammps.sandia.gov/doc/Commands_all.html   Table of available commands
-https://lammps.sandia.gov/doc/pg_library.html     LAMMPS programmer guide
+https://lammps.sandia.gov/doc/Library.html        LAMMPS library interfaces
 https://lammps.sandia.gov/doc/Modify.html         how to modify and extend LAMMPS
-https://lammps.sandia.gov/doc/pg_developer.html   LAMMPS developer guide
+https://lammps.sandia.gov/doc/Developer.html      LAMMPS developer info
 You can also create these doc pages locally:
--- a/bench/KEPLER/Makefile.cpu
+++ b/bench/KEPLER/Makefile.cpu
@ -1,108 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.cuda
+++ b/bench/KEPLER/Makefile.cuda
@ -1,108 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.gpu
+++ b/bench/KEPLER/Makefile.gpu
@ -1,108 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.gpu.double
+++ b/bench/KEPLER/Makefile.gpu.double
@ -1,50 +0,0 @@
 # /* ----------------------------------------------------------------------   
 #  Generic Linux Makefile for CUDA 
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.standard
 CUDA_HOME = /home/projects/cuda/6.0.37
 NVCC = nvcc
 # Kepler CUDA
 CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 #CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
 # precision for GPU calculations
 # -D_SINGLE_SINGLE  # Single precision for all calculations
 # -D_DOUBLE_DOUBLE  # Double precision for all calculations
 # -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
 CUDA_PRECISION = -D_DOUBLE_DOUBLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64
 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
 include Nvidia.makefile
--- a/bench/KEPLER/Makefile.gpu.mixed
+++ b/bench/KEPLER/Makefile.gpu.mixed
@ -1,50 +0,0 @@
 # /* ----------------------------------------------------------------------   
 #  Generic Linux Makefile for CUDA 
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.standard
 CUDA_HOME = /home/projects/cuda/6.0.37
 NVCC = nvcc
 # Kepler CUDA
 CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 #CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
 # precision for GPU calculations
 # -D_SINGLE_SINGLE  # Single precision for all calculations
 # -D_DOUBLE_DOUBLE  # Double precision for all calculations
 # -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
 CUDA_PRECISION = -D_SINGLE_DOUBLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64
 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
 include Nvidia.makefile
--- a/bench/KEPLER/Makefile.gpu.single
+++ b/bench/KEPLER/Makefile.gpu.single
@ -1,50 +0,0 @@
 # /* ----------------------------------------------------------------------   
 #  Generic Linux Makefile for CUDA 
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.standard
 CUDA_HOME = /home/projects/cuda/6.0.37
 NVCC = nvcc
 # Kepler CUDA
 CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 #CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
 # precision for GPU calculations
 # -D_SINGLE_SINGLE  # Single precision for all calculations
 # -D_DOUBLE_DOUBLE  # Double precision for all calculations
 # -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
 CUDA_PRECISION = -D_SINGLE_SINGLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64
 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
 include Nvidia.makefile
--- a/bench/KEPLER/Makefile.intel.cpu
+++ b/bench/KEPLER/Makefile.intel.cpu
@ -1,109 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O3 -openmp -DLAMMPS_MEMALIGN=64 -no-offload \
                -xHost -fno-alias -ansi-alias -restrict -override-limits
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O -openmp
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.kokkos.cuda
+++ b/bench/KEPLER/Makefile.kokkos.cuda
@ -1,113 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		nvcc
 CCFLAGS =	-O3 -arch=sm_35
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		mpicxx
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 OMP = yes
 CUDA = yes
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cu
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.kokkos.omp
+++ b/bench/KEPLER/Makefile.kokkos.omp
@ -1,110 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 OMP = yes
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.omp
+++ b/bench/KEPLER/Makefile.omp
@ -1,108 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFTW
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O3 -openmp -restrict -ansi-alias
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O -openmp
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/Makefile.opt
+++ b/bench/KEPLER/Makefile.opt
@ -1,108 +0,0 @@
 # linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
 SHELL = /bin/sh
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 CC =		icc
 CCFLAGS =	-O -restrict
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 LINK =		icc
 LINKFLAGS =	-O
 LIB =           -lstdc++
 SIZE =		size
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 LMP_INC =
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
 MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
 MPI_LIB =	-lmpi
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =
 # JPEG and/or PNG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
 # INC = path(s) for jpeglib.h and/or png.h
 # PATH = path(s) for JPEG library and/or PNG library
 # LIB = name(s) of JPEG library and/or PNG library
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	-ljpeg
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 include	Makefile.package.settings
 include	Makefile.package
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 # Path to src files
 vpath %.cpp ..
 vpath %.h ..
 # Link target
 # Links all objects into $(EXE), then runs $(SIZE) on the result.
 # EXE and OBJ are not set in this file; presumably they are supplied by the
 # invoking make -- confirm against the caller.
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 # Library targets
 # lib: archives the objects into $(EXE) with $(ARCHIVE) $(ARFLAGS)
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 # shlib: builds $(EXE) from the objects via $(CC) with $(SHFLAGS) $(SHLIBFLAGS)
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
        $(OBJ) $(EXTRA_LIB) $(LIB)
 # Compilation rules
 # %.o: compiles one .cpp file; $(SHFLAGS) (-fPIC above) is always passed, so
 #      the same objects are used by the executable, lib and shlib targets
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 # %.d: redirects the compiler output produced with $(DEPFLAGS) (-M above)
 #      into the .d file
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 # Individual dependencies
 # one .d file per object; sinclude does not fail when a .d file is missing
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
--- a/bench/KEPLER/README
+++ b/bench/KEPLER/README
@ -1,68 +0,0 @@
 These are build and input and run scripts used to run the LJ benchmark
 in the top-level bench directory using all the various accelerator
 packages currently available in LAMMPS.  The results of running these
 benchmarks on a GPU cluster with Kepler GPUs are shown on the "GPU
 (Kepler)" section of the Benchmark page of the LAMMPS WWW site:
 lammps.sandia.gov/bench.
 The specifics of the benchmark machine are as follows:
 It is a small GPU cluster at Sandia National Labs called "shannon". It
 has 32 nodes, each with two 8-core Sandy Bridge Xeon CPUs (E5-2670,
 2.6GHz, HT deactivated), for a total of 512 cores.  Twenty-four of the
 nodes have two NVIDIA Kepler GPUs (K20x, 2688 cores at 732 MHz).  LAMMPS
 was compiled with the Intel icc compiler, using module
 openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37.
 ------------------------------------------------------------------------
 You can, of course, build LAMMPS yourself with any of the accelerator
 packages installed for your platform.
 The build.py script will build LAMMPS for the various accelerator
 packages using the Makefile.* files in this dir, which you can edit if
 necessary for your platform.  You must set the "lmpdir" variable at
 the top of build.py to the home directory of LAMMPS as installed on
 your system.  Note that the build.py script hardcodes the arch setting
 for the USER-CUDA package, which should be matched to the GPUs on your
 system, e.g. sm_35 for Kepler GPUs.  For the GPU package, this setting
 is in the Makefile.gpu.* files, as is the CUDA_HOME variable which
 should point to where NVIDIA Cuda software is installed on your
 system.
 Once the Makefiles are in place, then typing, for example,
 python build.py cpu gpu
 will build executables for the CPU (no accelerators), and 3 variants
 (double, mixed, single precision) of the GPU package.  See the list of
 possible targets at the top of the build.py script.
 Note that the build.py script will un-install all packages in your
 LAMMPS directory, then only install the ones needed for the benchmark.
 The Makefile.* files in this dir are copied into lammps/src/MAKE, as a
 dummy Makefile.foo, so they will not conflict with makefiles that may
 already be there.  The build.py script also builds the auxiliary GPU
 and USER-CUDA library as needed.
 LAMMPS executables that are generated by build.py are copied into this
 directory when the script finishes each build.
 ------------------------------------------------------------------------
 The in.* files can be run with any of the accelerator packages,
 if you specify the appropriate command-line switches.  These
 include switches to set the problem size and number of timesteps
 to run.
 The run*.sh scripts have sample mpirun commands for running the input
 scripts on a single node or on multiple nodes for the strong and weak
 scaling results shown on the benchmark web page.  These scripts are
 provided for illustration purposes, to show what command-line
 arguments are used with each accelerator package.
 Note that we generate these run scripts, either for interactive or
 batch submission, via Python scripts which often produce a long list
 of runs to exercise a combination of options.  To perform a quick
 benchmark calculation on your platform, you will typically only want
 to run a few commands out of any of the run*.sh scripts.
--- a/bench/KEPLER/build.py
+++ b/bench/KEPLER/build.py
@ -1,187 +0,0 @@
 #!/usr/local/bin/python
 # Syntax: build.py target1 target2 ...
 #         targets:
 #         cpu, opt, omp,
 #         gpu/double, gpu/mixed, gpu/single,
 #         cuda/double, cuda/mixed, cuda/single,
 #         intel/cpu, intel/phi,
 #         kokkos/omp, kokkos/phi, kokkos/cuda
 #         gpu = gpu/double + gpu/mixed + gpu/single
 #         cuda = cuda/double + cuda/mixed + cuda/single
 #         intel = intel/cpu + intel/phi
 #         kokkos = kokkos/omp + kokkos/phi + kokkos/cuda
 #         all = cpu + opt + omp + gpu + cuda + intel + kokkos
 # create executables for different packages
 # MUST set lmpdir to path of LAMMPS home directory
 import sys,commands,os
 lmpdir = "~/lammps"
 # build LAMMPS
 # copy makefile into src/MAKE as Makefile.foo, then remove it
 def build_lammps(makefile,pkg):
  print "Building LAMMPS with %s and %s packages ..." % (makefile,pkg)
  commands.getoutput("cp %s %s/src/MAKE/Makefile.foo" % (makefile,lmpdir))
  cwd = os.getcwd()
  os.chdir(os.path.expanduser(lmpdir + "/src"))
  str = "make clean-foo"
  txt = commands.getoutput(str)
  str = "make no-all"
  txt = commands.getoutput(str)
  for package in pkg:
    str = "make yes-%s" % package
    txt = commands.getoutput(str)
    print txt
  str = "make -j 16 foo"
  txt = commands.getoutput(str)
  os.remove("MAKE/Makefile.foo")
  os.chdir(cwd)
 # build GPU library in LAMMPS
 # copy makefile into lib/gpu as Makefile.foo, then remove it
 def build_gpu(makefile):
  print "Building GPU lib with %s ..." % makefile
  commands.getoutput("cp %s %s/lib/gpu/Makefile.foo" % (makefile,lmpdir))
  cwd = os.getcwd()
  os.chdir(os.path.expanduser(lmpdir + "/lib/gpu"))
  str = "make -f Makefile.foo clean"
  txt = commands.getoutput(str)
  str = "make -j 16 -f Makefile.foo"
  txt = commands.getoutput(str)
  os.remove("Makefile.foo")
  os.chdir(cwd)
 # build CUDA library in LAMMPS
 # set precision and arch explicitly as options to make in lib/cuda
 def build_cuda(precision,arch):
  print "Building USER-CUDA lib with %s and arch sm_%d ..." % (precision,arch)
  cwd = os.getcwd()
  os.chdir(os.path.expanduser(lmpdir + "/lib/cuda"))
  str = "make clean"
  txt = commands.getoutput(str)
  if precision == "double": pflag = 2
  elif precision == "mixed": pflag = 4
  elif precision == "single": pflag = 1
  str = "make -j 16 precision=%d arch=%s" % (pflag,arch)
  txt = commands.getoutput(str)
  os.chdir(cwd)
 # main program
 # convert target keywords into target flags
  cpu = opt = omp = 0
 gpu = gpu_double = gpu_mixed = gpu_single = 0
 cuda = cuda_double = cuda_mixed = cuda_single = 0
 intel = intel_cpu = intel_phi = 0
 kokkos = kokkos_omp = kokkos_phi = kokkos_cuda = 0
 targets = sys.argv[1:]
 for target in targets:
  if target == "cpu": cpu = 1
  elif target == "opt": opt = 1
  elif target == "omp": omp = 1
  elif target == "gpu/double": gpu_double = 1
  elif target == "gpu/mixed": gpu_mixed = 1
  elif target == "gpu/single": gpu_single = 1
  elif target == "gpu": gpu = 1
  elif target == "cuda/double": cuda_double = 1
  elif target == "cuda/mixed": cuda_mixed = 1
  elif target == "cuda/single": cuda_single = 1
  elif target == "cuda": cuda = 1
  elif target == "intel/cpu": intel_cpu = 1
  elif target == "intel/phi": intel_phi = 1
  elif target == "intel": intel = 1
  elif target == "kokkos/omp": kokkos_omp = 1
  elif target == "kokkos/phi": kokkos_phi = 1
  elif target == "kokkos/cuda": kokkos_cuda = 1
  elif target == "kokkos": kokkos = 1
  elif target == "all": cpu = omp = gpu = cuda = intel = kokkos = 1
  else: print "Target",target,"is unknown"
 if gpu: gpu_double = gpu_mixed = gpu_single = 1
 if cuda: cuda_double = cuda_mixed = cuda_single = 1
 if intel: intel_cpu = intel_phi = 1
 if kokkos: kokkos_omp = kokkos_phi = kokkos_cuda = 1
 # CPU
 if cpu:
  build_lammps(makefile = "Makefile.cpu", pkg = [])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cpu" % lmpdir)
 # OPT
 if opt:
  build_lammps(makefile = "Makefile.opt", pkg = ["opt"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_opt" % lmpdir)
 # OMP
 if omp:
  build_lammps(makefile = "Makefile.omp", pkg = ["user-omp"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_omp" % lmpdir)
 # GPU, 3 precisions
 if gpu_double:
  build_gpu(makefile = "Makefile.gpu.double")
  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_double" % lmpdir)
 if gpu_mixed:
  build_gpu(makefile = "Makefile.gpu.mixed")
  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_mixed" % lmpdir)
 if gpu_single:
  build_gpu(makefile = "Makefile.gpu.single")
  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_single" % lmpdir)
 # CUDA, 3 precisions
 if cuda_double:
  build_cuda(precision = "double", arch = 35)
  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_double" % lmpdir)
 if cuda_mixed:
  build_cuda(precision = "mixed", arch = 35)
  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_mixed" % lmpdir)
 if cuda_single:
  build_cuda(precision = "single", arch = 35)
  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_single" % lmpdir)
 # INTEL, CPU and Phi
 if intel_cpu:
  build_lammps(makefile = "Makefile.intel.cpu", pkg = ["user-intel"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_intel_cpu" % lmpdir)
 if intel_phi:
  build_lammps(makefile = "Makefile.intel.phi", pkg = ["user-intel","user-omp"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_intel_phi" % lmpdir)
 # KOKKOS, all variants
 if kokkos_omp:
  build_lammps(makefile = "Makefile.kokkos.omp", pkg = ["kokkos"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_omp" % lmpdir)
 if kokkos_phi:
  build_lammps(makefile = "Makefile.kokkos.phi", pkg = ["kokkos"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_phi" % lmpdir)
 if kokkos_cuda:
  build_lammps(makefile = "Makefile.kokkos.cuda", pkg = ["kokkos"])
  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_cuda" % lmpdir)
--- a/bench/KEPLER/in.lj
+++ b/bench/KEPLER/in.lj
@ -1,22 +0,0 @@
 # 3d Lennard-Jones melt
 # The variables x, y, z and t are not defined in this file; the run*.sh
 # scripts supply them on the command line, e.g. "-v x 32 -v y 32 -v z 32 -v t 100".
 # x, y and z give the upper bounds of the block region, t is the argument of run.
 units		lj
 atom_style	atomic
 lattice		fcc 0.8442
 region		box block 0 $x 0 $y 0 $z
 create_box	1 box
 create_atoms	1 box
 mass		1 1.0
 velocity	all create 1.44 87287 loop geom
 pair_style	lj/cut 2.5
 pair_coeff	1 1 1.0 1.0 2.5
 neighbor	0.3 bin
 neigh_modify	delay 0 every 20 check no
 fix		1 all nve
 run		$t
--- a/bench/KEPLER/run_cpu.sh
+++ b/bench/KEPLER/run_cpu.sh
@ -1,29 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of lmp_cpu for a range of MPI rank counts.
 # Each log is saved as log.10Sep14.lj.cpu.128K.<ranks>.
 for np in 1 2 4 6 8 10 12 14 16; do
   mpirun -np $np lmp_cpu -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
   mv log.lammps log.10Sep14.lj.cpu.128K.$np
 done
--- a/bench/KEPLER/run_cuda.sh
+++ b/bench/KEPLER/run_cuda.sh
@ -1,20 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of the three USER-CUDA executables; the same count is
 # passed to "mpirun -N" and to "-pk cuda".
 # Each log is saved as log.10Sep14.lj.cuda.<precision>.128K.<count>.
 for prec in double mixed single; do
   for n in 1 2; do
     mpirun -N $n lmp_cuda_$prec -c on -sf cuda -pk cuda $n -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
     mv log.lammps log.10Sep14.lj.cuda.$prec.128K.$n
   done
 done
--- a/bench/KEPLER/run_gpu.sh
+++ b/bench/KEPLER/run_gpu.sh
@ -1,155 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of the three GPU-package executables for a range of MPI
 # rank counts, each with "-pk gpu 1" and "-pk gpu 2".
 # Each log is saved as log.10Sep14.lj.gpu.<precision>.128K.<ranks>.<gpus>.
 for prec in single mixed double; do
   for np in 1 2 4 6 8 10 12 14 16; do
     for ngpu in 1 2; do
       # the run list has no case with fewer ranks than GPUs (1 rank, 2 GPUs)
       if [ $np -lt $ngpu ]; then
         continue
       fi
       mpirun -np $np lmp_gpu_$prec -sf gpu -pk gpu $ngpu -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
       mv log.lammps log.10Sep14.lj.gpu.$prec.128K.$np.$ngpu
     done
   done
 done
--- a/bench/KEPLER/run_intel_cpu.sh
+++ b/bench/KEPLER/run_intel_cpu.sh
@ -1,83 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of lmp_intel_cpu for each precision setting and a range
 # of MPI rank counts.
 # Each log is saved as log.10Sep14.lj.intel.cpu.<precision>.128K.<ranks>.
 for prec in single mixed double; do
   for np in 1 2 4 6 8 10 12 14 16; do
     mpirun -np $np lmp_intel_cpu -sf intel -pk intel 1 prec $prec -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
     mv log.lammps log.10Sep14.lj.intel.cpu.$prec.128K.$np
   done
 done
--- a/bench/KEPLER/run_kokkos_cuda.sh
+++ b/bench/KEPLER/run_kokkos_cuda.sh
@ -1,74 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of lmp_kokkos_cuda: 1 rank / 1 GPU with 1-16 threads,
 # then 2 ranks / 2 GPUs with 1-8 threads.
 # Each log is saved as log.10Sep14.lj.kokkos.cuda.128K.<ranks>.<threads>.
 for nt in {1..16}; do
   mpirun -np 1 lmp_kokkos_cuda -k on g 1 t $nt -sf kk -pk kokkos binsize 2.8 comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
   mv log.lammps log.10Sep14.lj.kokkos.cuda.128K.1.$nt
 done
 for nt in {1..8}; do
   mpirun -np 2 lmp_kokkos_cuda -k on g 2 t $nt -sf kk -pk kokkos binsize 2.8 comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
   mv log.lammps log.10Sep14.lj.kokkos.cuda.128K.2.$nt
 done
--- a/bench/KEPLER/run_kokkos_omp.sh
+++ b/bench/KEPLER/run_kokkos_omp.sh
@ -1,17 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 # Single-node runs of lmp_kokkos_omp with ranks x threads = 16 in every case.
 # Each log is saved as log.10Sep14.lj.kokkos.omp.128K.<ranks>.<threads>.
 # The rank count given to "mpirun -np" must be an integer; it matches the
 # <ranks> field of the log name (previously the neigh setting "full"/"half"
 # had been written in its place).
 # Multi-threaded runs use a full neighbor list with newton off; the
 # one-thread-per-rank run uses a half neighbor list with newton on.
 mpirun -np 1 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 16 -sf kk -pk kokkos neigh full newton off comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.kokkos.omp.128K.1.16
 mpirun -np 2 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 8 -sf kk -pk kokkos neigh full newton off comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.kokkos.omp.128K.2.8
 mpirun -np 4 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 4 -sf kk -pk kokkos neigh full newton off comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.kokkos.omp.128K.4.4
 mpirun -np 8 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 2 -sf kk -pk kokkos neigh full newton off comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.kokkos.omp.128K.8.2
 mpirun -np 16 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 1 -sf kk -pk kokkos neigh half newton on comm device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.kokkos.omp.128K.16.1
--- a/bench/KEPLER/run_omp.sh
+++ b/bench/KEPLER/run_omp.sh
@ -1,17 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 mpirun -np 1 lmp_omp -sf omp -pk omp 16 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.omp.128K.1.16
 mpirun -np 2 lmp_omp -sf omp -pk omp 8 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.omp.128K.2.8
 mpirun -np 4 lmp_omp -sf omp -pk omp 4 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.omp.128K.4.4
 mpirun -np 8 lmp_omp -sf omp -pk omp 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.omp.128K.8.2
 mpirun -np 16 lmp_omp -sf omp -pk omp 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.omp.128K.16.1
--- a/bench/KEPLER/run_opt.sh
+++ b/bench/KEPLER/run_opt.sh
@ -1,29 +0,0 @@
 #!/bin/bash
 #SBATCH -N 1 --time=12:00:00
 mpirun -np 1 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.1
 mpirun -np 2 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.2
 mpirun -np 4 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.4
 mpirun -np 6 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.6
 mpirun -np 8 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.8
 mpirun -np 10 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.10
 mpirun -np 12 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.12
 mpirun -np 14 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.14
 mpirun -np 16 lmp_opt -sf opt -v x 32 -v y 32 -v z 32 -v t 100 < in.lj
 mv log.lammps log.10Sep14.lj.opt.128K.16
--- a/bench/KEPLER/run_strong.sh
+++ b/bench/KEPLER/run_strong.sh
@ -1,20 +0,0 @@
 #!/bin/bash
 #SBATCH -N 16 --time=12:00:00
 mpirun -npernode 16 lmp_cpu -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.cpu.2048K.16.16
 mpirun -npernode 16 lmp_omp -sf omp -pk omp 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.omp.2048K.16.1.16
 mpirun -npernode 2 lmp_cuda -c on -sf cuda -pk cuda 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.cuda.2048K.2.16
 mpirun -npernode 14 lmp_gpu -sf gpu -pk gpu 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.gpu.2048K.2.14.16
 mpirun -npernode 2 lmp_kokkos_cuda -k on g 2 t 1 -sf kk -pk kokkos comm device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.2.1.16
 mpirun -np 256 -bind-to core -map-by core -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 1 -sf kk -pk kokkos comm device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.kokkos.omp.2048K.16.1.16
--- a/bench/KEPLER/run_weak.sh
+++ b/bench/KEPLER/run_weak.sh
@ -1,20 +0,0 @@
 #!/bin/bash
 #SBATCH -N 16 --time=12:00:00
 mpirun -npernode 16 lmp_cpu -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.cpu.512K.16.16
 mpirun -npernode 16 lmp_omp -sf omp -pk omp 1 -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.omp.512K.16.1.16
 mpirun -npernode 2 lmp_cuda -c on -sf cuda -pk cuda 2 -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.cuda.512K.2.16
 mpirun -npernode 14 lmp_gpu -sf gpu -pk gpu 2 -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.gpu.512K.2.14.16
 mpirun -npernode 2 lmp_kokkos_cuda -k on g 2 t 1 -sf kk -pk kokkos comm device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.2.1.16
 mpirun -np 256 -bind-to core -map-by core -x KMP_AFFINITY=scatter lmp_kokkos_omp -k on t 1 -sf kk -pk kokkos comm device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj
 mv log.lammps log.28Jun14.lj.kokkos.omp.512K.16.1.16
--- a/bench/POTENTIALS/CH.rebo
+++ b/bench/POTENTIALS/CH.rebo
@ -0,0 +1 @@
 ../../potentials/CH.rebo
--- a/bench/POTENTIALS/data.eff
+++ b/bench/POTENTIALS/data.eff
--- a/bench/POTENTIALS/in.rebo
+++ b/bench/POTENTIALS/in.rebo
@ -11,7 +11,7 @@ neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    rebo
-pair_coeff	    * * CH.airebo C H
+pair_coeff	    * * CH.rebo C H
 velocity	    all create 300.0 761341
--- a/bench/POTENTIALS/log.16Mar18.airebo.1
+++ b/bench/POTENTIALS/log.16Mar18.airebo.1
@ -1,87 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # AIREBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.airebo
  orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  60 atoms
 replicate	    17 16 2
  orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158)
  1 by 1 by 1 MPI processor grid
  32640 atoms
  Time spent = 0.00154901 secs
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    airebo 3.0 1 1
 pair_coeff	    * * CH.airebo C H
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 10.7
  ghost atom cutoff = 10.7
  binsize = 5.35, bins = 14 13 10
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair airebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 106.4 | 106.4 | 106.4 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300    -139299.7            0   -138034.03    7998.7287 
      10    161.33916   -138711.85            0   -138031.17    33242.273 
      20    208.59505   -138911.77            0   -138031.73   -3199.2371 
      30    139.73485   -138617.76            0   -138028.23    10890.529 
      40    142.15332   -138628.03            0    -138028.3    14614.022 
      50    114.21945   -138509.87            0   -138027.98    24700.885 
      60     164.9432   -138725.08            0   -138029.19    35135.722 
      70    162.14928   -138714.86            0   -138030.77    5666.4609 
      80    157.17575   -138694.81            0    -138031.7    19838.161 
      90    196.16354   -138859.65            0   -138032.05   -7942.9718 
     100    178.30378    -138783.8            0   -138031.55     31012.15 
 Loop time of 60.9424 on 1 procs for 100 steps with 32640 atoms
 Performance: 0.071 ns/day, 338.569 hours/ns, 1.641 timesteps/s
 99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 55.299     | 55.299     | 55.299     |   0.0 | 90.74
 Neigh   | 5.5777     | 5.5777     | 5.5777     |   0.0 |  9.15
 Comm    | 0.027658   | 0.027658   | 0.027658   |   0.0 |  0.05
 Output  | 0.0011463  | 0.0011463  | 0.0011463  |   0.0 |  0.00
 Modify  | 0.024684   | 0.024684   | 0.024684   |   0.0 |  0.04
 Other   |            | 0.012      |            |       |  0.02
 Nlocal:    32640 ave 32640 max 32640 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    48190 ave 48190 max 48190 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    0 ave 0 max 0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  2.22179e+07 ave 2.22179e+07 max 2.22179e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 22217870
 Ave neighs/atom = 680.695
 Neighbor list builds = 8
 Dangerous builds = 0
 Total wall time: 0:01:02
--- a/bench/POTENTIALS/log.16Mar18.airebo.4
+++ b/bench/POTENTIALS/log.16Mar18.airebo.4
@ -1,87 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # AIREBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.airebo
  orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579)
  1 by 1 by 4 MPI processor grid
  reading atoms ...
  60 atoms
 replicate	    17 16 2
  orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158)
  2 by 2 by 1 MPI processor grid
  32640 atoms
  Time spent = 0.00070262 secs
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    airebo 3.0 1 1
 pair_coeff	    * * CH.airebo C H
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 10.7
  ghost atom cutoff = 10.7
  binsize = 5.35, bins = 14 13 10
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair airebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 29.37 | 29.75 | 30.13 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300    -139299.7            0   -138034.03    7998.7287 
      10    161.33916   -138711.85            0   -138031.17    33242.273 
      20    208.59505   -138911.77            0   -138031.73   -3199.2371 
      30    139.73485   -138617.76            0   -138028.23    10890.529 
      40    142.15332   -138628.03            0    -138028.3    14614.022 
      50    114.21945   -138509.87            0   -138027.98    24700.885 
      60     164.9432   -138725.08            0   -138029.19    35135.722 
      70    162.14928   -138714.86            0   -138030.77    5666.4609 
      80    157.17575   -138694.81            0    -138031.7    19838.161 
      90    196.16354   -138859.65            0   -138032.05   -7942.9718 
     100    178.30378    -138783.8            0   -138031.55     31012.15 
 Loop time of 16.768 on 4 procs for 100 steps with 32640 atoms
 Performance: 0.258 ns/day, 93.156 hours/ns, 5.964 timesteps/s
 99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 14.538     | 14.589     | 14.654     |   1.3 | 87.00
 Neigh   | 1.8853     | 1.8992     | 1.9159     |   0.8 | 11.33
 Comm    | 0.18073    | 0.25896    | 0.31361    |  10.6 |  1.54
 Output  | 0.00050807 | 0.0040419  | 0.0077746  |   5.6 |  0.02
 Modify  | 0.0094635  | 0.0096973  | 0.0099616  |   0.2 |  0.06
 Other   |            | 0.007481   |            |       |  0.04
 Nlocal:    8160 ave 8174 max 8146 min
 Histogram: 1 0 1 0 0 0 0 1 0 1
 Nghost:    22614.5 ave 22629 max 22601 min
 Histogram: 1 1 0 0 0 0 0 1 0 1
 Neighs:    0 ave 0 max 0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  5.55447e+06 ave 5.56557e+06 max 5.54193e+06 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Total # of neighbors = 22217870
 Ave neighs/atom = 680.695
 Neighbor list builds = 8
 Dangerous builds = 0
 Total wall time: 0:00:17
--- a/bench/POTENTIALS/log.16Mar18.reaxc.1
+++ b/bench/POTENTIALS/log.16Mar18.reaxc.1
@ -1,93 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # ReaxFF benchmark: simulation of PETN crystal, replicated unit cell
 units		real
 atom_style	charge
 read_data	data.reax
  orthogonal box = (0 0 0) to (9.49107 9.49107 6.99123)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  58 atoms
 replicate	7 8 10
  orthogonal box = (0 0 0) to (66.4375 75.9285 69.9123)
  1 by 1 by 1 MPI processor grid
  32480 atoms
  Time spent = 0.00162625 secs
 velocity	all create 300.0 9999
 pair_style	reax/c NULL
 pair_coeff      * * ffield.reax C H O N
 timestep	0.1
 fix		1 all nve
 fix             2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
 thermo		10
 thermo_style	custom step temp ke pe pxx pyy pzz etotal
 run		100
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 12
  ghost atom cutoff = 12
  binsize = 6, bins = 12 13 12
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 1727 | 1727 | 1727 Mbytes
 Step Temp KinEng PotEng Pxx Pyy Pzz TotEng 
       0          300    29044.119   -3232140.8    22804.879   -29365.593    6302.5637   -3203096.6 
      10    299.37479     28983.59   -3232075.2    21746.778    -23987.41    7610.2967   -3203091.6 
      20     295.5855    28616.735   -3231710.1    18178.568   -10871.882    10603.247   -3203093.3 
      30    289.48845    28026.457   -3231123.2    12146.362    4985.5572    13364.455   -3203096.8 
      40    282.66404     27365.76   -3230467.5    4284.2794    18132.771    14133.719   -3203101.7 
      50    274.97005    26620.876   -3229730.4     -3719.11    25519.692    12551.708   -3203109.5 
      60    266.11301    25763.393   -3228883.8   -9271.4049    27307.216    9753.2509   -3203120.4 
      70     259.3263    25106.346   -3228237.2   -11150.726    24238.382    6578.5306   -3203130.8 
      80    260.33956    25204.444   -3228344.2   -9576.6006     16737.65    3454.5747   -3203139.7 
      90    269.90199    26130.219   -3229275.5    -5906.376    5246.1572    467.31789   -3203145.3 
     100    280.76717    27182.117   -3230330.6   -1363.8281   -8133.2509   -1689.7711   -3203148.5 
 Loop time of 437.886 on 1 procs for 100 steps with 32480 atoms
 Performance: 0.002 ns/day, 12163.512 hours/ns, 0.228 timesteps/s
 99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 350.29     | 350.29     | 350.29     |   0.0 | 80.00
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0.026264   | 0.026264   | 0.026264   |   0.0 |  0.01
 Output  | 0.0024614  | 0.0024614  | 0.0024614  |   0.0 |  0.00
 Modify  | 87.55      | 87.55      | 87.55      |   0.0 | 19.99
 Other   |            | 0.01296    |            |       |  0.00
 Nlocal:    32480 ave 32480 max 32480 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    45128 ave 45128 max 45128 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.27781e+07 ave 1.27781e+07 max 1.27781e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 12778082
 Ave neighs/atom = 393.414
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:07:24
--- a/bench/POTENTIALS/log.16Mar18.reaxc.4
+++ b/bench/POTENTIALS/log.16Mar18.reaxc.4
@ -1,93 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # ReaxFF benchmark: simulation of PETN crystal, replicated unit cell
 units		real
 atom_style	charge
 read_data	data.reax
  orthogonal box = (0 0 0) to (9.49107 9.49107 6.99123)
  2 by 2 by 1 MPI processor grid
  reading atoms ...
  58 atoms
 replicate	7 8 10
  orthogonal box = (0 0 0) to (66.4375 75.9285 69.9123)
  1 by 2 by 2 MPI processor grid
  32480 atoms
  Time spent = 0.000803709 secs
 velocity	all create 300.0 9999
 pair_style	reax/c NULL
 pair_coeff      * * ffield.reax C H O N
 timestep	0.1
 fix		1 all nve
 fix             2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
 thermo		10
 thermo_style	custom step temp ke pe pxx pyy pzz etotal
 run		100
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 12
  ghost atom cutoff = 12
  binsize = 6, bins = 12 13 12
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 647 | 647 | 647 Mbytes
 Step Temp KinEng PotEng Pxx Pyy Pzz TotEng 
       0          300    29044.119   -3232140.8    22804.879   -29365.593    6302.5638   -3203096.6 
      10    299.37479     28983.59   -3232075.2    21746.773   -23987.409    7610.2911   -3203091.6 
      20    295.58552    28616.737   -3231710.1    18178.576   -10871.874    10603.421   -3203093.3 
      30    289.48843    28026.455   -3231123.3    12146.158    4985.3436    13364.461   -3203096.8 
      40    282.66409    27365.764   -3230467.5    4284.5077    18133.151    14133.949   -3203101.7 
      50    274.97008    26620.879   -3229730.3    -3718.536    25520.328    12552.195   -3203109.5 
      60    266.11301    25763.393   -3228883.7   -9271.0381    27307.591    9753.5339   -3203120.4 
      70    259.32631    25106.348   -3228237.1   -11150.314    24238.962    6578.8636   -3203130.8 
      80    260.33966    25204.453   -3228344.1   -9575.5709    16738.467    3455.2525   -3203139.7 
      90    269.90213    26130.231   -3229275.5   -5906.0456    5246.2122    467.43473   -3203145.2 
     100    280.76727    27182.127   -3230330.6   -1363.1733   -8132.8726   -1689.3275   -3203148.4 
 Loop time of 128.275 on 4 procs for 100 steps with 32480 atoms
 Performance: 0.007 ns/day, 3563.196 hours/ns, 0.780 timesteps/s
 99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 92.314     | 97.932     | 103.53     |  44.1 | 76.34
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0.039458   | 5.6409     | 11.263     | 184.0 |  4.40
 Output  | 0.00086117 | 0.0010868  | 0.0016167  |   0.9 |  0.00
 Modify  | 24.687     | 24.688     | 24.69      |   0.0 | 19.25
 Other   |            | 0.01323    |            |       |  0.01
 Nlocal:    8120 ave 8120 max 8120 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Nghost:    21992 ave 21992 max 21992 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Neighs:    3.48274e+06 ave 3.48274e+06 max 3.48274e+06 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 13930976
 Ave neighs/atom = 428.909
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:02:10
--- a/bench/POTENTIALS/log.16Mar18.rebo.1
+++ b/bench/POTENTIALS/log.16Mar18.rebo.1
@ -1,87 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # REBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.rebo
  orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  60 atoms
 replicate	    17 16 2
  orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158)
  1 by 1 by 1 MPI processor grid
  32640 atoms
  Time spent = 0.00151849 secs
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    rebo
 pair_coeff	    * * CH.airebo C H
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 6.5
  ghost atom cutoff = 6.5
  binsize = 3.25, bins = 22 21 16
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair rebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 34.21 | 34.21 | 34.21 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -138442.48            0    -137176.8    2464.5258 
      10    179.38052   -137930.92            0   -137174.12     15656.95 
      20    206.87725   -138046.63            0   -137173.83   -24042.005 
      30    150.80048   -137807.07            0   -137170.86   -16524.069 
      40    173.25027      -137902            0   -137171.07   -5716.7297 
      50    151.80552   -137812.01            0   -137171.55    3481.1096 
      60    199.08762   -138013.46            0   -137173.53    17882.906 
      70     217.8592   -138093.51            0   -137174.38   -12269.648 
      80    202.37612   -138029.05            0   -137175.24   -7622.1573 
      90      194.905   -137996.68            0    -137174.4   -32267.297 
     100    185.17966   -137954.16            0    -137172.9   -6902.1493 
 Loop time of 5.17257 on 1 procs for 100 steps with 32640 atoms
 Performance: 0.835 ns/day, 28.737 hours/ns, 19.333 timesteps/s
 99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 3.3427     | 3.3427     | 3.3427     |   0.0 | 64.62
 Neigh   | 1.7764     | 1.7764     | 1.7764     |   0.0 | 34.34
 Comm    | 0.017914   | 0.017914   | 0.017914   |   0.0 |  0.35
 Output  | 0.0011199  | 0.0011199  | 0.0011199  |   0.0 |  0.02
 Modify  | 0.024357   | 0.024357   | 0.024357   |   0.0 |  0.47
 Other   |            | 0.01004    |            |       |  0.19
 Nlocal:    32640 ave 32640 max 32640 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    26460 ave 26460 max 26460 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    0 ave 0 max 0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  4.90213e+06 ave 4.90213e+06 max 4.90213e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 4902132
 Ave neighs/atom = 150.188
 Neighbor list builds = 9
 Dangerous builds = 0
 Total wall time: 0:00:05
--- a/bench/POTENTIALS/log.16Mar18.rebo.4
+++ b/bench/POTENTIALS/log.16Mar18.rebo.4
@ -1,87 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # REBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.rebo
  orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579)
  1 by 1 by 4 MPI processor grid
  reading atoms ...
  60 atoms
 replicate	    17 16 2
  orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158)
  2 by 2 by 1 MPI processor grid
  32640 atoms
  Time spent = 0.000838995 secs
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    rebo
 pair_coeff	    * * CH.airebo C H
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 6.5
  ghost atom cutoff = 6.5
  binsize = 3.25, bins = 22 21 16
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair rebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 11.75 | 11.94 | 12.13 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -138442.48            0    -137176.8    2464.5258 
      10    179.38052   -137930.92            0   -137174.12     15656.95 
      20    206.87725   -138046.63            0   -137173.83   -24042.005 
      30    150.80048   -137807.07            0   -137170.86   -16524.069 
      40    173.25027      -137902            0   -137171.07   -5716.7297 
      50    151.80552   -137812.01            0   -137171.55    3481.1096 
      60    199.08762   -138013.46            0   -137173.53    17882.906 
      70     217.8592   -138093.51            0   -137174.38   -12269.648 
      80    202.37612   -138029.05            0   -137175.24   -7622.1573 
      90      194.905   -137996.68            0    -137174.4   -32267.297 
     100    185.17966   -137954.16            0    -137172.9   -6902.1493 
 Loop time of 1.52214 on 4 procs for 100 steps with 32640 atoms
 Performance: 2.838 ns/day, 8.456 hours/ns, 65.697 timesteps/s
 98.9% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.88531    | 0.90632    | 0.92546    |   1.6 | 59.54
 Neigh   | 0.53954    | 0.54258    | 0.54621    |   0.3 | 35.65
 Comm    | 0.035654   | 0.058364   | 0.079543   |   7.0 |  3.83
 Output  | 0.00048494 | 0.00065351 | 0.0011017  |   0.0 |  0.04
 Modify  | 0.0090034  | 0.0090633  | 0.0091114  |   0.0 |  0.60
 Other   |            | 0.005168   |            |       |  0.34
 Nlocal:    8160 ave 8163 max 8157 min
 Histogram: 1 1 0 0 0 0 0 0 1 1
 Nghost:    11605.8 ave 11615 max 11593 min
 Histogram: 1 0 0 0 0 0 2 0 0 1
 Neighs:    0 ave 0 max 0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  1.22553e+06 ave 1.22734e+06 max 1.22455e+06 min
 Histogram: 2 0 0 1 0 0 0 0 0 1
 Total # of neighbors = 4902132
 Ave neighs/atom = 150.188
 Neighbor list builds = 9
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/bench/POTENTIALS/log.16Mar18.spce.1
+++ b/bench/POTENTIALS/log.16Mar18.spce.1
@ -1,132 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # SPC/E water box benchmark
 units		real
 atom_style	full
 read_data	data.spce
  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  4500 atoms
  scanning bonds ...
  2 = max bonds/atom
  scanning angles ...
  1 = max angles/atom
  reading bonds ...
  3000 bonds
  reading angles ...
  1500 angles
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
 replicate	2 4 1
  orthogonal box = (0.02645 0.02645 0.02641) to (71.0392 142.052 35.4736)
  1 by 1 by 1 MPI processor grid
  36000 atoms
  24000 bonds
  12000 angles
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
  Time spent = 0.0105426 secs
 pair_style	lj/cut/coul/long 9.8 9.8
 kspace_style	pppm 1.0e-4
 pair_coeff	1 1 0.15535 3.166
 pair_coeff	* 2 0.0000 0.0000
 bond_style	harmonic
 angle_style	harmonic
 dihedral_style	none
 improper_style	none
 bond_coeff	1 1000.00 1.000
 angle_coeff	1 100.0 109.47
 special_bonds   lj/coul 0.0 0.0 0.5
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
 neighbor        2.0 bin
 neigh_modify	every 1 delay 10 check yes
 fix		1 all shake 0.0001 20 0 b 1 a 1
  0 = # of size 2 clusters
  0 = # of size 3 clusters
  0 = # of size 4 clusters
  12000 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 velocity	all create 300 432567 dist uniform
 timestep	2.0
 thermo_style    one
 thermo		50
 run		100
 PPPM initialization ...
  using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.268801
  grid = 36 64 24
  stencil order = 5
  estimated absolute RMS force accuracy = 0.0331015
  estimated relative force accuracy = 9.96841e-05
  using double precision FFTs
  3d grid and FFT values/proc = 91977 55296
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 11.8
  ghost atom cutoff = 11.8
  binsize = 5.9, bins = 13 25 7
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut/coul/long, perpetual
      attributes: half, newton on
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 105.1 | 105.1 | 105.1 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -133281.51            0   -111820.57    516.17807 
      50    264.98553   -136986.74            0   -118030.61   -440.29256 
     100    274.45966   -136364.57            0   -116730.69   -128.61949 
 Loop time of 20.172 on 1 procs for 100 steps with 36000 atoms
 Performance: 0.857 ns/day, 28.017 hours/ns, 4.957 timesteps/s
 99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 15.818     | 15.818     | 15.818     |   0.0 | 78.41
 Bond    | 7.8201e-05 | 7.8201e-05 | 7.8201e-05 |   0.0 |  0.00
 Kspace  | 1.966      | 1.966      | 1.966      |   0.0 |  9.75
 Neigh   | 2.0639     | 2.0639     | 2.0639     |   0.0 | 10.23
 Comm    | 0.043918   | 0.043918   | 0.043918   |   0.0 |  0.22
 Output  | 0.00025153 | 0.00025153 | 0.00025153 |   0.0 |  0.00
 Modify  | 0.27056    | 0.27056    | 0.27056    |   0.0 |  1.34
 Other   |            | 0.009522   |            |       |  0.05
 Nlocal:    36000 ave 36000 max 36000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    56963 ave 56963 max 56963 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.24625e+07 ave 1.24625e+07 max 1.24625e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 12462451
 Ave neighs/atom = 346.179
 Ave special neighs/atom = 2
 Neighbor list builds = 9
 Dangerous builds = 6
 Total wall time: 0:00:20
--- a/bench/POTENTIALS/log.16Mar18.spce.4
+++ b/bench/POTENTIALS/log.16Mar18.spce.4
@ -1,132 +0,0 @@
 LAMMPS (16 Mar 2018)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # SPC/E water box benchmark
 units		real
 atom_style	full
 read_data	data.spce
  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
  2 by 2 by 1 MPI processor grid
  reading atoms ...
  4500 atoms
  scanning bonds ...
  2 = max bonds/atom
  scanning angles ...
  1 = max angles/atom
  reading bonds ...
  3000 bonds
  reading angles ...
  1500 angles
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
 replicate	2 4 1
  orthogonal box = (0.02645 0.02645 0.02641) to (71.0392 142.052 35.4736)
  1 by 4 by 1 MPI processor grid
  36000 atoms
  24000 bonds
  12000 angles
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
  Time spent = 0.00535488 secs
 pair_style	lj/cut/coul/long 9.8 9.8
 kspace_style	pppm 1.0e-4
 pair_coeff	1 1 0.15535 3.166
 pair_coeff	* 2 0.0000 0.0000
 bond_style	harmonic
 angle_style	harmonic
 dihedral_style	none
 improper_style	none
 bond_coeff	1 1000.00 1.000
 angle_coeff	1 100.0 109.47
 special_bonds   lj/coul 0.0 0.0 0.5
  2 = max # of 1-2 neighbors
  1 = max # of 1-3 neighbors
  1 = max # of 1-4 neighbors
  2 = max # of special neighbors
 neighbor        2.0 bin
 neigh_modify	every 1 delay 10 check yes
 fix		1 all shake 0.0001 20 0 b 1 a 1
  0 = # of size 2 clusters
  0 = # of size 3 clusters
  0 = # of size 4 clusters
  12000 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 velocity	all create 300 432567 dist uniform
 timestep	2.0
 thermo_style    one
 thermo		50
 run		100
 PPPM initialization ...
  using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.268801
  grid = 36 64 24
  stencil order = 5
  estimated absolute RMS force accuracy = 0.0331015
  estimated relative force accuracy = 9.96841e-05
  using double precision FFTs
  3d grid and FFT values/proc = 27993 13824
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 11.8
  ghost atom cutoff = 11.8
  binsize = 5.9, bins = 13 25 7
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut/coul/long, perpetual
      attributes: half, newton on
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 37.74 | 37.74 | 37.74 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -133281.51            0   -111820.57    516.17807 
      50    264.98553   -136986.74            0   -118030.61   -440.29256 
     100    274.45966   -136364.57            0   -116730.69   -128.61949 
 Loop time of 5.43807 on 4 procs for 100 steps with 36000 atoms
 Performance: 3.178 ns/day, 7.553 hours/ns, 18.389 timesteps/s
 99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 4.0016     | 4.0968     | 4.1706     |   3.3 | 75.34
 Bond    | 7.1049e-05 | 8.5771e-05 | 0.00010586 |   0.0 |  0.00
 Kspace  | 0.56386    | 0.63614    | 0.73036    |   8.3 | 11.70
 Neigh   | 0.52575    | 0.52587    | 0.52594    |   0.0 |  9.67
 Comm    | 0.045847   | 0.047308   | 0.048331   |   0.4 |  0.87
 Output  | 9.1314e-05 | 0.00012183 | 0.00021172 |   0.0 |  0.00
 Modify  | 0.12561    | 0.1258     | 0.12605    |   0.1 |  2.31
 Other   |            | 0.005944   |            |       |  0.11
 Nlocal:    9000 ave 9002 max 8998 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Nghost:    24134.2 ave 24184 max 24062 min
 Histogram: 1 0 0 0 0 0 1 1 0 1
 Neighs:    3.11561e+06 ave 3.11676e+06 max 3.11446e+06 min
 Histogram: 1 0 0 1 0 0 1 0 0 1
 Total # of neighbors = 12462451
 Ave neighs/atom = 346.179
 Ave special neighs/atom = 2
 Neighbor list builds = 9
 Dangerous builds = 6
 Total wall time: 0:00:05
--- a/bench/POTENTIALS/log.16Mar18.adp.1
+++ b/bench/POTENTIALS/log.16Mar18.adp.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Ni in ADP
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.52
-Lattice spacing in x,y,z = 3.52 3.52 3.52
+Lattice spacing in x,y,z = 3.5200000 3.5200000 3.5200000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (70.4 70.4 70.4)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (70.400000 70.400000 70.400000)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00184107 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	adp
 pair_coeff	* * Ni.adp Ni
 Reading adp potential file Ni.adp with DATE: 2011-06-20
 velocity	all create 1600.0 376847 loop geom
@ -41,35 +41,35 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 27.56 | 27.56 | 27.56 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 27.57 | 27.57 | 27.57 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -142400            0   -135782.09    20259.105 
     100    793.05485   -139023.13            0    -135742.9    32175.694 
-Loop time of 11.9854 on 1 procs for 100 steps with 32000 atoms
+Loop time of 11.0841 on 1 procs for 100 steps with 32000 atoms
-Performance: 3.604 ns/day, 6.659 hours/ns, 8.344 timesteps/s
+Performance: 3.897 ns/day, 6.158 hours/ns, 9.022 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 11.447     | 11.447     | 11.447     |   0.0 | 95.51
+Pair    | 10.597     | 10.597     | 10.597     |   0.0 | 95.60
-Neigh   | 0.48465    | 0.48465    | 0.48465    |   0.0 |  4.04
+Neigh   | 0.43765    | 0.43765    | 0.43765    |   0.0 |  3.95
-Comm    | 0.019317   | 0.019317   | 0.019317   |   0.0 |  0.16
+Comm    | 0.018561   | 0.018561   | 0.018561   |   0.0 |  0.17
-Output  | 0.00011063 | 0.00011063 | 0.00011063 |   0.0 |  0.00
+Output  | 0.0001123  | 0.0001123  | 0.0001123  |   0.0 |  0.00
-Modify  | 0.025319   | 0.025319   | 0.025319   |   0.0 |  0.21
+Modify  | 0.023261   | 0.023261   | 0.023261   |   0.0 |  0.21
-Other   |            | 0.009125   |            |       |  0.08
+Other   |            | 0.00792    |            |       |  0.07
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    19911 ave 19911 max 19911 min
+Nghost:        19911.0 ave       19911 max       19911 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.33704e+06 ave 1.33704e+06 max 1.33704e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1337035
-Ave neighs/atom = 41.7823
+Ave neighs/atom = 41.782344
 Neighbor list builds = 13
 Dangerous builds = 0
-Total wall time: 0:00:12
+Total wall time: 0:00:11
--- a/bench/POTENTIALS/log.16Mar18.adp.4
+++ b/bench/POTENTIALS/log.16Mar18.adp.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Ni in ADP
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.52
-Lattice spacing in x,y,z = 3.52 3.52 3.52
+Lattice spacing in x,y,z = 3.5200000 3.5200000 3.5200000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (70.4 70.4 70.4)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (70.400000 70.400000 70.400000)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000586033 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	adp
 pair_coeff	* * Ni.adp Ni
 Reading adp potential file Ni.adp with DATE: 2011-06-20
 velocity	all create 1600.0 376847 loop geom
@ -45,30 +45,30 @@ Per MPI rank memory allocation (min/avg/max) = 12.45 | 12.45 | 12.45 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -142400            0   -135782.09    20259.105 
     100    793.05485   -139023.13            0    -135742.9    32175.694 
-Loop time of 3.49752 on 4 procs for 100 steps with 32000 atoms
+Loop time of 3.54402 on 4 procs for 100 steps with 32000 atoms
-Performance: 12.352 ns/day, 1.943 hours/ns, 28.592 timesteps/s
+Performance: 12.190 ns/day, 1.969 hours/ns, 28.217 timesteps/s
-99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+97.1% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.3203     | 3.3261     | 3.3317     |   0.3 | 95.10
+Pair    | 3.2768     | 3.3041     | 3.339      |   1.2 | 93.23
-Neigh   | 0.12544    | 0.12594    | 0.12634    |   0.1 |  3.60
+Neigh   | 0.11542    | 0.11601    | 0.11661    |   0.1 |  3.27
-Comm    | 0.024059   | 0.03001    | 0.035574   |   2.9 |  0.86
+Comm    | 0.068218   | 0.10201    | 0.13103    |   7.0 |  2.88
-Output  | 4.8161e-05 | 6.8128e-05 | 0.00011802 |   0.0 |  0.00
+Output  | 4.4823e-05 | 8.0943e-05 | 0.000175   |   0.0 |  0.00
-Modify  | 0.010666   | 0.010841   | 0.011109   |   0.2 |  0.31
+Modify  | 0.010904   | 0.011064   | 0.011172   |   0.1 |  0.31
-Other   |            | 0.00457    |            |       |  0.13
+Other   |            | 0.01075    |            |       |  0.30
-Nlocal:    8000 ave 8044 max 7960 min
+Nlocal:        8000.00 ave        8044 max        7960 min
 Histogram: 1 0 0 1 0 1 0 0 0 1
-Nghost:    9131 ave 9171 max 9087 min
+Nghost:        9131.00 ave        9171 max        9087 min
 Histogram: 1 0 0 0 1 0 1 0 0 1
-Neighs:    334259 ave 336108 max 332347 min
+Neighs:       334259.0 ave      336108 max      332347 min
 Histogram: 1 0 0 1 0 0 1 0 0 1
 Total # of neighbors = 1337035
-Ave neighs/atom = 41.7823
+Ave neighs/atom = 41.782344
 Neighbor list builds = 13
 Dangerous builds = 0
--- a/bench/POTENTIALS/log.9Oct20.airebo.1
+++ b/bench/POTENTIALS/log.9Oct20.airebo.1
@ -0,0 +1,90 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # AIREBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.airebo
 Reading data file ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (2.1000000 2.1000000 25.579000)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  60 atoms
  read_data CPU = 0.000 seconds
 replicate	    17 16 2
 Replicating atoms ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (69.300000 65.100000 51.158000)
  1 by 1 by 1 MPI processor grid
  32640 atoms
  replicate CPU = 0.002 seconds
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    airebo 3.0 1 1
 pair_coeff	    * * CH.airebo C H
 Reading airebo potential file CH.airebo with DATE: 2011-10-25
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 10.7
  ghost atom cutoff = 10.7
  binsize = 5.35, bins = 14 13 10
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair airebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 106.4 | 106.4 | 106.4 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -139300.72            0   -138035.04    7988.6646 
      10    161.34683    -138712.9            0   -138032.19    33228.921 
      20    208.59504   -138912.79            0   -138032.74   -3211.8806 
      30     139.7513   -138618.85            0   -138029.25    10878.143 
      40    142.14562   -138629.02            0   -138029.32    14601.302 
      50    114.23401   -138510.95            0      -138029    24691.124 
      60    164.92002      -138726            0   -138030.21    35125.541 
      70    162.15256    -138715.9            0   -138031.79    5658.7946 
      80    157.16184   -138695.77            0   -138032.72    19824.698 
      90    196.15907   -138860.65            0   -138033.07   -7950.8463 
     100    178.31875   -138784.89            0   -138032.57    30997.671 
 Loop time of 58.0757 on 1 procs for 100 steps with 32640 atoms
 Performance: 0.074 ns/day, 322.643 hours/ns, 1.722 timesteps/s
 99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 53.513     | 53.513     | 53.513     |   0.0 | 92.14
 Neigh   | 4.5013     | 4.5013     | 4.5013     |   0.0 |  7.75
 Comm    | 0.026609   | 0.026609   | 0.026609   |   0.0 |  0.05
 Output  | 0.0010192  | 0.0010192  | 0.0010192  |   0.0 |  0.00
 Modify  | 0.02275    | 0.02275    | 0.02275    |   0.0 |  0.04
 Other   |            | 0.01074    |            |       |  0.02
 Nlocal:        32640.0 ave       32640 max       32640 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        48190.0 ave       48190 max       48190 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  2.22178e+07 ave 2.22178e+07 max 2.22178e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 22217840
 Ave neighs/atom = 680.69363
 Neighbor list builds = 8
 Dangerous builds = 0
 Total wall time: 0:00:59
--- a/bench/POTENTIALS/log.9Oct20.airebo.4
+++ b/bench/POTENTIALS/log.9Oct20.airebo.4
@ -0,0 +1,90 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # AIREBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.airebo
 Reading data file ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (2.1000000 2.1000000 25.579000)
  1 by 1 by 4 MPI processor grid
  reading atoms ...
  60 atoms
  read_data CPU = 0.000 seconds
 replicate	    17 16 2
 Replicating atoms ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (69.300000 65.100000 51.158000)
  2 by 2 by 1 MPI processor grid
  32640 atoms
  replicate CPU = 0.001 seconds
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    airebo 3.0 1 1
 pair_coeff	    * * CH.airebo C H
 Reading airebo potential file CH.airebo with DATE: 2011-10-25
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 10.7
  ghost atom cutoff = 10.7
  binsize = 5.35, bins = 14 13 10
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair airebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 29.43 | 29.81 | 30.19 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -139300.72            0   -138035.04    7988.6646 
      10    161.34683    -138712.9            0   -138032.19    33228.921 
      20    208.59504   -138912.79            0   -138032.74   -3211.8806 
      30     139.7513   -138618.85            0   -138029.25    10878.143 
      40    142.14562   -138629.02            0   -138029.32    14601.302 
      50    114.23401   -138510.95            0      -138029    24691.124 
      60    164.92002      -138726            0   -138030.21    35125.541 
      70    162.15256    -138715.9            0   -138031.79    5658.7946 
      80    157.16184   -138695.77            0   -138032.72    19824.698 
      90    196.15907   -138860.65            0   -138033.07   -7950.8463 
     100    178.31875   -138784.89            0   -138032.57    30997.671 
 Loop time of 17.206 on 4 procs for 100 steps with 32640 atoms
 Performance: 0.251 ns/day, 95.589 hours/ns, 5.812 timesteps/s
 97.1% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 14.655     | 14.727     | 14.848     |   1.9 | 85.59
 Neigh   | 1.5571     | 1.6135     | 1.6871     |   3.7 |  9.38
 Comm    | 0.7741     | 0.83422    | 0.90385    |   5.8 |  4.85
 Output  | 0.00047541 | 0.0027475  | 0.009517   |   7.5 |  0.02
 Modify  | 0.0091925  | 0.009367   | 0.0096078  |   0.2 |  0.05
 Other   |            | 0.01908    |            |       |  0.11
 Nlocal:        8160.00 ave        8174 max        8146 min
 Histogram: 1 0 1 0 0 0 0 1 0 1
 Nghost:        22614.5 ave       22629 max       22601 min
 Histogram: 1 1 0 0 0 0 0 1 0 1
 Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  5.55446e+06 ave 5.56556e+06 max 5.54192e+06 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Total # of neighbors = 22217840
 Ave neighs/atom = 680.69363
 Neighbor list builds = 8
 Dangerous builds = 0
 Total wall time: 0:00:17
--- a/bench/POTENTIALS/log.16Mar18.bop.1
+++ b/bench/POTENTIALS/log.16Mar18.bop.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk CdTe via BOP
@ -7,19 +6,18 @@ units		metal
 atom_style	atomic
 lattice		custom 6.82884 		basis 0.0 0.0 0.0 basis 0.25 0.25 0.25 		basis 0.0 0.5 0.5 basis 0.25 0.75 0.75 		basis 0.5 0.0 0.5 basis 0.75 0.25 0.75 		basis 0.5 0.5 0.0 basis 0.75 0.75 0.25
-Lattice spacing in x,y,z = 6.82884 6.82884 6.82884
+Lattice spacing in x,y,z = 6.8288400 6.8288400 6.8288400
 region		box block 0 20 0 20 0 10
 create_box	2 box
-Created orthogonal box = (0 0 0) to (136.577 136.577 68.2884)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (136.57680 136.57680 68.288400)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box basis 2 2 basis 4 2 basis 6 2 basis 8 2
 Created 32000 atoms
-  Time spent = 0.00191426 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	bop
 pair_coeff	* * CdTe.bop.table Cd Te
-Reading potential file CdTe.bop.table with DATE: 2012-06-25
+Reading bop potential file CdTe.bop.table with DATE: 2012-06-25
 Reading potential file CdTe.bop.table with DATE: 2012-06-25
 mass		1 112.4
 mass		2 127.6
@ -51,32 +49,32 @@ Per MPI rank memory allocation (min/avg/max) = 19.39 | 19.39 | 19.39 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000   -69539.487            0   -65403.292    3473.2595 
     100    572.16481   -67769.936            0    -65403.35    1838.6993 
-Loop time of 24.1696 on 1 procs for 100 steps with 32000 atoms
+Loop time of 36.0284 on 1 procs for 100 steps with 32000 atoms
-Performance: 0.357 ns/day, 67.138 hours/ns, 4.137 timesteps/s
+Performance: 0.240 ns/day, 100.079 hours/ns, 2.776 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 23.355     | 23.355     | 23.355     |   0.0 | 96.63
+Pair    | 35.306     | 35.306     | 35.306     |   0.0 | 97.99
-Neigh   | 0.7545     | 0.7545     | 0.7545     |   0.0 |  3.12
+Neigh   | 0.66375    | 0.66375    | 0.66375    |   0.0 |  1.84
-Comm    | 0.026978   | 0.026978   | 0.026978   |   0.0 |  0.11
+Comm    | 0.027954   | 0.027954   | 0.027954   |   0.0 |  0.08
-Output  | 0.0001111  | 0.0001111  | 0.0001111  |   0.0 |  0.00
+Output  | 9.9182e-05 | 9.9182e-05 | 9.9182e-05 |   0.0 |  0.00
-Modify  | 0.024145   | 0.024145   | 0.024145   |   0.0 |  0.10
+Modify  | 0.022574   | 0.022574   | 0.022574   |   0.0 |  0.06
-Other   |            | 0.009326   |            |       |  0.04
+Other   |            | 0.008374   |            |       |  0.02
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    35071 ave 35071 max 35071 min
+Nghost:        35071.0 ave       35071 max       35071 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-FullNghs:  141288 ave 141288 max 141288 min
+FullNghs:     141288.0 ave      141288 max      141288 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 141288
-Ave neighs/atom = 4.41525
+Ave neighs/atom = 4.4152500
 Neighbor list builds = 14
 Dangerous builds = 0
-Total wall time: 0:00:24
+Total wall time: 0:00:36
--- a/bench/POTENTIALS/log.16Mar18.bop.4
+++ b/bench/POTENTIALS/log.16Mar18.bop.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk CdTe via BOP
@ -7,19 +6,18 @@ units		metal
 atom_style	atomic
 lattice		custom 6.82884 		basis 0.0 0.0 0.0 basis 0.25 0.25 0.25 		basis 0.0 0.5 0.5 basis 0.25 0.75 0.75 		basis 0.5 0.0 0.5 basis 0.75 0.25 0.75 		basis 0.5 0.5 0.0 basis 0.75 0.75 0.25
-Lattice spacing in x,y,z = 6.82884 6.82884 6.82884
+Lattice spacing in x,y,z = 6.8288400 6.8288400 6.8288400
 region		box block 0 20 0 20 0 10
 create_box	2 box
-Created orthogonal box = (0 0 0) to (136.577 136.577 68.2884)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (136.57680 136.57680 68.288400)
  2 by 2 by 1 MPI processor grid
 create_atoms	1 box basis 2 2 basis 4 2 basis 6 2 basis 8 2
 Created 32000 atoms
-  Time spent = 0.000597477 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	bop
 pair_coeff	* * CdTe.bop.table Cd Te
-Reading potential file CdTe.bop.table with DATE: 2012-06-25
+Reading bop potential file CdTe.bop.table with DATE: 2012-06-25
 Reading potential file CdTe.bop.table with DATE: 2012-06-25
 mass		1 112.4
 mass		2 127.6
@ -47,36 +45,36 @@ Neighbor list info ...
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 8.497 | 8.497 | 8.497 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 8.495 | 8.495 | 8.495 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000   -69539.487            0   -65403.292    3473.2595 
     100    572.16481   -67769.936            0    -65403.35    1838.6993 
-Loop time of 6.50033 on 4 procs for 100 steps with 32000 atoms
+Loop time of 10.2579 on 4 procs for 100 steps with 32000 atoms
-Performance: 1.329 ns/day, 18.056 hours/ns, 15.384 timesteps/s
+Performance: 0.842 ns/day, 28.494 hours/ns, 9.749 timesteps/s
-99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+97.2% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 5.7879     | 5.975      | 6.1607     |   5.4 | 91.92
+Pair    | 9.0899     | 9.3839     | 9.6808     |   6.9 | 91.48
-Neigh   | 0.27603    | 0.27621    | 0.27647    |   0.0 |  4.25
+Neigh   | 0.24734    | 0.2533     | 0.25828    |   0.8 |  2.47
-Comm    | 0.049869   | 0.23531    | 0.42241    |  27.2 |  3.62
+Comm    | 0.30495    | 0.60685    | 0.89832    |  27.5 |  5.92
-Output  | 4.9829e-05 | 5.9724e-05 | 8.5592e-05 |   0.0 |  0.00
+Output  | 4.673e-05  | 7.695e-05  | 0.00016189 |   0.0 |  0.00
-Modify  | 0.0089927  | 0.0090921  | 0.0092406  |   0.1 |  0.14
+Modify  | 0.0092409  | 0.00937    | 0.0094445  |   0.1 |  0.09
-Other   |            | 0.004665   |            |       |  0.07
+Other   |            | 0.004455   |            |       |  0.04
-Nlocal:    8000 ave 8006 max 7994 min
+Nlocal:        8000.00 ave        8006 max        7994 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
-Nghost:    15171 ave 15177 max 15165 min
+Nghost:        15171.0 ave       15177 max       15165 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
-FullNghs:  35322 ave 35412 max 35267 min
+FullNghs:      35322.0 ave       35412 max       35267 min
 Histogram: 1 0 1 1 0 0 0 0 0 1
 Total # of neighbors = 141288
-Ave neighs/atom = 4.41525
+Ave neighs/atom = 4.4152500
 Neighbor list builds = 14
 Dangerous builds = 0
-Total wall time: 0:00:06
+Total wall time: 0:00:10
--- a/bench/POTENTIALS/log.16Mar18.comb.1
+++ b/bench/POTENTIALS/log.16Mar18.comb.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # SiO2 for COMB potential
@ -7,10 +6,12 @@ units		metal
 atom_style	charge
 read_data  	data.comb
-  triclinic box = (0 0 0) to (74.58 74.58 83.064) with tilt (0 0 0)
+Reading data file ...
  triclinic box = (0.0000000 0.0000000 0.0000000) to (74.580000 74.580000 83.064000) with tilt (0.0000000 0.0000000 0.0000000)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32400 atoms
  read_data CPU = 0.022 seconds
 mass		1 28.0855
 group    	type1 type 1
@ -63,32 +64,32 @@ Step Temp TotEng PotEng E_vdwl E_coul c_q1 c_q2 Press Volume
      80    272.98301    -6.803583   -6.8388677    4.6404093   -11.479277    2.8932784   -1.4466392   -9896.1704    462016.62 
      90    305.77651   -6.8036184   -6.8431419    4.6512736   -11.494415    2.8953109   -1.4476554   -15675.983    462016.62 
     100    331.58255   -6.8036753   -6.8465344     4.662727   -11.509261     2.897273   -1.4486365   -21675.515    462016.62 
-Loop time of 517.206 on 1 procs for 100 steps with 32400 atoms
+Loop time of 426.185 on 1 procs for 100 steps with 32400 atoms
-Performance: 0.003 ns/day, 7183.417 hours/ns, 0.193 timesteps/s
+Performance: 0.004 ns/day, 5919.239 hours/ns, 0.235 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 114.18     | 114.18     | 114.18     |   0.0 | 22.08
+Pair    | 87.4       | 87.4       | 87.4       |   0.0 | 20.51
-Neigh   | 0.47558    | 0.47558    | 0.47558    |   0.0 |  0.09
+Neigh   | 0.3908     | 0.3908     | 0.3908     |   0.0 |  0.09
-Comm    | 0.030611   | 0.030611   | 0.030611   |   0.0 |  0.01
+Comm    | 0.029936   | 0.029936   | 0.029936   |   0.0 |  0.01
-Output  | 0.0024922  | 0.0024922  | 0.0024922  |   0.0 |  0.00
+Output  | 0.0024605  | 0.0024605  | 0.0024605  |   0.0 |  0.00
-Modify  | 402.51     | 402.51     | 402.51     |   0.0 | 77.82
+Modify  | 338.36     | 338.36     | 338.36     |   0.0 | 79.39
-Other   |            | 0.006137   |            |       |  0.00
+Other   |            | 0.005751   |            |       |  0.00
-Nlocal:    32400 ave 32400 max 32400 min
+Nlocal:        32400.0 ave       32400 max       32400 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    42518 ave 42518 max 42518 min
+Nghost:        42518.0 ave       42518 max       42518 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  1.85317e+07 ave 1.85317e+07 max 1.85317e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 18531740
-Ave neighs/atom = 571.967
+Ave neighs/atom = 571.96728
 Neighbor list builds = 1
 Dangerous builds = 0
-Total wall time: 0:09:18
+Total wall time: 0:07:40
--- a/bench/POTENTIALS/log.16Mar18.comb.4
+++ b/bench/POTENTIALS/log.16Mar18.comb.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # SiO2 for COMB potential
@ -7,10 +6,12 @@ units		metal
 atom_style	charge
 read_data  	data.comb
-  triclinic box = (0 0 0) to (74.58 74.58 83.064) with tilt (0 0 0)
+Reading data file ...
  triclinic box = (0.0000000 0.0000000 0.0000000) to (74.580000 74.580000 83.064000) with tilt (0.0000000 0.0000000 0.0000000)
  1 by 2 by 2 MPI processor grid
  reading atoms ...
  32400 atoms
  read_data CPU = 0.031 seconds
 mass		1 28.0855
 group    	type1 type 1
@ -50,7 +51,7 @@ Neighbor list info ...
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 32.11 | 32.11 | 32.11 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 32.28 | 32.28 | 32.29 Mbytes
 Step Temp TotEng PotEng E_vdwl E_coul c_q1 c_q2 Press Volume 
       0          300   -6.8032038   -6.8419806    4.6274455   -11.469426    2.8875895   -1.4437947    13386.415    462016.62 
      10    273.21913   -6.8032489   -6.8385642    4.6221303   -11.460695    2.8872353   -1.4436176    13076.442    462016.62 
@ -63,32 +64,32 @@ Step Temp TotEng PotEng E_vdwl E_coul c_q1 c_q2 Press Volume
      80    272.98301    -6.803583   -6.8388677    4.6404093   -11.479277    2.8932784   -1.4466392   -9896.1704    462016.62 
      90    305.77651   -6.8036184   -6.8431419    4.6512736   -11.494415    2.8953109   -1.4476554   -15675.983    462016.62 
     100    331.58255   -6.8036753   -6.8465344     4.662727   -11.509261     2.897273   -1.4486365   -21675.515    462016.62 
-Loop time of 131.437 on 4 procs for 100 steps with 32400 atoms
+Loop time of 116.902 on 4 procs for 100 steps with 32400 atoms
-Performance: 0.013 ns/day, 1825.518 hours/ns, 0.761 timesteps/s
+Performance: 0.015 ns/day, 1623.637 hours/ns, 0.855 timesteps/s
-99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+97.0% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 28.847     | 28.874     | 28.913     |   0.5 | 21.97
+Pair    | 22.866     | 23.181     | 23.375     |   4.0 | 19.83
-Neigh   | 0.10981    | 0.11084    | 0.11145    |   0.2 |  0.08
+Neigh   | 0.093812   | 0.094818   | 0.095301   |   0.2 |  0.08
-Comm    | 0.28924    | 0.32866    | 0.3556     |   4.5 |  0.25
+Comm    | 0.94054    | 1.1329     | 1.4505     |  18.1 |  0.97
-Output  | 0.0010426  | 0.0011656  | 0.0015302  |   0.6 |  0.00
+Output  | 0.0011141  | 0.001422   | 0.0023448  |   1.4 |  0.00
-Modify  | 102.12     | 102.12     | 102.12     |   0.0 | 77.69
+Modify  | 92.485     | 92.488     | 92.494     |   0.0 | 79.12
-Other   |            | 0.003455   |            |       |  0.00
+Other   |            | 0.003673   |            |       |  0.00
-Nlocal:    8100 ave 8110 max 8090 min
+Nlocal:        8100.00 ave        8110 max        8090 min
 Histogram: 1 0 0 0 1 1 0 0 0 1
 Nghost:        20725.2 ave       20772 max       20694 min
 Histogram: 1 1 0 0 1 0 0 0 0 1
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  4.63294e+06 ave 4.63866e+06 max 4.62736e+06 min
 Histogram: 1 0 0 0 1 1 0 0 0 1
 Total # of neighbors = 18531740
-Ave neighs/atom = 571.967
+Ave neighs/atom = 571.96728
 Neighbor list builds = 1
 Dangerous builds = 0
-Total wall time: 0:02:21
+Total wall time: 0:02:06
--- a/bench/POTENTIALS/log.16Mar18.dpd.1
+++ b/bench/POTENTIALS/log.16Mar18.dpd.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # DPD benchmark
@ -8,14 +7,14 @@ atom_style	atomic
 comm_modify     mode single vel yes
 lattice		fcc 3.0
-Lattice spacing in x,y,z = 1.10064 1.10064 1.10064
+Lattice spacing in x,y,z = 1.1006424 1.1006424 1.1006424
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (22.0128 22.0128 22.0128)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (22.012848 22.012848 22.012848)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.0018332 secs
+  create_atoms CPU = 0.002 seconds
 mass		1 1.0
 velocity	all create 1.0 87287 loop geom
@ -46,30 +45,30 @@ Per MPI rank memory allocation (min/avg/max) = 11.32 | 11.32 | 11.32 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0            1    3.6872574            0    5.1872105    28.880274 
     100    1.0246036    4.5727353            0    6.1095927    23.859969 
-Loop time of 3.09286 on 1 procs for 100 steps with 32000 atoms
+Loop time of 2.63541 on 1 procs for 100 steps with 32000 atoms
-Performance: 111741.340 tau/day, 32.333 timesteps/s
+Performance: 131137.146 tau/day, 37.945 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 1.5326     | 1.5326     | 1.5326     |   0.0 | 49.55
+Pair    | 1.1841     | 1.1841     | 1.1841     |   0.0 | 44.93
-Neigh   | 1.4771     | 1.4771     | 1.4771     |   0.0 | 47.76
+Neigh   | 1.3737     | 1.3737     | 1.3737     |   0.0 | 52.12
-Comm    | 0.044292   | 0.044292   | 0.044292   |   0.0 |  1.43
+Comm    | 0.04266    | 0.04266    | 0.04266    |   0.0 |  1.62
-Output  | 0.00011039 | 0.00011039 | 0.00011039 |   0.0 |  0.00
+Output  | 9.5844e-05 | 9.5844e-05 | 9.5844e-05 |   0.0 |  0.00
-Modify  | 0.022322   | 0.022322   | 0.022322   |   0.0 |  0.72
+Modify  | 0.020128   | 0.020128   | 0.020128   |   0.0 |  0.76
-Other   |            | 0.01648    |            |       |  0.53
+Other   |            | 0.01468    |            |       |  0.56
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    14981 ave 14981 max 14981 min
+Nghost:        14981.0 ave       14981 max       14981 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    660587 ave 660587 max 660587 min
+Neighs:       660587.0 ave      660587 max      660587 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 660587
-Ave neighs/atom = 20.6433
+Ave neighs/atom = 20.643344
 Neighbor list builds = 50
 Dangerous builds = 0
-Total wall time: 0:00:03
+Total wall time: 0:00:02
--- a/bench/POTENTIALS/log.16Mar18.dpd.4
+++ b/bench/POTENTIALS/log.16Mar18.dpd.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # DPD benchmark
@ -8,14 +7,14 @@ atom_style	atomic
 comm_modify     mode single vel yes
 lattice		fcc 3.0
-Lattice spacing in x,y,z = 1.10064 1.10064 1.10064
+Lattice spacing in x,y,z = 1.1006424 1.1006424 1.1006424
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (22.0128 22.0128 22.0128)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (22.012848 22.012848 22.012848)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000589132 secs
+  create_atoms CPU = 0.001 seconds
 mass		1 1.0
 velocity	all create 1.0 87287 loop geom
@ -42,34 +41,34 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.874 | 3.874 | 3.874 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 3.875 | 3.875 | 3.875 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0            1    3.6872574            0    5.1872105    28.911346 
     100    1.0219182    4.5817845            0    6.1146139    23.803115 
-Loop time of 0.83904 on 4 procs for 100 steps with 32000 atoms
+Loop time of 0.882096 on 4 procs for 100 steps with 32000 atoms
-Performance: 411899.440 tau/day, 119.184 timesteps/s
+Performance: 391793.935 tau/day, 113.366 timesteps/s
-99.3% CPU use with 4 MPI tasks x 1 OpenMP threads
+93.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.39605    | 0.40101    | 0.40702    |   0.6 | 47.79
+Pair    | 0.31428    | 0.33654    | 0.37754    |   4.4 | 38.15
-Neigh   | 0.38186    | 0.38494    | 0.38738    |   0.4 | 45.88
+Neigh   | 0.36308    | 0.3849     | 0.41542    |   3.1 | 43.63
-Comm    | 0.032073   | 0.039688   | 0.045953   |   2.9 |  4.73
+Comm    | 0.07276    | 0.14322    | 0.1842     |  11.3 | 16.24
-Output  | 4.4823e-05 | 5.4002e-05 | 7.844e-05  |   0.0 |  0.01
+Output  | 4.22e-05   | 5.2989e-05 | 8.2493e-05 |   0.0 |  0.01
-Modify  | 0.0056572  | 0.0056887  | 0.0057547  |   0.1 |  0.68
+Modify  | 0.0057678  | 0.0060433  | 0.0065472  |   0.4 |  0.69
-Other   |            | 0.007655   |            |       |  0.91
+Other   |            | 0.01134    |            |       |  1.29
-Nlocal:    8000 ave 8014 max 7986 min
+Nlocal:        8000.00 ave        8014 max        7986 min
 Histogram: 1 1 0 0 0 0 0 0 1 1
-Nghost:    6744 ave 6764 max 6726 min
+Nghost:        6744.00 ave        6764 max        6726 min
 Histogram: 1 0 0 1 0 1 0 0 0 1
-Neighs:    165107 ave 166433 max 163419 min
+Neighs:       165107.0 ave      166433 max      163419 min
 Histogram: 1 0 1 0 0 0 0 0 0 2
 Total # of neighbors = 660428
-Ave neighs/atom = 20.6384
+Ave neighs/atom = 20.638375
 Neighbor list builds = 50
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.eam.1
+++ b/bench/POTENTIALS/log.16Mar18.eam.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Cu in EAM
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.615
-Lattice spacing in x,y,z = 3.615 3.615 3.615
+Lattice spacing in x,y,z = 3.6150000 3.6150000 3.6150000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (72.3 72.3 72.3)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (72.300000 72.300000 72.300000)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00185037 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	eam
 pair_coeff	1 1 Cu_u3.eam
 Reading eam potential file Cu_u3.eam with DATE: 2007-06-11
 velocity	all create 1600.0 376847 loop geom
@ -45,30 +45,30 @@ Per MPI rank memory allocation (min/avg/max) = 16.83 | 16.83 | 16.83 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -113280            0   -106662.09    18703.573 
     100      801.832    -109957.3            0   -106640.77    51322.821 
-Loop time of 3.92295 on 1 procs for 100 steps with 32000 atoms
+Loop time of 3.70399 on 1 procs for 100 steps with 32000 atoms
-Performance: 11.012 ns/day, 2.179 hours/ns, 25.491 timesteps/s
+Performance: 11.663 ns/day, 2.058 hours/ns, 26.998 timesteps/s
-99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.3913     | 3.3913     | 3.3913     |   0.0 | 86.45
+Pair    | 3.2216     | 3.2216     | 3.2216     |   0.0 | 86.98
-Neigh   | 0.48107    | 0.48107    | 0.48107    |   0.0 | 12.26
+Neigh   | 0.43766    | 0.43766    | 0.43766    |   0.0 | 11.82
-Comm    | 0.01729    | 0.01729    | 0.01729    |   0.0 |  0.44
+Comm    | 0.015404   | 0.015404   | 0.015404   |   0.0 |  0.42
-Output  | 0.00011253 | 0.00011253 | 0.00011253 |   0.0 |  0.00
+Output  | 0.000103   | 0.000103   | 0.000103   |   0.0 |  0.00
-Modify  | 0.024349   | 0.024349   | 0.024349   |   0.0 |  0.62
+Modify  | 0.021604   | 0.021604   | 0.021604   |   0.0 |  0.58
-Other   |            | 0.008847   |            |       |  0.23
+Other   |            | 0.007627   |            |       |  0.21
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    19909 ave 19909 max 19909 min
+Nghost:        19909.0 ave       19909 max       19909 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.20778e+06 ave 1.20778e+06 max 1.20778e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1207784
-Ave neighs/atom = 37.7433
+Ave neighs/atom = 37.743250
 Neighbor list builds = 13
 Dangerous builds = 0
 Total wall time: 0:00:03
--- a/bench/POTENTIALS/log.16Mar18.eam.4
+++ b/bench/POTENTIALS/log.16Mar18.eam.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Cu in EAM
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.615
-Lattice spacing in x,y,z = 3.615 3.615 3.615
+Lattice spacing in x,y,z = 3.6150000 3.6150000 3.6150000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (72.3 72.3 72.3)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (72.300000 72.300000 72.300000)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000595331 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	eam
 pair_coeff	1 1 Cu_u3.eam
 Reading eam potential file Cu_u3.eam with DATE: 2007-06-11
 velocity	all create 1600.0 376847 loop geom
@ -41,34 +41,34 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 7.381 | 7.381 | 7.381 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 7.382 | 7.382 | 7.382 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -113280            0   -106662.09    18703.573 
     100      801.832    -109957.3            0   -106640.77    51322.821 
-Loop time of 1.04497 on 4 procs for 100 steps with 32000 atoms
+Loop time of 1.01466 on 4 procs for 100 steps with 32000 atoms
-Performance: 41.341 ns/day, 0.581 hours/ns, 95.697 timesteps/s
+Performance: 42.576 ns/day, 0.564 hours/ns, 98.555 timesteps/s
-99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+99.3% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.88513    | 0.88724    | 0.89191    |   0.3 | 84.91
+Pair    | 0.86683    | 0.86797    | 0.86877    |   0.1 | 85.54
-Neigh   | 0.12418    | 0.12458    | 0.12511    |   0.1 | 11.92
+Neigh   | 0.11567    | 0.11681    | 0.11992    |   0.5 | 11.51
-Comm    | 0.015654   | 0.020543   | 0.022984   |   2.0 |  1.97
+Comm    | 0.015399   | 0.017346   | 0.018526   |   0.9 |  1.71
-Output  | 4.8637e-05 | 5.8711e-05 | 8.6546e-05 |   0.0 |  0.01
+Output  | 4.6253e-05 | 8.1241e-05 | 0.00017262 |   0.0 |  0.01
-Modify  | 0.0085199  | 0.0085896  | 0.0086446  |   0.1 |  0.82
+Modify  | 0.0085337  | 0.0085824  | 0.0086181  |   0.0 |  0.85
-Other   |            | 0.003959   |            |       |  0.38
+Other   |            | 0.003876   |            |       |  0.38
-Nlocal:    8000 ave 8008 max 7993 min
+Nlocal:        8000.00 ave        8008 max        7993 min
 Histogram: 2 0 0 0 0 0 0 0 1 1
 Nghost:        9130.25 ave        9138 max        9122 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
-Neighs:    301946 ave 302392 max 301360 min
+Neighs:       301946.0 ave      302392 max      301360 min
 Histogram: 1 0 0 0 1 0 0 0 1 1
 Total # of neighbors = 1207784
-Ave neighs/atom = 37.7433
+Ave neighs/atom = 37.743250
 Neighbor list builds = 13
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/bench/POTENTIALS/log.16Mar18.eff.1
+++ b/bench/POTENTIALS/log.16Mar18.eff.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # eFF benchmark of H plasma
@ -7,12 +6,14 @@ units 		electron
 atom_style	electron
 read_data  	data.eff
-  orthogonal box = (0 0 0) to (41.9118 41.9118 41.9118)
+Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (41.911791 41.911791 41.911791)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.040 seconds
 pair_style      eff/cut 12
 pair_coeff	* *
@ -42,7 +43,7 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 370.9 | 370.9 | 370.9 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 371.5 | 371.5 | 371.5 Mbytes
 Step TotEng PotEng KinEng Temp Press 
       0    4046.5854    796.63785    3249.9475    42763.133 4.4764483e+12 
       5    4046.5854    796.95799    3249.6274     42758.92 4.4728546e+12 
@ -65,33 +66,33 @@ Step TotEng PotEng KinEng Temp Press
      90    4046.5857    864.14162    3182.4441    41874.916 4.3868277e+12 
      95    4046.5857    871.30234    3175.2834    41780.695 4.3805068e+12 
     100    4046.5858    878.76023    3167.8255    41682.563 4.3740731e+12 
-Loop time of 323.031 on 1 procs for 100 steps with 32000 atoms
+Loop time of 344.943 on 1 procs for 100 steps with 32000 atoms
-Performance: 26.747 fs/day, 0.897 hours/fs, 0.310 timesteps/s
+Performance: 25.048 fs/day, 0.958 hours/fs, 0.290 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 322.78     | 322.78     | 322.78     |   0.0 | 99.92
+Pair    | 344.71     | 344.71     | 344.71     |   0.0 | 99.93
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.1876     | 0.1876     | 0.1876     |   0.0 |  0.06
+Comm    | 0.1763     | 0.1763     | 0.1763     |   0.0 |  0.05
-Output  | 0.0027025  | 0.0027025  | 0.0027025  |   0.0 |  0.00
+Output  | 0.0024362  | 0.0024362  | 0.0024362  |   0.0 |  0.00
-Modify  | 0.032475   | 0.032475   | 0.032475   |   0.0 |  0.01
+Modify  | 0.030869   | 0.030869   | 0.030869   |   0.0 |  0.01
-Other   |            | 0.02538    |            |       |  0.01
+Other   |            | 0.02272    |            |       |  0.01
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    114349 ave 114349 max 114349 min
+Nghost:       114349.0 ave      114349 max      114349 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    8.10572e+07 ave 8.10572e+07 max 8.10572e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 81057159
-Ave neighs/atom = 2533.04
+Ave neighs/atom = 2533.0362
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:05:27
+Total wall time: 0:05:49
--- a/bench/POTENTIALS/log.16Mar18.eff.4
+++ b/bench/POTENTIALS/log.16Mar18.eff.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # eFF benchmark of H plasma
@ -7,12 +6,14 @@ units 		electron
 atom_style	electron
 read_data  	data.eff
-  orthogonal box = (0 0 0) to (41.9118 41.9118 41.9118)
+Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (41.911791 41.911791 41.911791)
  1 by 2 by 2 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.064 seconds
 pair_style      eff/cut 12
 pair_coeff	* *
@ -42,7 +43,7 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 101.4 | 104.8 | 108.3 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 102.1 | 105.5 | 109.0 Mbytes
 Step TotEng PotEng KinEng Temp Press 
       0    4046.5854    796.63785    3249.9475    42763.133 4.4764483e+12 
       5    4046.5854    796.95799    3249.6274     42758.92 4.4728546e+12 
@ -65,33 +66,33 @@ Step TotEng PotEng KinEng Temp Press
      90    4046.5857    864.14162    3182.4441    41874.916 4.3868277e+12 
      95    4046.5857    871.30234    3175.2834    41780.695 4.3805068e+12 
     100    4046.5858    878.76023    3167.8255    41682.563 4.3740731e+12 
-Loop time of 90.1636 on 4 procs for 100 steps with 32000 atoms
+Loop time of 100.431 on 4 procs for 100 steps with 32000 atoms
-Performance: 95.826 fs/day, 0.250 hours/fs, 1.109 timesteps/s
+Performance: 86.029 fs/day, 0.279 hours/fs, 0.996 timesteps/s
-99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+95.8% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 83.772     | 86.516     | 89.593     |  29.5 | 95.95
+Pair    | 89.149     | 93.787     | 97.971     |  41.9 | 93.38
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.51677    | 3.5934     | 6.3368     | 144.6 |  3.99
+Comm    | 2.4073     | 6.5821     | 11.21      | 157.8 |  6.55
-Output  | 0.0012872  | 0.0018208  | 0.0024981  |   1.0 |  0.00
+Output  | 0.0014203  | 0.0094504  | 0.019111   |   8.3 |  0.01
-Modify  | 0.017231   | 0.018405   | 0.01983    |   0.8 |  0.02
+Modify  | 0.016678   | 0.016999   | 0.017425   |   0.2 |  0.02
-Other   |            | 0.03431    |            |       |  0.04
+Other   |            | 0.03524    |            |       |  0.04
-Nlocal:    8000 ave 8112 max 7875 min
+Nlocal:        8000.00 ave        8112 max        7875 min
 Histogram: 1 1 0 0 0 0 0 0 0 2
-Nghost:    65589 ave 66004 max 65177 min
+Nghost:        65589.0 ave       66004 max       65177 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Neighs:    2.02643e+07 ave 2.11126e+07 max 1.94058e+07 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Total # of neighbors = 81057159
-Ave neighs/atom = 2533.04
+Ave neighs/atom = 2533.0362
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:01:31
+Total wall time: 0:01:42
--- a/bench/POTENTIALS/log.16Mar18.eim.1
+++ b/bench/POTENTIALS/log.16Mar18.eim.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # EIM benchmark
 # if run long enough (e.g. 1M steps), the unstable CsCl form of a NaCl single
@ -9,12 +8,14 @@ units		metal
 atom_style      atomic
 read_data       data.eim
-  orthogonal box = (-0.5 -0.5 -0.5) to (71.58 143.66 71.58)
+Reading data file ...
  orthogonal box = (-0.5 -0.5 -0.5) to (71.580002 143.66000 71.580002)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.024 seconds
 pair_style      eim
 pair_coeff      * * Na Cl ffield.eim Na Cl
@ -44,34 +45,34 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 17.72 | 17.72 | 17.72 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 17.74 | 17.74 | 17.74 Mbytes
 Step PotEng Pxx Pyy Pzz Temp 
       0    -90567.58    -117883.6   -118039.81   -117894.07         1400 
-     100   -91997.012   -4104.7052    -4138.276   -4145.8936    944.10136 
+     100    -91997.39    -4127.237   -4160.9799   -4169.0581    944.09785 
-Loop time of 11.4536 on 1 procs for 100 steps with 32000 atoms
+Loop time of 10.3731 on 1 procs for 100 steps with 32000 atoms
-Performance: 0.377 ns/day, 63.631 hours/ns, 8.731 timesteps/s
+Performance: 0.416 ns/day, 57.628 hours/ns, 9.640 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 9.8277     | 9.8277     | 9.8277     |   0.0 | 85.80
+Pair    | 8.8937     | 8.8937     | 8.8937     |   0.0 | 85.74
-Neigh   | 1.484      | 1.484      | 1.484      |   0.0 | 12.96
+Neigh   | 1.344      | 1.344      | 1.344      |   0.0 | 12.96
-Comm    | 0.028584   | 0.028584   | 0.028584   |   0.0 |  0.25
+Comm    | 0.028207   | 0.028207   | 0.028207   |   0.0 |  0.27
-Output  | 0.00023127 | 0.00023127 | 0.00023127 |   0.0 |  0.00
+Output  | 0.00020099 | 0.00020099 | 0.00020099 |   0.0 |  0.00
-Modify  | 0.09791    | 0.09791    | 0.09791    |   0.0 |  0.85
+Modify  | 0.093584   | 0.093584   | 0.093584   |   0.0 |  0.90
-Other   |            | 0.0152     |            |       |  0.13
+Other   |            | 0.0134     |            |       |  0.13
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    21505 ave 21505 max 21505 min
+Nghost:        21505.0 ave       21505 max       21505 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    1.5839e+06 ave 1.5839e+06 max 1.5839e+06 min
+Neighs:    1.58387e+06 ave 1.58387e+06 max 1.58387e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Total # of neighbors = 1583901
+Total # of neighbors = 1583871
-Ave neighs/atom = 49.4969
+Ave neighs/atom = 49.495969
 Neighbor list builds = 37
 Dangerous builds = 12
-Total wall time: 0:00:11
+Total wall time: 0:00:10
--- a/bench/POTENTIALS/log.16Mar18.eim.4
+++ b/bench/POTENTIALS/log.16Mar18.eim.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # EIM benchmark
 # if run long enough (e.g. 1M steps), the unstable CsCl form of a NaCl single
@ -9,12 +8,14 @@ units		metal
 atom_style      atomic
 read_data       data.eim
-  orthogonal box = (-0.5 -0.5 -0.5) to (71.58 143.66 71.58)
+Reading data file ...
  orthogonal box = (-0.5 -0.5 -0.5) to (71.580002 143.66000 71.580002)
  1 by 4 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.023 seconds
 pair_style      eim
 pair_coeff      * * Na Cl ffield.eim Na Cl
@ -44,34 +45,34 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 7.285 | 7.285 | 7.285 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 7.460 | 7.460 | 7.460 Mbytes
 Step PotEng Pxx Pyy Pzz Temp 
       0    -90567.58    -117883.6   -118039.81   -117894.07         1400 
-     100   -91997.012   -4104.7052    -4138.276   -4145.8936    944.10136 
+     100    -91997.39    -4127.237   -4160.9799   -4169.0581    944.09785 
-Loop time of 3.12061 on 4 procs for 100 steps with 32000 atoms
+Loop time of 3.14457 on 4 procs for 100 steps with 32000 atoms
-Performance: 1.384 ns/day, 17.337 hours/ns, 32.045 timesteps/s
+Performance: 1.374 ns/day, 17.470 hours/ns, 31.801 timesteps/s
-98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+95.8% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 2.6504     | 2.6583     | 2.6685     |   0.5 | 85.18
+Pair    | 2.6017     | 2.6264     | 2.6758     |   1.8 | 83.52
-Neigh   | 0.36996    | 0.37847    | 0.39396    |   1.5 | 12.13
+Neigh   | 0.34384    | 0.35308    | 0.36784    |   1.6 | 11.23
-Comm    | 0.037041   | 0.040586   | 0.04504    |   1.4 |  1.30
+Comm    | 0.039635   | 0.099661   | 0.15326    |  15.0 |  3.17
-Output  | 7.081e-05  | 8.75e-05   | 0.00012994 |   0.0 |  0.00
+Output  | 6.485e-05  | 9.656e-05  | 0.0001905  |   0.0 |  0.00
-Modify  | 0.029286   | 0.035978   | 0.047942   |   3.9 |  1.15
+Modify  | 0.035666   | 0.055446   | 0.098401   |  10.6 |  1.76
-Other   |            | 0.007206   |            |       |  0.23
+Other   |            | 0.009939   |            |       |  0.32
-Nlocal:    8000 ave 8000 max 8000 min
+Nlocal:        8000.00 ave        8000 max        8000 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Nghost:        9460.25 ave        9469 max        9449 min
 Histogram: 1 0 0 0 0 1 0 1 0 1
-Neighs:    395975 ave 397239 max 394616 min
+Neighs:       395968.0 ave      397233 max      394606 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
-Total # of neighbors = 1583901
+Total # of neighbors = 1583871
-Ave neighs/atom = 49.4969
+Ave neighs/atom = 49.495969
 Neighbor list builds = 37
 Dangerous builds = 12
 Total wall time: 0:00:03
--- a/bench/POTENTIALS/log.16Mar18.fene.1
+++ b/bench/POTENTIALS/log.16Mar18.fene.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # FENE beadspring benchmark
@ -8,7 +7,8 @@ atom_style	bond
 special_bonds   fene
 read_data	data.fene
-  orthogonal box = (-16.796 -16.796 -16.796) to (16.796 16.796 16.796)
+Reading data file ...
  orthogonal box = (-16.796000 -16.796000 -16.796000) to (16.796000 16.796000 16.796000)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
@ -18,8 +18,13 @@ read_data	data.fene
  1 = max bonds/atom
  reading bonds ...
  31680 bonds
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      1.0      1.0     
  special bond factors coul:  0.0      1.0      1.0     
     2 = max # of 1-2 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.003 seconds
  read_data CPU = 0.054 seconds
 neighbor	0.4 bin
 neigh_modify    delay 5 every 1
@ -49,36 +54,37 @@ Neighbor list info ...
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 13.18 | 13.18 | 13.18 Mbytes
+WARNING: Communication cutoff 1.52 is shorter than a bond length based estimate of 1.855. This may lead to errors. (src/comm.cpp:667)
 Per MPI rank memory allocation (min/avg/max) = 13.20 | 13.20 | 13.20 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0   0.97029772   0.44484087    20.494523    22.394765    4.6721833 
     100    0.9729966    0.4361122    20.507698     22.40326    4.6548819 
-Loop time of 0.66285 on 1 procs for 100 steps with 32000 atoms
+Loop time of 0.648089 on 1 procs for 100 steps with 32000 atoms
-Performance: 156415.445 tau/day, 150.864 timesteps/s
+Performance: 159978.044 tau/day, 154.300 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.13075    | 0.13075    | 0.13075    |   0.0 | 19.73
+Pair    | 0.12174    | 0.12174    | 0.12174    |   0.0 | 18.78
-Bond    | 0.046363   | 0.046363   | 0.046363   |   0.0 |  6.99
+Bond    | 0.050688   | 0.050688   | 0.050688   |   0.0 |  7.82
-Neigh   | 0.3172     | 0.3172     | 0.3172     |   0.0 | 47.85
+Neigh   | 0.33136    | 0.33136    | 0.33136    |   0.0 | 51.13
-Comm    | 0.016553   | 0.016553   | 0.016553   |   0.0 |  2.50
+Comm    | 0.014753   | 0.014753   | 0.014753   |   0.0 |  2.28
-Output  | 0.00010395 | 0.00010395 | 0.00010395 |   0.0 |  0.02
+Output  | 9.8467e-05 | 9.8467e-05 | 9.8467e-05 |   0.0 |  0.02
-Modify  | 0.14515    | 0.14515    | 0.14515    |   0.0 | 21.90
+Modify  | 0.12378    | 0.12378    | 0.12378    |   0.0 | 19.10
-Other   |            | 0.006728   |            |       |  1.02
+Other   |            | 0.005668   |            |       |  0.87
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    9493 ave 9493 max 9493 min
+Nghost:        9493.00 ave        9493 max        9493 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    155873 ave 155873 max 155873 min
+Neighs:       155873.0 ave      155873 max      155873 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 155873
-Ave neighs/atom = 4.87103
+Ave neighs/atom = 4.8710312
-Ave special neighs/atom = 1.98
+Ave special neighs/atom = 1.9800000
 Neighbor list builds = 20
 Dangerous builds = 20
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.fene.4
+++ b/bench/POTENTIALS/log.16Mar18.fene.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # FENE beadspring benchmark
@ -8,7 +7,8 @@ atom_style	bond
 special_bonds   fene
 read_data	data.fene
-  orthogonal box = (-16.796 -16.796 -16.796) to (16.796 16.796 16.796)
+Reading data file ...
  orthogonal box = (-16.796000 -16.796000 -16.796000) to (16.796000 16.796000 16.796000)
  1 by 2 by 2 MPI processor grid
  reading atoms ...
  32000 atoms
@ -18,8 +18,13 @@ read_data	data.fene
  1 = max bonds/atom
  reading bonds ...
  31680 bonds
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      1.0      1.0     
  special bond factors coul:  0.0      1.0      1.0     
     2 = max # of 1-2 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.001 seconds
  read_data CPU = 0.048 seconds
 neighbor	0.4 bin
 neigh_modify    delay 5 every 1
@ -49,36 +54,37 @@ Neighbor list info ...
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 4.605 | 4.605 | 4.606 Mbytes
+WARNING: Communication cutoff 1.52 is shorter than a bond length based estimate of 1.855. This may lead to errors. (src/comm.cpp:667)
 Per MPI rank memory allocation (min/avg/max) = 4.779 | 4.780 | 4.780 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0   0.97029772   0.44484087    20.494523    22.394765    4.6721833 
     100    0.9736748   0.44378481    20.502389     22.40664    4.7809557 
-Loop time of 0.184782 on 4 procs for 100 steps with 32000 atoms
+Loop time of 0.179123 on 4 procs for 100 steps with 32000 atoms
-Performance: 561093.346 tau/day, 541.178 timesteps/s
+Performance: 578819.228 tau/day, 558.275 timesteps/s
-98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.033747   | 0.034391   | 0.035036   |   0.3 | 18.61
+Pair    | 0.031898   | 0.032311   | 0.032864   |   0.2 | 18.04
-Bond    | 0.012475   | 0.012579   | 0.012812   |   0.1 |  6.81
+Bond    | 0.01335    | 0.013471   | 0.013588   |   0.1 |  7.52
-Neigh   | 0.083916   | 0.083953   | 0.084022   |   0.0 | 45.43
+Neigh   | 0.087105   | 0.087195   | 0.087282   |   0.0 | 48.68
-Comm    | 0.012409   | 0.01363    | 0.014534   |   0.7 |  7.38
+Comm    | 0.010541   | 0.011533   | 0.012463   |   0.7 |  6.44
-Output  | 4.1246e-05 | 5.9545e-05 | 0.00010443 |   0.0 |  0.03
+Output  | 3.8624e-05 | 5.6028e-05 | 0.00010157 |   0.0 |  0.03
-Modify  | 0.036675   | 0.037876   | 0.038357   |   0.4 | 20.50
+Modify  | 0.031766   | 0.03233    | 0.033015   |   0.3 | 18.05
-Other   |            | 0.002294   |            |       |  1.24
+Other   |            | 0.002227   |            |       |  1.24
-Nlocal:    8000 ave 8023 max 7978 min
+Nlocal:        8000.00 ave        8023 max        7978 min
 Histogram: 1 0 0 0 1 1 0 0 0 1
 Nghost:        4158.75 ave        4175 max        4145 min
 Histogram: 1 0 1 0 0 0 1 0 0 1
-Neighs:    38940 ave 39184 max 38640 min
+Neighs:        38940.0 ave       39184 max       38640 min
 Histogram: 1 0 0 0 0 1 1 0 0 1
 Total # of neighbors = 155760
-Ave neighs/atom = 4.8675
+Ave neighs/atom = 4.8675000
-Ave special neighs/atom = 1.98
+Ave special neighs/atom = 1.9800000
 Neighbor list builds = 20
 Dangerous builds = 20
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.gb.1
+++ b/bench/POTENTIALS/log.16Mar18.gb.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Gay-Berne benchmark
 # biaxial ellipsoid mesogens in isotropic phase
@ -18,13 +17,15 @@ atom_style    ellipsoid
 #set	      group all quat/random 982381
 read_data     data.gb
-  orthogonal box = (2.19575 2.19575 2.19575) to (50.8124 50.8124 50.8124)
+Reading data file ...
  orthogonal box = (2.1957493 2.1957493 2.1957493) to (50.812373 50.812373 50.812373)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32768 atoms
  reading velocities ...
  32768 velocities
  32768 ellipsoids
  read_data CPU = 0.097 seconds
 compute	      rot all temp/asphere
 group	      spheroid type 1
@ -63,41 +64,41 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 28.91 | 28.91 | 28.91 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 28.98 | 28.98 | 28.98 Mbytes
 Step Temp E_pair E_mol TotEng Press Volume 
       0          2.4   0.50438568            0    4.1042758    6.7818168    114909.09 
-      20    2.7357818   0.26045557            0     4.364003    6.8299368    111715.16 
+      20    2.7357797   0.26044978            0     4.363994    6.8299173     111715.2 
-      40    2.9201296   0.22570735            0     4.605768    7.0767907    109473.23 
+      40    2.9201268    0.2257049            0    4.6057615    7.0767796    109473.26 
-      60    2.9820039   0.19733812            0    4.6702075    7.1507065    108393.77 
+      60    2.9820022   0.19733756            0    4.6702044    7.1507023    108393.79 
-      80    3.0148529   0.15114819            0    4.6732895    7.1699502    107672.24 
+      80     3.014852   0.15114765            0    4.6732876    7.1699472    107672.25 
-     100    3.0206703   0.10567623            0    4.6365433     7.154345    107184.83 
+     100    3.0206698     0.105676            0    4.6365424    7.1543436    107184.84 
-Loop time of 43.7894 on 1 procs for 100 steps with 32768 atoms
+Loop time of 57.1053 on 1 procs for 100 steps with 32768 atoms
-Performance: 394.616 tau/day, 2.284 timesteps/s
+Performance: 302.599 tau/day, 1.751 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 42.881     | 42.881     | 42.881     |   0.0 | 97.93
+Pair    | 56.246     | 56.246     | 56.246     |   0.0 | 98.50
-Neigh   | 0.35071    | 0.35071    | 0.35071    |   0.0 |  0.80
+Neigh   | 0.31058    | 0.31058    | 0.31058    |   0.0 |  0.54
-Comm    | 0.065153   | 0.065153   | 0.065153   |   0.0 |  0.15
+Comm    | 0.066039   | 0.066039   | 0.066039   |   0.0 |  0.12
-Output  | 0.00054383 | 0.00054383 | 0.00054383 |   0.0 |  0.00
+Output  | 0.00048757 | 0.00048757 | 0.00048757 |   0.0 |  0.00
-Modify  | 0.47852    | 0.47852    | 0.47852    |   0.0 |  1.09
+Modify  | 0.46972    | 0.46972    | 0.46972    |   0.0 |  0.82
-Other   |            | 0.01337    |            |       |  0.03
+Other   |            | 0.01198    |            |       |  0.02
-Nlocal:    32768 ave 32768 max 32768 min
+Nlocal:        32768.0 ave       32768 max       32768 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    25669 ave 25669 max 25669 min
+Nghost:        25669.0 ave       25669 max       25669 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    2.30433e+06 ave 2.30433e+06 max 2.30433e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Total # of neighbors = 2304332
+Total # of neighbors = 2304331
-Ave neighs/atom = 70.3226
+Ave neighs/atom = 70.322601
 Neighbor list builds = 6
 Dangerous builds = 3
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:00:44
+Total wall time: 0:00:57
--- a/bench/POTENTIALS/log.16Mar18.gb.4
+++ b/bench/POTENTIALS/log.16Mar18.gb.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Gay-Berne benchmark
 # biaxial ellipsoid mesogens in isotropic phase
@ -18,13 +17,15 @@ atom_style    ellipsoid
 #set	      group all quat/random 982381
 read_data     data.gb
-  orthogonal box = (2.19575 2.19575 2.19575) to (50.8124 50.8124 50.8124)
+Reading data file ...
  orthogonal box = (2.1957493 2.1957493 2.1957493) to (50.812373 50.812373 50.812373)
  1 by 2 by 2 MPI processor grid
  reading atoms ...
  32768 atoms
  reading velocities ...
  32768 velocities
  32768 ellipsoids
  read_data CPU = 0.079 seconds
 compute	      rot all temp/asphere
 group	      spheroid type 1
@ -63,41 +64,41 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 11.78 | 11.78 | 11.78 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 11.99 | 11.99 | 12.00 Mbytes
 Step Temp E_pair E_mol TotEng Press Volume 
       0          2.4   0.50438568            0    4.1042758    6.7818168    114909.09 
-      20    2.7357818   0.26045557            0     4.364003    6.8299368    111715.16 
+      20    2.7357797   0.26044978            0     4.363994    6.8299173     111715.2 
-      40    2.9201296   0.22570735            0     4.605768    7.0767907    109473.23 
+      40    2.9201268    0.2257049            0    4.6057615    7.0767796    109473.26 
-      60    2.9820039   0.19733812            0    4.6702075    7.1507065    108393.77 
+      60    2.9820022   0.19733756            0    4.6702044    7.1507023    108393.79 
-      80    3.0148529   0.15114819            0    4.6732895    7.1699502    107672.24 
+      80     3.014852   0.15114765            0    4.6732876    7.1699472    107672.25 
-     100    3.0206703   0.10567623            0    4.6365433     7.154345    107184.83 
+     100    3.0206698     0.105676            0    4.6365424    7.1543436    107184.84 
-Loop time of 11.3124 on 4 procs for 100 steps with 32768 atoms
+Loop time of 14.9338 on 4 procs for 100 steps with 32768 atoms
-Performance: 1527.522 tau/day, 8.840 timesteps/s
+Performance: 1157.109 tau/day, 6.696 timesteps/s
-99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 10.778     | 10.849     | 10.934     |   2.0 | 95.90
+Pair    | 14.317     | 14.457     | 14.545     |   2.5 | 96.81
-Neigh   | 0.088265   | 0.08871    | 0.089238   |   0.1 |  0.78
+Neigh   | 0.080048   | 0.080928   | 0.082009   |   0.3 |  0.54
-Comm    | 0.1384     | 0.22518    | 0.29662    |  14.1 |  1.99
+Comm    | 0.15948    | 0.24734    | 0.38914    |  18.9 |  1.66
-Output  | 0.00020599 | 0.00024837 | 0.00036836 |   0.0 |  0.00
+Output  | 0.00018859 | 0.00034791 | 0.00082254 |   0.0 |  0.00
-Modify  | 0.13828    | 0.13899    | 0.13984    |   0.2 |  1.23
+Modify  | 0.137      | 0.13804    | 0.13981    |   0.3 |  0.92
-Other   |            | 0.01053    |            |       |  0.09
+Other   |            | 0.01041    |            |       |  0.07
-Nlocal:    8192 ave 8215 max 8166 min
+Nlocal:        8192.00 ave        8215 max        8166 min
 Histogram: 1 1 0 0 0 0 0 0 0 2
 Nghost:        11972.5 ave       11984 max       11959 min
 Histogram: 1 0 0 0 1 0 1 0 0 1
-Neighs:    576083 ave 579616 max 572161 min
+Neighs:       576083.0 ave      579616 max      572161 min
 Histogram: 1 1 0 0 0 0 0 0 0 2
-Total # of neighbors = 2304332
+Total # of neighbors = 2304331
-Ave neighs/atom = 70.3226
+Ave neighs/atom = 70.322601
 Neighbor list builds = 6
 Dangerous builds = 3
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:00:11
+Total wall time: 0:00:15
--- a/bench/POTENTIALS/log.16Mar18.granular.1
+++ b/bench/POTENTIALS/log.16Mar18.granular.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # granular chute flow
@ -10,12 +9,14 @@ newton		off
 comm_modify	vel yes
 read_data	data.granular
-  orthogonal box = (0 0 0) to (40 20 37.2886)
+Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (40.000000 20.000000 37.288600)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.050 seconds
 pair_style	gran/hooke/history 200000.0 NULL 50.0 NULL 0.5 0
 pair_coeff	* *
@ -52,34 +53,34 @@ Neighbor list info ...
      pair build: half/size/bin/newtoff
      stencil: half/bin/3d/newtoff
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 23.36 | 23.36 | 23.36 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 23.37 | 23.37 | 23.37 Mbytes
 Step Atoms KinEng c_1 Volume 
       0    32000    784139.13    1601.1263    29833.783 
     100    32000    784292.08    1571.0968    29834.707 
-Loop time of 0.292816 on 1 procs for 100 steps with 32000 atoms
+Loop time of 0.274779 on 1 procs for 100 steps with 32000 atoms
-Performance: 2950.657 tau/day, 341.511 timesteps/s
+Performance: 3144.341 tau/day, 363.928 timesteps/s
-99.3% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.17449    | 0.17449    | 0.17449    |   0.0 | 59.59
+Pair    | 0.16956    | 0.16956    | 0.16956    |   0.0 | 61.71
-Neigh   | 0.031927   | 0.031927   | 0.031927   |   0.0 | 10.90
+Neigh   | 0.027646   | 0.027646   | 0.027646   |   0.0 | 10.06
-Comm    | 0.010195   | 0.010195   | 0.010195   |   0.0 |  3.48
+Comm    | 0.010068   | 0.010068   | 0.010068   |   0.0 |  3.66
-Output  | 0.00019121 | 0.00019121 | 0.00019121 |   0.0 |  0.07
+Output  | 0.00017285 | 0.00017285 | 0.00017285 |   0.0 |  0.06
-Modify  | 0.064463   | 0.064463   | 0.064463   |   0.0 | 22.01
+Modify  | 0.056372   | 0.056372   | 0.056372   |   0.0 | 20.52
-Other   |            | 0.01155    |            |       |  3.94
+Other   |            | 0.01096    |            |       |  3.99
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    5463 ave 5463 max 5463 min
+Nghost:        5463.00 ave        5463 max        5463 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    115133 ave 115133 max 115133 min
+Neighs:       115133.0 ave      115133 max      115133 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 115133
-Ave neighs/atom = 3.59791
+Ave neighs/atom = 3.5979062
 Neighbor list builds = 2
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.granular.4
+++ b/bench/POTENTIALS/log.16Mar18.granular.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # granular chute flow
@ -10,12 +9,14 @@ newton		off
 comm_modify	vel yes
 read_data	data.granular
-  orthogonal box = (0 0 0) to (40 20 37.2886)
+Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (40.000000 20.000000 37.288600)
  2 by 1 by 2 MPI processor grid
  reading atoms ...
  32000 atoms
  reading velocities ...
  32000 velocities
  read_data CPU = 0.052 seconds
 pair_style	gran/hooke/history 200000.0 NULL 50.0 NULL 0.5 0
 pair_coeff	* *
@ -52,34 +53,34 @@ Neighbor list info ...
      pair build: half/size/bin/newtoff
      stencil: half/bin/3d/newtoff
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 10.41 | 10.42 | 10.42 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 10.59 | 10.59 | 10.60 Mbytes
 Step Atoms KinEng c_1 Volume 
       0    32000    784139.13    1601.1263    29833.783 
     100    32000    784292.08    1571.0968    29834.707 
-Loop time of 0.0903978 on 4 procs for 100 steps with 32000 atoms
+Loop time of 0.0952788 on 4 procs for 100 steps with 32000 atoms
-Performance: 9557.751 tau/day, 1106.221 timesteps/s
+Performance: 9068.124 tau/day, 1049.551 timesteps/s
-98.3% CPU use with 4 MPI tasks x 1 OpenMP threads
+95.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.046331   | 0.049088   | 0.052195   |   1.2 | 54.30
+Pair    | 0.044316   | 0.047274   | 0.049681   |   1.0 | 49.62
-Neigh   | 0.0090401  | 0.0091327  | 0.0091863  |   0.1 | 10.10
+Neigh   | 0.0079038  | 0.0079354  | 0.0079608  |   0.0 |  8.33
-Comm    | 0.0073855  | 0.0080023  | 0.0086699  |   0.6 |  8.85
+Comm    | 0.0082569  | 0.0089372  | 0.0094819  |   0.5 |  9.38
-Output  | 7.1049e-05 | 0.00010067 | 0.00012088 |   0.0 |  0.11
+Output  | 6.9857e-05 | 9.3222e-05 | 0.00010514 |   0.0 |  0.10
-Modify  | 0.017226   | 0.017449   | 0.01803    |   0.3 | 19.30
+Modify  | 0.015689   | 0.016034   | 0.016789   |   0.4 | 16.83
-Other   |            | 0.006625   |            |       |  7.33
+Other   |            | 0.015      |            |       | 15.75
-Nlocal:    8000 ave 8008 max 7992 min
+Nlocal:        8000.00 ave        8008 max        7992 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
-Nghost:    2439 ave 2450 max 2428 min
+Nghost:        2439.00 ave        2450 max        2428 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Neighs:        29500.5 ave       30488 max       28513 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Total # of neighbors = 118002
-Ave neighs/atom = 3.68756
+Ave neighs/atom = 3.6875625
 Neighbor list builds = 2
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.lj.1
+++ b/bench/POTENTIALS/log.16Mar18.lj.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # 3d Lennard-Jones melt
@ -7,14 +6,14 @@ units		lj
 atom_style	atomic
 lattice		fcc 0.8442
-Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
+Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (33.591924 33.591924 33.591924)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00183916 secs
+  create_atoms CPU = 0.002 seconds
 mass		1 1.0
 velocity	all create 1.44 87287 loop geom
@ -44,30 +43,30 @@ Per MPI rank memory allocation (min/avg/max) = 15.82 | 15.82 | 15.82 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1.44   -6.7733681            0   -4.6134356   -5.0197073 
     100   0.75745998   -5.7584998            0   -4.6223453   0.20729996 
-Loop time of 1.721 on 1 procs for 100 steps with 32000 atoms
+Loop time of 1.59245 on 1 procs for 100 steps with 32000 atoms
-Performance: 25101.720 tau/day, 58.106 timesteps/s
+Performance: 27127.959 tau/day, 62.796 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 1.2551     | 1.2551     | 1.2551     |   0.0 | 72.93
+Pair    | 1.1654     | 1.1654     | 1.1654     |   0.0 | 73.18
-Neigh   | 0.41825    | 0.41825    | 0.41825    |   0.0 | 24.30
+Neigh   | 0.38321    | 0.38321    | 0.38321    |   0.0 | 24.06
-Comm    | 0.015347   | 0.015347   | 0.015347   |   0.0 |  0.89
+Comm    | 0.014476   | 0.014476   | 0.014476   |   0.0 |  0.91
-Output  | 0.00010729 | 0.00010729 | 0.00010729 |   0.0 |  0.01
+Output  | 9.5844e-05 | 9.5844e-05 | 9.5844e-05 |   0.0 |  0.01
-Modify  | 0.023436   | 0.023436   | 0.023436   |   0.0 |  1.36
+Modify  | 0.021453   | 0.021453   | 0.021453   |   0.0 |  1.35
-Other   |            | 0.008766   |            |       |  0.51
+Other   |            | 0.007799   |            |       |  0.49
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    19669 ave 19669 max 19669 min
+Nghost:        19669.0 ave       19669 max       19669 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.20318e+06 ave 1.20318e+06 max 1.20318e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1203176
-Ave neighs/atom = 37.5992
+Ave neighs/atom = 37.599250
 Neighbor list builds = 11
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/bench/POTENTIALS/log.16Mar18.lj.4
+++ b/bench/POTENTIALS/log.16Mar18.lj.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # 3d Lennard-Jones melt
@ -7,14 +6,14 @@ units		lj
 atom_style	atomic
 lattice		fcc 0.8442
-Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
+Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (33.591924 33.591924 33.591924)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000587225 secs
+  create_atoms CPU = 0.001 seconds
 mass		1 1.0
 velocity	all create 1.44 87287 loop geom
@ -40,34 +39,34 @@ Neighbor list info ...
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.88 | 6.88 | 6.88 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 6.881 | 6.881 | 6.881 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1.44   -6.7733681            0   -4.6134356   -5.0197073 
     100   0.75745998   -5.7584998            0   -4.6223453   0.20729996 
-Loop time of 0.469936 on 4 procs for 100 steps with 32000 atoms
+Loop time of 0.452443 on 4 procs for 100 steps with 32000 atoms
-Performance: 91927.316 tau/day, 212.795 timesteps/s
+Performance: 95481.741 tau/day, 221.023 timesteps/s
-99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.32713    | 0.32917    | 0.33317    |   0.4 | 70.05
+Pair    | 0.31149    | 0.3132     | 0.31493    |   0.2 | 69.22
-Neigh   | 0.10836    | 0.10931    | 0.11007    |   0.2 | 23.26
+Neigh   | 0.1006     | 0.10164    | 0.10385    |   0.4 | 22.47
-Comm    | 0.015526   | 0.020355   | 0.022399   |   2.0 |  4.33
+Comm    | 0.02195    | 0.025904   | 0.028603   |   1.6 |  5.73
-Output  | 4.2439e-05 | 5.8353e-05 | 0.00010061 |   0.0 |  0.01
+Output  | 4.3631e-05 | 7.534e-05  | 0.00015879 |   0.0 |  0.02
-Modify  | 0.0071156  | 0.0072448  | 0.007309   |   0.1 |  1.54
+Modify  | 0.0067751  | 0.0073788  | 0.0088398  |   1.0 |  1.63
-Other   |            | 0.003793   |            |       |  0.81
+Other   |            | 0.004243   |            |       |  0.94
-Nlocal:    8000 ave 8041 max 7958 min
+Nlocal:        8000.00 ave        8041 max        7958 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
-Nghost:    9011 ave 9065 max 8961 min
+Nghost:        9011.00 ave        9065 max        8961 min
 Histogram: 1 1 0 0 0 0 0 1 0 1
-Neighs:    300794 ave 304843 max 297317 min
+Neighs:       300794.0 ave      304843 max      297317 min
 Histogram: 1 0 0 1 1 0 0 0 0 1
 Total # of neighbors = 1203176
-Ave neighs/atom = 37.5992
+Ave neighs/atom = 37.599250
 Neighbor list builds = 11
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/bench/POTENTIALS/log.16Mar18.meamc.1
+++ b/bench/POTENTIALS/log.16Mar18.meamc.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Ni in MEAM
@ -7,17 +6,19 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.52
-Lattice spacing in x,y,z = 3.52 3.52 3.52
+Lattice spacing in x,y,z = 3.5200000 3.5200000 3.5200000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (70.4 70.4 70.4)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (70.400000 70.400000 70.400000)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00184226 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	meam/c
 pair_coeff	* * library.meam Ni4 Ni.meam Ni4
 Reading potential file library.meam with DATE: 2012-06-29
 Reading potential file Ni.meam with DATE: 2007-06-11
 velocity	all create 1600.0 376847 loop geom
@ -47,37 +48,37 @@ Neighbor list info ...
      pair build: halffull/newton
      stencil: none
      bin: none
-Per MPI rank memory allocation (min/avg/max) = 55.91 | 55.91 | 55.91 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 55.92 | 55.92 | 55.92 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -142400            0   -135782.09     20259.18 
      50    885.10702   -139411.51            0   -135750.54    32425.431 
     100    895.50973    -139454.3            0    -135750.3    31804.185 
-Loop time of 22.9343 on 1 procs for 100 steps with 32000 atoms
+Loop time of 21.655 on 1 procs for 100 steps with 32000 atoms
-Performance: 1.884 ns/day, 12.741 hours/ns, 4.360 timesteps/s
+Performance: 1.995 ns/day, 12.031 hours/ns, 4.618 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 22.397     | 22.397     | 22.397     |   0.0 | 97.66
+Pair    | 21.181     | 21.181     | 21.181     |   0.0 | 97.81
-Neigh   | 0.48781    | 0.48781    | 0.48781    |   0.0 |  2.13
+Neigh   | 0.42787    | 0.42787    | 0.42787    |   0.0 |  1.98
-Comm    | 0.013967   | 0.013967   | 0.013967   |   0.0 |  0.06
+Comm    | 0.013557   | 0.013557   | 0.013557   |   0.0 |  0.06
-Output  | 0.00022793 | 0.00022793 | 0.00022793 |   0.0 |  0.00
+Output  | 0.00020766 | 0.00020766 | 0.00020766 |   0.0 |  0.00
-Modify  | 0.025412   | 0.025412   | 0.025412   |   0.0 |  0.11
+Modify  | 0.023456   | 0.023456   | 0.023456   |   0.0 |  0.11
-Other   |            | 0.009448   |            |       |  0.04
+Other   |            | 0.008504   |            |       |  0.04
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    13576 ave 13576 max 13576 min
+Nghost:        13576.0 ave       13576 max       13576 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    780360 ave 780360 max 780360 min
+Neighs:       780360.0 ave      780360 max      780360 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  1.56072e+06 ave 1.56072e+06 max 1.56072e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1560720
-Ave neighs/atom = 48.7725
+Ave neighs/atom = 48.772500
 Neighbor list builds = 8
 Dangerous builds = 0
-Total wall time: 0:00:23
+Total wall time: 0:00:21
--- a/bench/POTENTIALS/log.16Mar18.meamc.4
+++ b/bench/POTENTIALS/log.16Mar18.meamc.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Ni in MEAM
@ -7,17 +6,19 @@ units		metal
 atom_style	atomic
 lattice		fcc 3.52
-Lattice spacing in x,y,z = 3.52 3.52 3.52
+Lattice spacing in x,y,z = 3.5200000 3.5200000 3.5200000
 region		box block 0 20 0 20 0 20
 create_box	1 box
-Created orthogonal box = (0 0 0) to (70.4 70.4 70.4)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (70.400000 70.400000 70.400000)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00058651 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	meam/c
 pair_coeff	* * library.meam Ni4 Ni.meam Ni4
 Reading potential file library.meam with DATE: 2012-06-29
 Reading potential file Ni.meam with DATE: 2007-06-11
 velocity	all create 1600.0 376847 loop geom
@ -47,37 +48,37 @@ Neighbor list info ...
      pair build: halffull/newton
      stencil: none
      bin: none
-Per MPI rank memory allocation (min/avg/max) = 17.41 | 17.41 | 17.41 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 17.42 | 17.42 | 17.42 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1600      -142400            0   -135782.09     20259.18 
      50    885.10702   -139411.51            0   -135750.54    32425.431 
     100    895.50973    -139454.3            0    -135750.3    31804.185 
-Loop time of 6.45947 on 4 procs for 100 steps with 32000 atoms
+Loop time of 6.34746 on 4 procs for 100 steps with 32000 atoms
-Performance: 6.688 ns/day, 3.589 hours/ns, 15.481 timesteps/s
+Performance: 6.806 ns/day, 3.526 hours/ns, 15.754 timesteps/s
 98.0% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 6.22       | 6.2385     | 6.265      |   0.7 | 96.58
+Pair    | 6.0585     | 6.1109     | 6.1535     |   1.4 | 96.27
-Neigh   | 0.12657    | 0.12691    | 0.12721    |   0.1 |  1.96
+Neigh   | 0.11286    | 0.11651    | 0.12455    |   1.4 |  1.84
-Comm    | 0.052339   | 0.07915    | 0.097897   |   5.9 |  1.23
+Comm    | 0.058046   | 0.099641   | 0.15569    |  11.7 |  1.57
-Output  | 9.7752e-05 | 0.0001151  | 0.00016594 |   0.0 |  0.00
+Output  | 9.0122e-05 | 0.00016046 | 0.0003624  |   0.0 |  0.00
-Modify  | 0.010194   | 0.010291   | 0.010442   |   0.1 |  0.16
+Modify  | 0.010822   | 0.011674   | 0.014224   |   1.4 |  0.18
-Other   |            | 0.004529   |            |       |  0.07
+Other   |            | 0.008601   |            |       |  0.14
-Nlocal:    8000 ave 8045 max 7947 min
+Nlocal:        8000.00 ave        8045 max        7947 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Nghost:        6066.75 ave        6120 max        6021 min
 Histogram: 1 0 1 0 0 0 1 0 0 1
-Neighs:    195090 ave 196403 max 193697 min
+Neighs:       195090.0 ave      196403 max      193697 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
-FullNghs:  390180 ave 392616 max 387490 min
+FullNghs:     390180.0 ave      392616 max      387490 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Total # of neighbors = 1560720
-Ave neighs/atom = 48.7725
+Ave neighs/atom = 48.772500
 Neighbor list builds = 8
 Dangerous builds = 0
 Total wall time: 0:00:06
--- a/bench/POTENTIALS/log.16Mar18.peri.1
+++ b/bench/POTENTIALS/log.16Mar18.peri.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Crack growth in notched 3D Peridynamic block
@ -58,11 +57,11 @@ region          plate block 		0 0.01975 0 0.01575 ${myzmin} ${myzmax} 		units bo
 region          plate block 		0 0.01975 0 0.01575 0 ${myzmax} 		units box
 region          plate block 		0 0.01975 0 0.01575 0 0.01225 		units box
 create_box      3 plate
-Created orthogonal box = (0 0 0) to (0.01975 0.01575 0.01225)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (0.01975 0.01575 0.01225)
  1 by 1 by 1 MPI processor grid
 create_atoms    1 region plate
 Created 32000 atoms
-  Time spent = 0.00362897 secs
+  create_atoms CPU = 0.004 seconds
 pair_style      peri/pmb
@ -101,8 +100,10 @@ region		topright block 		0.009875 0.01975 0.01075 0.01575 ${myzmin} ${myzmax}
 region		topright block 		0.009875 0.01975 0.01075 0.01575 0 ${myzmax} 		units box
 region		topright block 		0.009875 0.01975 0.01075 0.01575 0 0.01225 		units box
 set 		region topleft  type 2
 Setting atom values ...
  5000 settings made for type
 set 		region topright type 3
 Setting atom values ...
  5000 settings made for type
 pair_coeff      1 1 ${myc} ${mydelta} ${mys0} 0.0
 pair_coeff      1 1 5.43248872420337e+22 ${mydelta} ${mys0} 0.0
@ -129,11 +130,13 @@ pair_coeff      1 3 5.43248872420337e+22 0.001515 ${mys0} 0.0
 pair_coeff      1 3 5.43248872420337e+22 0.001515 0.00102062072615966 0.0
 set             group all density ${mydensity}
 set             group all density 2440
 Setting atom values ...
  32000 settings made for density
 variable	myvolume equal ($h)^3
 variable	myvolume equal (0.0005)^3
 set             group all volume ${myvolume}
 set             group all volume 1.25e-10
 Setting atom values ...
  32000 settings made for volume
 velocity        all set 0.0 0.0 0.0 sum no units box
@ -184,23 +187,23 @@ Step Temp E_pair E_mol TotEng Press Volume
      60 9.8975313e+26 5.7284448e+08            0 1.2287455e+09 1.2048543e+14 3.6292128e-06 
      80 9.3888573e+26 4.0928092e+08            0 1.0314725e+09 1.1429321e+14 3.6292128e-06 
     100 8.3930314e+26 3.8522361e+08            0 9.4142265e+08 1.0217075e+14 3.6292128e-06 
-Loop time of 11.0398 on 1 procs for 100 steps with 32000 atoms
+Loop time of 10.1036 on 1 procs for 100 steps with 32000 atoms
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 11.005     | 11.005     | 11.005     |   0.0 | 99.68
+Pair    | 10.07      | 10.07      | 10.07      |   0.0 | 99.67
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 6.6042e-05 | 6.6042e-05 | 6.6042e-05 |   0.0 |  0.00
+Comm    | 6.6519e-05 | 6.6519e-05 | 6.6519e-05 |   0.0 |  0.00
-Output  | 0.00057292 | 0.00057292 | 0.00057292 |   0.0 |  0.01
+Output  | 0.00051737 | 0.00051737 | 0.00051737 |   0.0 |  0.01
-Modify  | 0.0256     | 0.0256     | 0.0256     |   0.0 |  0.23
+Modify  | 0.024288   | 0.024288   | 0.024288   |   0.0 |  0.24
-Other   |            | 0.008592   |            |       |  0.08
+Other   |            | 0.008486   |            |       |  0.08
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    0 ave 0 max 0 min
+Nghost:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    6.74442e+06 ave 6.74442e+06 max 6.74442e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@ -208,10 +211,10 @@ FullNghs:  1.34888e+07 ave 1.34888e+07 max 1.34888e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 13488836
-Ave neighs/atom = 421.526
+Ave neighs/atom = 421.52612
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:00:11
+Total wall time: 0:00:10
--- a/bench/POTENTIALS/log.16Mar18.peri.4
+++ b/bench/POTENTIALS/log.16Mar18.peri.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Crack growth in notched 3D Peridynamic block
@ -58,11 +57,11 @@ region          plate block 		0 0.01975 0 0.01575 ${myzmin} ${myzmax} 		units bo
 region          plate block 		0 0.01975 0 0.01575 0 ${myzmax} 		units box
 region          plate block 		0 0.01975 0 0.01575 0 0.01225 		units box
 create_box      3 plate
-Created orthogonal box = (0 0 0) to (0.01975 0.01575 0.01225)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (0.01975 0.01575 0.01225)
  2 by 2 by 1 MPI processor grid
 create_atoms    1 region plate
 Created 32000 atoms
-  Time spent = 0.0011344 secs
+  create_atoms CPU = 0.001 seconds
 pair_style      peri/pmb
@ -101,8 +100,10 @@ region		topright block 		0.009875 0.01975 0.01075 0.01575 ${myzmin} ${myzmax}
 region		topright block 		0.009875 0.01975 0.01075 0.01575 0 ${myzmax} 		units box
 region		topright block 		0.009875 0.01975 0.01075 0.01575 0 0.01225 		units box
 set 		region topleft  type 2
 Setting atom values ...
  5000 settings made for type
 set 		region topright type 3
 Setting atom values ...
  5000 settings made for type
 pair_coeff      1 1 ${myc} ${mydelta} ${mys0} 0.0
 pair_coeff      1 1 5.43248872420337e+22 ${mydelta} ${mys0} 0.0
@ -129,11 +130,13 @@ pair_coeff      1 3 5.43248872420337e+22 0.001515 ${mys0} 0.0
 pair_coeff      1 3 5.43248872420337e+22 0.001515 0.00102062072615966 0.0
 set             group all density ${mydensity}
 set             group all density 2440
 Setting atom values ...
  32000 settings made for density
 variable	myvolume equal ($h)^3
 variable	myvolume equal (0.0005)^3
 set             group all volume ${myvolume}
 set             group all volume 1.25e-10
 Setting atom values ...
  32000 settings made for volume
 velocity        all set 0.0 0.0 0.0 sum no units box
@ -176,7 +179,7 @@ Neighbor list info ...
 Peridynamic bonds:
  total # of bonds = 3457032
  bonds/atom = 108.032
-Per MPI rank memory allocation (min/avg/max) = 47.63 | 48.11 | 48.78 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 47.70 | 48.18 | 48.85 Mbytes
 Step Temp E_pair E_mol TotEng Press Volume 
       0 2.0134233e+27            0            0 1.3342785e+09 2.4509971e+14 3.6292128e-06 
      20 1.7695805e+27 1.6163291e+08            0 1.3343188e+09 2.1541601e+14 3.6292128e-06 
@ -184,34 +187,34 @@ Step Temp E_pair E_mol TotEng Press Volume
      60 9.8975313e+26 5.7284448e+08            0 1.2287455e+09 1.2048543e+14 3.6292128e-06 
      80 9.3888573e+26 4.0928092e+08            0 1.0314725e+09 1.1429321e+14 3.6292128e-06 
     100 8.3930314e+26 3.8522361e+08            0 9.4142265e+08 1.0217075e+14 3.6292128e-06 
-Loop time of 2.8928 on 4 procs for 100 steps with 32000 atoms
+Loop time of 2.82804 on 4 procs for 100 steps with 32000 atoms
-99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
+97.9% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 2.7472     | 2.7951     | 2.8585     |   2.9 | 96.62
+Pair    | 2.6021     | 2.6599     | 2.7081     |   2.4 | 94.05
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 0.019592   | 0.083156   | 0.13278    |  17.0 |  2.87
+Comm    | 0.10341    | 0.15313    | 0.21057    |  10.3 |  5.41
-Output  | 0.00022125 | 0.00034326 | 0.00058961 |   0.0 |  0.01
+Output  | 0.00020409 | 0.00041658 | 0.00093699 |   0.0 |  0.01
-Modify  | 0.0083542  | 0.0089623  | 0.0095983  |   0.5 |  0.31
+Modify  | 0.008944   | 0.0092288  | 0.0095088  |   0.3 |  0.33
-Other   |            | 0.005276   |            |       |  0.18
+Other   |            | 0.005395   |            |       |  0.19
-Nlocal:    8000 ave 8000 max 8000 min
+Nlocal:        8000.00 ave        8000 max        8000 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
-Nghost:    5125 ave 5125 max 5125 min
+Nghost:        5125.00 ave        5125 max        5125 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
-Neighs:    1.6861e+06 ave 1.77502e+06 max 1.60625e+06 min
+Neighs:    1.68610e+06 ave 1.77502e+06 max 1.60625e+06 min
 Histogram: 2 0 0 0 0 0 0 0 1 1
 FullNghs:  3.37221e+06 ave 3.41832e+06 max  3.3261e+06 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Total # of neighbors = 13488836
-Ave neighs/atom = 421.526
+Ave neighs/atom = 421.52612
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
-Total wall time: 0:00:03
+Total wall time: 0:00:02
--- a/bench/POTENTIALS/log.16Mar18.protein.1
+++ b/bench/POTENTIALS/log.16Mar18.protein.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Rhodopsin model
@ -17,7 +16,8 @@ pair_modify     mix arithmetic
 kspace_style    pppm 1e-4
 read_data       data.protein
-  orthogonal box = (-27.5 -38.5 -36.3646) to (27.5 38.5 36.3615)
+Reading data file ...
  orthogonal box = (-27.500000 -38.500000 -36.364600) to (27.500000 38.500000 36.361500)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  32000 atoms
@ -39,16 +39,22 @@ read_data       data.protein
  56829 dihedrals
  reading impropers ...
  1034 impropers
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     4 = max # of 1-2 neighbors
    12 = max # of 1-3 neighbors
    24 = max # of 1-4 neighbors
    26 = max # of special neighbors
  special bonds CPU = 0.011 seconds
  read_data CPU = 0.125 seconds
 fix             1 all shake 0.0001 5 0 m 1.0 a 232
    1617 = # of size 2 clusters
    3633 = # of size 3 clusters
     747 = # of size 4 clusters
    4233 = # of frozen angles
  find clusters CPU = 0.006 seconds
 fix             2 all npt temp 300.0 300.0 100.0 		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
 special_bonds   charmm
@ -58,13 +64,13 @@ timestep        2.0
 run		100
 PPPM initialization ...
-  using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:328)
-  G vector (1/distance) = 0.248835
+  G vector (1/distance) = 0.24883488
  grid = 25 32 32
  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0355478
+  estimated absolute RMS force accuracy = 0.035547797
-  estimated relative force accuracy = 0.000107051
+  estimated relative force accuracy = 0.00010705113
-  using double precision FFTs
+  using double precision KISS FFT
  3d grid and FFT values/proc = 41070 25600
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
@ -78,46 +84,46 @@ Neighbor list info ...
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 140 | 140 | 140 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 140.0 | 140.0 | 140.0 Mbytes
 ---------------- Step        0 ----- CPU =      0.0000 (sec) ----------------
 TotEng   =    -25356.2064 KinEng   =     21444.8313 Temp     =       299.0397 
 PotEng   =    -46801.0377 E_bond   =      2537.9940 E_angle  =     10921.3742 
 E_dihed  =      5211.7865 E_impro  =       213.5116 E_vdwl   =     -2307.8634 
 E_coul   =    207025.8927 E_long   =   -270403.7333 Press    =      -149.3301 
 Volume   =    307995.0335 
---------------- Step      100 ----- CPU =     23.7567 (sec) ----------------
+---------------- Step      100 ----- CPU =     20.0022 (sec) ----------------
-TotEng   =    -25290.7386 KinEng   =     21591.9096 Temp     =       301.0906 
+TotEng   =    -25290.7304 KinEng   =     21591.9084 Temp     =       301.0906 
-PotEng   =    -46882.6482 E_bond   =      2567.9789 E_angle  =     10781.9556 
+PotEng   =    -46882.6388 E_bond   =      2567.9807 E_angle  =     10781.9571 
-E_dihed  =      5198.7493 E_impro  =       216.7863 E_vdwl   =     -1902.6458 
+E_dihed  =      5198.7492 E_impro  =       216.7864 E_vdwl   =     -1902.6618 
-E_coul   =    206659.5007 E_long   =   -270404.9733 Press    =         6.7898 
+E_coul   =    206659.5226 E_long   =   -270404.9730 Press    =         6.7406 
-Volume   =    308133.9933 
+Volume   =    308134.2285 
-Loop time of 23.7568 on 1 procs for 100 steps with 32000 atoms
+Loop time of 20.0022 on 1 procs for 100 steps with 32000 atoms
-Performance: 0.727 ns/day, 32.995 hours/ns, 4.209 timesteps/s
+Performance: 0.864 ns/day, 27.781 hours/ns, 4.999 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 17.905     | 17.905     | 17.905     |   0.0 | 75.37
+Pair    | 15         | 15         | 15         |   0.0 | 74.99
-Bond    | 0.73417    | 0.73417    | 0.73417    |   0.0 |  3.09
+Bond    | 0.65091    | 0.65091    | 0.65091    |   0.0 |  3.25
-Kspace  | 1.4676     | 1.4676     | 1.4676     |   0.0 |  6.18
+Kspace  | 1.2144     | 1.2144     | 1.2144     |   0.0 |  6.07
-Neigh   | 2.9907     | 2.9907     | 2.9907     |   0.0 | 12.59
+Neigh   | 2.6096     | 2.6096     | 2.6096     |   0.0 | 13.05
-Comm    | 0.037427   | 0.037427   | 0.037427   |   0.0 |  0.16
+Comm    | 0.035203   | 0.035203   | 0.035203   |   0.0 |  0.18
-Output  | 0.00011754 | 0.00011754 | 0.00011754 |   0.0 |  0.00
+Output  | 0.00010848 | 0.00010848 | 0.00010848 |   0.0 |  0.00
-Modify  | 0.60985    | 0.60985    | 0.60985    |   0.0 |  2.57
+Modify  | 0.48116    | 0.48116    | 0.48116    |   0.0 |  2.41
-Other   |            | 0.01201    |            |       |  0.05
+Other   |            | 0.01032    |            |       |  0.05
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    47958 ave 47958 max 47958 min
+Nghost:        47958.0 ave       47958 max       47958 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.20281e+07 ave 1.20281e+07 max 1.20281e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Total # of neighbors = 12028098
+Total # of neighbors = 12028093
-Ave neighs/atom = 375.878
+Ave neighs/atom = 375.87791
-Ave special neighs/atom = 7.43187
+Ave special neighs/atom = 7.4318750
 Neighbor list builds = 11
 Dangerous builds = 0
-Total wall time: 0:00:24
+Total wall time: 0:00:20
--- a/bench/POTENTIALS/log.16Mar18.protein.4
+++ b/bench/POTENTIALS/log.16Mar18.protein.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # Rhodopsin model
@ -17,7 +16,8 @@ pair_modify     mix arithmetic
 kspace_style    pppm 1e-4
 read_data       data.protein
-  orthogonal box = (-27.5 -38.5 -36.3646) to (27.5 38.5 36.3615)
+Reading data file ...
  orthogonal box = (-27.500000 -38.500000 -36.364600) to (27.500000 38.500000 36.361500)
  1 by 2 by 2 MPI processor grid
  reading atoms ...
  32000 atoms
@ -39,16 +39,22 @@ read_data       data.protein
  56829 dihedrals
  reading impropers ...
  1034 impropers
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     4 = max # of 1-2 neighbors
    12 = max # of 1-3 neighbors
    24 = max # of 1-4 neighbors
    26 = max # of special neighbors
  special bonds CPU = 0.005 seconds
  read_data CPU = 0.210 seconds
 fix             1 all shake 0.0001 5 0 m 1.0 a 232
    1617 = # of size 2 clusters
    3633 = # of size 3 clusters
     747 = # of size 4 clusters
    4233 = # of frozen angles
  find clusters CPU = 0.003 seconds
 fix             2 all npt temp 300.0 300.0 100.0 		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
 special_bonds   charmm
@ -58,13 +64,13 @@ timestep        2.0
 run		100
 PPPM initialization ...
-  using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:328)
-  G vector (1/distance) = 0.248835
+  G vector (1/distance) = 0.24883488
  grid = 25 32 32
  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0355478
+  estimated absolute RMS force accuracy = 0.035547797
-  estimated relative force accuracy = 0.000107051
+  estimated relative force accuracy = 0.00010705113
-  using double precision FFTs
+  using double precision KISS FFT
  3d grid and FFT values/proc = 13230 6400
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
@ -78,46 +84,46 @@ Neighbor list info ...
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 49.07 | 49.17 | 49.46 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 49.25 | 49.35 | 49.64 Mbytes
 ---------------- Step        0 ----- CPU =      0.0000 (sec) ----------------
 TotEng   =    -25356.2064 KinEng   =     21444.8313 Temp     =       299.0397 
 PotEng   =    -46801.0377 E_bond   =      2537.9940 E_angle  =     10921.3742 
 E_dihed  =      5211.7865 E_impro  =       213.5116 E_vdwl   =     -2307.8634 
 E_coul   =    207025.8927 E_long   =   -270403.7333 Press    =      -149.3301 
 Volume   =    307995.0335 
---------------- Step      100 ----- CPU =      6.3997 (sec) ----------------
+---------------- Step      100 ----- CPU =      5.5375 (sec) ----------------
-TotEng   =    -25290.7386 KinEng   =     21591.9096 Temp     =       301.0906 
+TotEng   =    -25290.7303 KinEng   =     21591.9085 Temp     =       301.0906 
-PotEng   =    -46882.6483 E_bond   =      2567.9789 E_angle  =     10781.9556 
+PotEng   =    -46882.6388 E_bond   =      2567.9807 E_angle  =     10781.9571 
-E_dihed  =      5198.7493 E_impro  =       216.7863 E_vdwl   =     -1902.6458 
+E_dihed  =      5198.7492 E_impro  =       216.7864 E_vdwl   =     -1902.6618 
-E_coul   =    206659.5007 E_long   =   -270404.9733 Press    =         6.7898 
+E_coul   =    206659.5225 E_long   =   -270404.9730 Press    =         6.7406 
-Volume   =    308133.9933 
+Volume   =    308134.2285 
-Loop time of 6.39977 on 4 procs for 100 steps with 32000 atoms
+Loop time of 5.53765 on 4 procs for 100 steps with 32000 atoms
-Performance: 2.700 ns/day, 8.889 hours/ns, 15.626 timesteps/s
+Performance: 3.120 ns/day, 7.691 hours/ns, 18.058 timesteps/s
-98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 4.4434     | 4.5321     | 4.6846     |   4.3 | 70.82
+Pair    | 3.8921     | 3.9427     | 4.0762     |   3.9 | 71.20
-Bond    | 0.17894    | 0.18568    | 0.19951    |   1.9 |  2.90
+Bond    | 0.16218    | 0.16829    | 0.17972    |   1.7 |  3.04
-Kspace  | 0.4651     | 0.61064    | 0.69123    |  11.1 |  9.54
+Kspace  | 0.35196    | 0.48475    | 0.53996    |  11.1 |  8.75
-Neigh   | 0.7739     | 0.77394    | 0.774      |   0.0 | 12.09
+Neigh   | 0.69975    | 0.69981    | 0.69988    |   0.0 | 12.64
-Comm    | 0.057676   | 0.069183   | 0.07901    |   3.0 |  1.08
+Comm    | 0.04908    | 0.049445   | 0.049767   |   0.1 |  0.89
-Output  | 5.6505e-05 | 6.6578e-05 | 9.4414e-05 |   0.0 |  0.00
+Output  | 5.1737e-05 | 8.5056e-05 | 0.00018382 |   0.0 |  0.00
-Modify  | 0.21444    | 0.21866    | 0.22524    |   0.9 |  3.42
+Modify  | 0.18393    | 0.18474    | 0.18528    |   0.1 |  3.34
-Other   |            | 0.009451   |            |       |  0.15
+Other   |            | 0.007858   |            |       |  0.14
-Nlocal:    8000 ave 8143 max 7933 min
+Nlocal:        8000.00 ave        8143 max        7933 min
 Histogram: 1 2 0 0 0 0 0 0 0 1
 Nghost:        22733.5 ave       22769 max       22693 min
 Histogram: 1 0 0 0 0 2 0 0 0 1
 Neighs:    3.00702e+06 ave  3.0975e+06 max 2.96492e+06 min
 Histogram: 1 2 0 0 0 0 0 0 0 1
-Total # of neighbors = 12028098
+Total # of neighbors = 12028093
-Ave neighs/atom = 375.878
+Ave neighs/atom = 375.87791
-Ave special neighs/atom = 7.43187
+Ave special neighs/atom = 7.4318750
 Neighbor list builds = 11
 Dangerous builds = 0
-Total wall time: 0:00:06
+Total wall time: 0:00:05
--- a/bench/POTENTIALS/log.9Oct20.reaxc.1
+++ b/bench/POTENTIALS/log.9Oct20.reaxc.1
@ -0,0 +1,96 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # ReaxFF benchmark: simulation of PETN crystal, replicated unit cell
 units		real
 atom_style	charge
 read_data	data.reax
 Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (9.4910650 9.4910650 6.9912300)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  58 atoms
  read_data CPU = 0.000 seconds
 replicate	7 8 10
 Replicating atoms ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (66.437455 75.928520 69.912300)
  1 by 1 by 1 MPI processor grid
  32480 atoms
  replicate CPU = 0.002 seconds
 velocity	all create 300.0 9999
 pair_style	reax/c NULL
 pair_coeff      * * ffield.reax C H O N
 WARNING: Changed valency_val to valency_boc for X (src/USER-REAXC/reaxc_ffield.cpp:315)
 timestep	0.1
 fix		1 all nve
 fix             2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
 thermo		10
 thermo_style	custom step temp ke pe pxx pyy pzz etotal
 run		100
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 12
  ghost atom cutoff = 12
  binsize = 6, bins = 12 13 12
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 1727.0 | 1727.0 | 1727.0 Mbytes
 Step Temp KinEng PotEng Pxx Pyy Pzz TotEng 
       0          300    29044.119   -3232140.8    22804.879   -29365.593    6302.5638   -3203096.6 
      10    299.37479     28983.59   -3232075.2    21746.783   -23987.396    7610.3039   -3203091.6 
      20    295.58549    28616.733   -3231710.1    18178.443   -10872.027     10603.19   -3203093.3 
      30    289.48845    28026.456   -3231123.3    12146.101    4985.2572    13364.258   -3203096.8 
      40    282.66408    27365.763   -3230467.5    4284.1187    18132.512     14133.51   -3203101.7 
      50    274.97007    26620.878   -3229730.4    -3718.933    25520.016    12551.903   -3203109.5 
      60    266.11301    25763.393   -3228883.8   -9271.3498    27307.451    9753.2362   -3203120.4 
      70    259.32635    25106.351   -3228237.2   -11150.623    24238.509     6578.531   -3203130.8 
      80    260.33969    25204.456   -3228344.2   -9576.4144    16737.758    3454.6426   -3203139.7 
      90     269.9021    26130.229   -3229275.5   -5905.8652    5246.3236    467.53439   -3203145.2 
     100    280.76723    27182.123   -3230330.6   -1363.6002   -8133.2093   -1689.6535   -3203148.5 
 Loop time of 213.234 on 1 procs for 100 steps with 32480 atoms
 Performance: 0.004 ns/day, 5923.154 hours/ns, 0.469 timesteps/s
 99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 157        | 157        | 157        |   0.0 | 73.63
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0.024997   | 0.024997   | 0.024997   |   0.0 |  0.01
 Output  | 0.0022025  | 0.0022025  | 0.0022025  |   0.0 |  0.00
 Modify  | 56.19      | 56.19      | 56.19      |   0.0 | 26.35
 Other   |            | 0.01211    |            |       |  0.01
 Nlocal:        32480.0 ave       32480 max       32480 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        45128.0 ave       45128 max       45128 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.27781e+07 ave 1.27781e+07 max 1.27781e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 12778082
 Ave neighs/atom = 393.41385
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:03:37
--- a/bench/POTENTIALS/log.9Oct20.reaxc.4
+++ b/bench/POTENTIALS/log.9Oct20.reaxc.4
@ -0,0 +1,96 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # ReaxFF benchmark: simulation of PETN crystal, replicated unit cell
 units		real
 atom_style	charge
 read_data	data.reax
 Reading data file ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (9.4910650 9.4910650 6.9912300)
  2 by 2 by 1 MPI processor grid
  reading atoms ...
  58 atoms
  read_data CPU = 0.000 seconds
 replicate	7 8 10
 Replicating atoms ...
  orthogonal box = (0.0000000 0.0000000 0.0000000) to (66.437455 75.928520 69.912300)
  1 by 2 by 2 MPI processor grid
  32480 atoms
  replicate CPU = 0.001 seconds
 velocity	all create 300.0 9999
 pair_style	reax/c NULL
 pair_coeff      * * ffield.reax C H O N
 WARNING: Changed valency_val to valency_boc for X (src/USER-REAXC/reaxc_ffield.cpp:315)
 timestep	0.1
 fix		1 all nve
 fix             2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
 thermo		10
 thermo_style	custom step temp ke pe pxx pyy pzz etotal
 run		100
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 12
  ghost atom cutoff = 12
  binsize = 6, bins = 12 13 12
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 647.0 | 647.0 | 647.0 Mbytes
 Step Temp KinEng PotEng Pxx Pyy Pzz TotEng 
       0          300    29044.119   -3232140.8    22804.879   -29365.593    6302.5638   -3203096.6 
      10    299.37479     28983.59   -3232075.2    21746.771   -23987.411    7610.2901   -3203091.6 
      20    295.58551    28616.736   -3231710.1    18178.439   -10871.954    10603.303   -3203093.3 
      30    289.48844    28026.456   -3231123.2    12146.289    4985.5678    13364.519   -3203096.8 
      40    282.66406    27365.762   -3230467.4    4284.8179    18133.406    14134.156   -3203101.7 
      50    274.97009     26620.88   -3229730.3   -3718.6796    25520.338    12552.205   -3203109.5 
      60    266.11302    25763.394   -3228883.8   -9271.5644    27307.146    9753.1034   -3203120.4 
      70    259.32636    25106.352   -3228237.1    -11150.66    24238.705    6578.7141   -3203130.8 
      80    260.33966    25204.454   -3228344.1   -9576.2474    16737.753    3454.7607   -3203139.7 
      90    269.90207    26130.226   -3229275.5   -5905.8809    5246.1687    467.42114   -3203145.2 
     100    280.76722    27182.122   -3230330.6   -1363.4752   -8133.2096   -1689.5922   -3203148.5 
 Loop time of 69.1187 on 4 procs for 100 steps with 32480 atoms
 Performance: 0.013 ns/day, 1919.965 hours/ns, 1.447 timesteps/s
 97.5% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 45.964     | 48.533     | 51.423     |  29.3 | 70.22
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0.19604    | 3.0913     | 5.6647     | 116.3 |  4.47
 Output  | 0.00074649 | 0.0011722  | 0.0023553  |   2.0 |  0.00
 Modify  | 17.48      | 17.485     | 17.489     |   0.1 | 25.30
 Other   |            | 0.008528   |            |       |  0.01
 Nlocal:        8120.00 ave        8120 max        8120 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Nghost:        21992.0 ave       21992 max       21992 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Neighs:    3.48274e+06 ave 3.48274e+06 max 3.48274e+06 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 13930976
 Ave neighs/atom = 428.90936
 Neighbor list builds = 0
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:01:10
--- a/bench/POTENTIALS/log.9Oct20.rebo.1
+++ b/bench/POTENTIALS/log.9Oct20.rebo.1
@ -0,0 +1,90 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # REBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.rebo
 Reading data file ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (2.1000000 2.1000000 25.579000)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  60 atoms
  read_data CPU = 0.000 seconds
 replicate	    17 16 2
 Replicating atoms ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (69.300000 65.100000 51.158000)
  1 by 1 by 1 MPI processor grid
  32640 atoms
  replicate CPU = 0.002 seconds
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    rebo
 pair_coeff	    * * CH.rebo C H
 Reading rebo potential file CH.rebo with DATE: 2018-7-3
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 6.5
  ghost atom cutoff = 6.5
  binsize = 3.25, bins = 22 21 16
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair rebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 34.21 | 34.21 | 34.21 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -138442.83            0   -137177.16    2463.0748 
      10    179.37985   -137931.27            0   -137174.48    15655.936 
      20    206.87654   -138046.99            0   -137174.19   -24042.627 
      30    150.80122   -137807.43            0   -137171.21   -16524.118 
      40    173.24945   -137902.35            0   -137171.42   -5716.9119 
      50    151.80455   -137812.36            0   -137171.91    3480.4584 
      60    199.08777   -138013.82            0   -137173.88    17881.372 
      70    217.85748   -138093.86            0   -137174.73   -12270.999 
      80    202.37482   -138029.39            0   -137175.59   -7622.7319 
      90    194.90628   -137997.05            0   -137174.75   -32267.471 
     100    185.17818   -137954.51            0   -137173.26   -6901.7499 
 Loop time of 4.83649 on 1 procs for 100 steps with 32640 atoms
 Performance: 0.893 ns/day, 26.869 hours/ns, 20.676 timesteps/s
 99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 3.3248     | 3.3248     | 3.3248     |   0.0 | 68.74
 Neigh   | 1.4583     | 1.4583     | 1.4583     |   0.0 | 30.15
 Comm    | 0.01934    | 0.01934    | 0.01934    |   0.0 |  0.40
 Output  | 0.0011871  | 0.0011871  | 0.0011871  |   0.0 |  0.02
 Modify  | 0.023516   | 0.023516   | 0.023516   |   0.0 |  0.49
 Other   |            | 0.009316   |            |       |  0.19
 Nlocal:        32640.0 ave       32640 max       32640 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        26460.0 ave       26460 max       26460 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  4.90213e+06 ave 4.90213e+06 max 4.90213e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 4902134
 Ave neighs/atom = 150.18793
 Neighbor list builds = 9
 Dangerous builds = 0
 Total wall time: 0:00:05
--- a/bench/POTENTIALS/log.9Oct20.rebo.4
+++ b/bench/POTENTIALS/log.9Oct20.rebo.4
@ -0,0 +1,90 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # REBO polyethelene benchmark
 units		    metal
 atom_style	    atomic
 read_data	    data.rebo
 Reading data file ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (2.1000000 2.1000000 25.579000)
  1 by 1 by 4 MPI processor grid
  reading atoms ...
  60 atoms
  read_data CPU = 0.000 seconds
 replicate	    17 16 2
 Replicating atoms ...
  orthogonal box = (-2.1000000 -2.1000000 0.0000000) to (69.300000 65.100000 51.158000)
  2 by 2 by 1 MPI processor grid
  32640 atoms
  replicate CPU = 0.001 seconds
 neighbor	    0.5 bin
 neigh_modify	    delay 5 every 1
 pair_style	    rebo
 pair_coeff	    * * CH.rebo C H
 Reading rebo potential file CH.rebo with DATE: 2018-7-3
 velocity	    all create 300.0 761341
 fix		    1 all nve
 timestep	    0.0005
 thermo		    10
 run		    100
 Neighbor list info ...
  update every 1 steps, delay 5 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 6.5
  ghost atom cutoff = 6.5
  binsize = 3.25, bins = 22 21 16
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair rebo, perpetual
      attributes: full, newton on, ghost
      pair build: full/bin/ghost
      stencil: full/ghost/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 11.80 | 12.00 | 12.19 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -138442.83            0   -137177.16    2463.0748 
      10    179.37985   -137931.27            0   -137174.48    15655.936 
      20    206.87654   -138046.99            0   -137174.19   -24042.627 
      30    150.80122   -137807.43            0   -137171.21   -16524.118 
      40    173.24945   -137902.35            0   -137171.42   -5716.9119 
      50    151.80455   -137812.36            0   -137171.91    3480.4584 
      60    199.08777   -138013.82            0   -137173.88    17881.372 
      70    217.85748   -138093.86            0   -137174.73   -12270.999 
      80    202.37482   -138029.39            0   -137175.59   -7622.7319 
      90    194.90628   -137997.05            0   -137174.75   -32267.471 
     100    185.17818   -137954.51            0   -137173.26   -6901.7499 
 Loop time of 1.74701 on 4 procs for 100 steps with 32640 atoms
 Performance: 2.473 ns/day, 9.706 hours/ns, 57.241 timesteps/s
 94.1% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.89836    | 0.96998    | 1.054      |   6.0 | 55.52
 Neigh   | 0.453      | 0.47091    | 0.50316    |   2.8 | 26.96
 Comm    | 0.15706    | 0.27291    | 0.36547    |  14.5 | 15.62
 Output  | 0.00047016 | 0.00073808 | 0.0015287  |   0.0 |  0.04
 Modify  | 0.0093558  | 0.010209   | 0.011958   |   1.0 |  0.58
 Other   |            | 0.02227    |            |       |  1.27
 Nlocal:        8160.00 ave        8163 max        8157 min
 Histogram: 1 1 0 0 0 0 0 0 1 1
 Nghost:        11605.8 ave       11615 max       11593 min
 Histogram: 1 0 0 0 0 0 2 0 0 1
 Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  1.22553e+06 ave 1.22735e+06 max 1.22455e+06 min
 Histogram: 2 0 0 1 0 0 0 0 0 1
 Total # of neighbors = 4902134
 Ave neighs/atom = 150.18793
 Neighbor list builds = 9
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/bench/POTENTIALS/log.9Oct20.spce.1
+++ b/bench/POTENTIALS/log.9Oct20.spce.1
@ -0,0 +1,147 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # SPC/E water box benchmark
 units		real
 atom_style	full
 read_data	data.spce
 Reading data file ...
  orthogonal box = (0.02645 0.02645 0.02641) to (35.532800 35.532800 35.473600)
  1 by 1 by 1 MPI processor grid
  reading atoms ...
  4500 atoms
  scanning bonds ...
  2 = max bonds/atom
  scanning angles ...
  1 = max angles/atom
  reading bonds ...
  3000 bonds
  reading angles ...
  1500 angles
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.001 seconds
  read_data CPU = 0.009 seconds
 replicate	2 4 1
 Replicating atoms ...
  orthogonal box = (0.02645 0.02645 0.02641) to (71.039150 142.05185 35.473600)
  1 by 1 by 1 MPI processor grid
  36000 atoms
  24000 bonds
  12000 angles
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.005 seconds
  replicate CPU = 0.012 seconds
 pair_style	lj/cut/coul/long 9.8 9.8
 kspace_style	pppm 1.0e-4
 pair_coeff	1 1 0.15535 3.166
 pair_coeff	* 2 0.0000 0.0000
 bond_style	harmonic
 angle_style	harmonic
 dihedral_style	none
 improper_style	none
 bond_coeff	1 1000.00 1.000
 angle_coeff	1 100.0 109.47
 special_bonds   lj/coul 0.0 0.0 0.5
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.5     
  special bond factors coul:  0.0      0.0      0.5     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.005 seconds
 neighbor        2.0 bin
 neigh_modify	every 1 delay 10 check yes
 fix		1 all shake 0.0001 20 0 b 1 a 1
       0 = # of size 2 clusters
       0 = # of size 3 clusters
       0 = # of size 4 clusters
   12000 = # of frozen angles
  find clusters CPU = 0.005 seconds
 fix		2 all nvt temp 300.0 300.0 100.0
 velocity	all create 300 432567 dist uniform
 timestep	2.0
 thermo_style    one
 thermo		50
 run		100
 PPPM initialization ...
  using 12-bit tables for long-range coulomb (src/kspace.cpp:328)
  G vector (1/distance) = 0.2688011
  grid = 36 64 24
  stencil order = 5
  estimated absolute RMS force accuracy = 0.033101471
  estimated relative force accuracy = 9.9684097e-05
  using double precision KISS FFT
  3d grid and FFT values/proc = 91977 55296
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 11.8
  ghost atom cutoff = 11.8
  binsize = 5.9, bins = 13 25 7
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut/coul/long, perpetual
      attributes: half, newton on
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 105.4 | 105.4 | 105.4 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -133281.51            0   -111820.57    516.17807 
      50    264.98553   -136986.74            0   -118030.61   -440.29256 
     100    274.45966   -136364.57            0   -116730.69   -128.61948 
 Loop time of 18.5133 on 1 procs for 100 steps with 36000 atoms
 Performance: 0.933 ns/day, 25.713 hours/ns, 5.402 timesteps/s
 99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 14.557     | 14.557     | 14.557     |   0.0 | 78.63
 Bond    | 7.1764e-05 | 7.1764e-05 | 7.1764e-05 |   0.0 |  0.00
 Kspace  | 1.7651     | 1.7651     | 1.7651     |   0.0 |  9.53
 Neigh   | 1.8703     | 1.8703     | 1.8703     |   0.0 | 10.10
 Comm    | 0.042219   | 0.042219   | 0.042219   |   0.0 |  0.23
 Output  | 0.00021219 | 0.00021219 | 0.00021219 |   0.0 |  0.00
 Modify  | 0.26983    | 0.26983    | 0.26983    |   0.0 |  1.46
 Other   |            | 0.008397   |            |       |  0.05
 Nlocal:        36000.0 ave       36000 max       36000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        56963.0 ave       56963 max       56963 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.24625e+07 ave 1.24625e+07 max 1.24625e+07 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 12462451
 Ave neighs/atom = 346.17919
 Ave special neighs/atom = 2.0000000
 Neighbor list builds = 9
 Dangerous builds = 6
 Total wall time: 0:00:19
--- a/bench/POTENTIALS/log.9Oct20.spce.4
+++ b/bench/POTENTIALS/log.9Oct20.spce.4
@ -0,0 +1,147 @@
 LAMMPS (9 Oct 2020)
  using 1 OpenMP thread(s) per MPI task
 # SPC/E water box benchmark
 units		real
 atom_style	full
 read_data	data.spce
 Reading data file ...
  orthogonal box = (0.02645 0.02645 0.02641) to (35.532800 35.532800 35.473600)
  2 by 2 by 1 MPI processor grid
  reading atoms ...
  4500 atoms
  scanning bonds ...
  2 = max bonds/atom
  scanning angles ...
  1 = max angles/atom
  reading bonds ...
  3000 bonds
  reading angles ...
  1500 angles
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.001 seconds
  read_data CPU = 0.008 seconds
 replicate	2 4 1
 Replicating atoms ...
  orthogonal box = (0.02645 0.02645 0.02641) to (71.039150 142.05185 35.473600)
  1 by 4 by 1 MPI processor grid
  36000 atoms
  24000 bonds
  12000 angles
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.0     
  special bond factors coul:  0.0      0.0      0.0     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.002 seconds
  replicate CPU = 0.005 seconds
 pair_style	lj/cut/coul/long 9.8 9.8
 kspace_style	pppm 1.0e-4
 pair_coeff	1 1 0.15535 3.166
 pair_coeff	* 2 0.0000 0.0000
 bond_style	harmonic
 angle_style	harmonic
 dihedral_style	none
 improper_style	none
 bond_coeff	1 1000.00 1.000
 angle_coeff	1 100.0 109.47
 special_bonds   lj/coul 0.0 0.0 0.5
 Finding 1-2 1-3 1-4 neighbors ...
  special bond factors lj:    0.0      0.0      0.5     
  special bond factors coul:  0.0      0.0      0.5     
     2 = max # of 1-2 neighbors
     1 = max # of 1-3 neighbors
     1 = max # of 1-4 neighbors
     2 = max # of special neighbors
  special bonds CPU = 0.004 seconds
 neighbor        2.0 bin
 neigh_modify	every 1 delay 10 check yes
 fix		1 all shake 0.0001 20 0 b 1 a 1
       0 = # of size 2 clusters
       0 = # of size 3 clusters
       0 = # of size 4 clusters
   12000 = # of frozen angles
  find clusters CPU = 0.003 seconds
 fix		2 all nvt temp 300.0 300.0 100.0
 velocity	all create 300 432567 dist uniform
 timestep	2.0
 thermo_style    one
 thermo		50
 run		100
 PPPM initialization ...
  using 12-bit tables for long-range coulomb (src/kspace.cpp:328)
  G vector (1/distance) = 0.2688011
  grid = 36 64 24
  stencil order = 5
  estimated absolute RMS force accuracy = 0.033101471
  estimated relative force accuracy = 9.9684097e-05
  using double precision KISS FFT
  3d grid and FFT values/proc = 27993 13824
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 11.8
  ghost atom cutoff = 11.8
  binsize = 5.9, bins = 13 25 7
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut/coul/long, perpetual
      attributes: half, newton on
      pair build: half/bin/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 37.90 | 37.90 | 37.90 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          300   -133281.51            0   -111820.57    516.17807 
      50    264.98553   -136986.74            0   -118030.61   -440.29255 
     100    274.45966   -136364.57            0   -116730.69   -128.61954 
 Loop time of 5.44355 on 4 procs for 100 steps with 36000 atoms
 Performance: 3.174 ns/day, 7.560 hours/ns, 18.370 timesteps/s
 95.4% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 3.82       | 3.8744     | 3.9155     |   1.8 | 71.17
 Bond    | 7.8917e-05 | 8.6784e-05 | 9.4891e-05 |   0.0 |  0.00
 Kspace  | 0.79192    | 0.83671    | 0.88328    |   3.5 | 15.37
 Neigh   | 0.51754    | 0.5178     | 0.51789    |   0.0 |  9.51
 Comm    | 0.069774   | 0.078783   | 0.088247   |   2.3 |  1.45
 Output  | 9.0361e-05 | 0.00015712 | 0.00035048 |   0.0 |  0.00
 Modify  | 0.12822    | 0.13016    | 0.13486    |   0.8 |  2.39
 Other   |            | 0.005435   |            |       |  0.10
 Nlocal:        9000.00 ave        9002 max        8998 min
 Histogram: 2 0 0 0 0 0 0 0 0 2
 Nghost:        24134.2 ave       24184 max       24062 min
 Histogram: 1 0 0 0 0 0 1 1 0 1
 Neighs:    3.11561e+06 ave 3.11676e+06 max 3.11446e+06 min
 Histogram: 1 0 0 1 0 0 1 0 0 1
 Total # of neighbors = 12462451
 Ave neighs/atom = 346.17919
 Ave special neighs/atom = 2.0000000
 Neighbor list builds = 9
 Dangerous builds = 6
 Total wall time: 0:00:05
--- a/bench/POTENTIALS/log.16Mar18.sw.1
+++ b/bench/POTENTIALS/log.16Mar18.sw.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Si via Stillinger-Weber
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		diamond 5.431
-Lattice spacing in x,y,z = 5.431 5.431 5.431
+Lattice spacing in x,y,z = 5.4310000 5.4310000 5.4310000
 region		box block 0 20 0 20 0 10
 create_box	1 box
-Created orthogonal box = (0 0 0) to (108.62 108.62 54.31)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (108.62000 108.62000 54.310000)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.00191712 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	sw
 pair_coeff	* * Si.sw Si
 Reading sw potential file Si.sw with DATE: 2007-06-11
 mass            1 28.06
 velocity	all create 1000.0 376847 loop geom
@ -46,32 +46,32 @@ Per MPI rank memory allocation (min/avg/max) = 12.52 | 12.52 | 12.52 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000    -138771.2            0      -134635    6866.6499 
     100    508.80533   -136736.12            0    -134631.6    6361.7858 
-Loop time of 5.66634 on 1 procs for 100 steps with 32000 atoms
+Loop time of 3.9309 on 1 procs for 100 steps with 32000 atoms
-Performance: 1.525 ns/day, 15.740 hours/ns, 17.648 timesteps/s
+Performance: 2.198 ns/day, 10.919 hours/ns, 25.439 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 5.5464     | 5.5464     | 5.5464     |   0.0 | 97.88
+Pair    | 3.8206     | 3.8206     | 3.8206     |   0.0 | 97.19
-Neigh   | 0.075834   | 0.075834   | 0.075834   |   0.0 |  1.34
+Neigh   | 0.067368   | 0.067368   | 0.067368   |   0.0 |  1.71
-Comm    | 0.0092049  | 0.0092049  | 0.0092049  |   0.0 |  0.16
+Comm    | 0.0091503  | 0.0091503  | 0.0091503  |   0.0 |  0.23
-Output  | 0.00010991 | 0.00010991 | 0.00010991 |   0.0 |  0.00
+Output  | 0.0001049  | 0.0001049  | 0.0001049  |   0.0 |  0.00
-Modify  | 0.024666   | 0.024666   | 0.024666   |   0.0 |  0.44
+Modify  | 0.023839   | 0.023839   | 0.023839   |   0.0 |  0.61
-Other   |            | 0.01014    |            |       |  0.18
+Other   |            | 0.009882   |            |       |  0.25
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    12495 ave 12495 max 12495 min
+Nghost:        12495.0 ave       12495 max       12495 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-FullNghs:  894818 ave 894818 max 894818 min
+FullNghs:     894818.0 ave      894818 max      894818 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 894818
-Ave neighs/atom = 27.9631
+Ave neighs/atom = 27.963062
 Neighbor list builds = 2
 Dangerous builds = 0
-Total wall time: 0:00:05
+Total wall time: 0:00:03
--- a/bench/POTENTIALS/log.16Mar18.sw.4
+++ b/bench/POTENTIALS/log.16Mar18.sw.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Si via Stillinger-Weber
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		diamond 5.431
-Lattice spacing in x,y,z = 5.431 5.431 5.431
+Lattice spacing in x,y,z = 5.4310000 5.4310000 5.4310000
 region		box block 0 20 0 20 0 10
 create_box	1 box
-Created orthogonal box = (0 0 0) to (108.62 108.62 54.31)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (108.62000 108.62000 54.310000)
  2 by 2 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000604153 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	sw
 pair_coeff	* * Si.sw Si
 Reading sw potential file Si.sw with DATE: 2007-06-11
 mass            1 28.06
 velocity	all create 1000.0 376847 loop geom
@ -46,32 +46,32 @@ Per MPI rank memory allocation (min/avg/max) = 4.104 | 4.104 | 4.104 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000    -138771.2            0      -134635    6866.6499 
     100    508.80533   -136736.12            0    -134631.6    6361.7858 
-Loop time of 1.47105 on 4 procs for 100 steps with 32000 atoms
+Loop time of 1.04386 on 4 procs for 100 steps with 32000 atoms
-Performance: 5.873 ns/day, 4.086 hours/ns, 67.978 timesteps/s
+Performance: 8.277 ns/day, 2.900 hours/ns, 95.798 timesteps/s
-98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+98.7% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 1.3788     | 1.3929     | 1.4053     |   0.8 | 94.69
+Pair    | 0.96496    | 0.97632    | 0.9978     |   1.3 | 93.53
-Neigh   | 0.019134   | 0.019502   | 0.019816   |   0.2 |  1.33
+Neigh   | 0.01732    | 0.017998   | 0.019718   |   0.7 |  1.72
-Comm    | 0.024183   | 0.035734   | 0.049122   |   4.7 |  2.43
+Comm    | 0.012035   | 0.036398   | 0.049588   |   7.7 |  3.49
-Output  | 5.1975e-05 | 6.6102e-05 | 0.00010204 |   0.0 |  0.00
+Output  | 4.5061e-05 | 5.5015e-05 | 7.7248e-05 |   0.0 |  0.01
-Modify  | 0.0063825  | 0.0064374  | 0.0064764  |   0.0 |  0.44
+Modify  | 0.0070148  | 0.0070775  | 0.0071096  |   0.0 |  0.68
-Other   |            | 0.01638    |            |       |  1.11
+Other   |            | 0.006012   |            |       |  0.58
-Nlocal:    8000 ave 8015 max 7978 min
+Nlocal:        8000.00 ave        8015 max        7978 min
 Histogram: 1 0 0 0 0 0 1 1 0 1
-Nghost:    4995 ave 5017 max 4980 min
+Nghost:        4995.00 ave        5017 max        4980 min
 Histogram: 1 0 1 1 0 0 0 0 0 1
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
-FullNghs:  223704 ave 224108 max 223131 min
+FullNghs:     223704.0 ave      224108 max      223131 min
 Histogram: 1 0 0 0 0 0 1 1 0 1
 Total # of neighbors = 894818
-Ave neighs/atom = 27.9631
+Ave neighs/atom = 27.963062
 Neighbor list builds = 2
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/bench/POTENTIALS/log.16Mar18.tersoff.1
+++ b/bench/POTENTIALS/log.16Mar18.tersoff.1
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Si via Tersoff
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		diamond 5.431
-Lattice spacing in x,y,z = 5.431 5.431 5.431
+Lattice spacing in x,y,z = 5.4310000 5.4310000 5.4310000
 region		box block 0 20 0 20 0 10
 create_box	1 box
-Created orthogonal box = (0 0 0) to (108.62 108.62 54.31)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (108.62000 108.62000 54.310000)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.0019412 secs
+  create_atoms CPU = 0.002 seconds
 pair_style	tersoff
 pair_coeff	* * Si.tersoff Si
 Reading tersoff potential file Si.tersoff with DATE: 2007-10-25
 mass            1 28.06
 velocity	all create 1000.0 376847 loop geom
@ -42,36 +42,36 @@ Neighbor list info ...
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 11.1 | 11.1 | 11.1 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 11.11 | 11.11 | 11.11 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000   -148173.19            0   -144036.99    7019.4434 
     100    430.57813   -145815.61            0   -144034.65   -14550.734 
-Loop time of 8.53088 on 1 procs for 100 steps with 32000 atoms
+Loop time of 4.71424 on 1 procs for 100 steps with 32000 atoms
-Performance: 1.013 ns/day, 23.697 hours/ns, 11.722 timesteps/s
+Performance: 1.833 ns/day, 13.095 hours/ns, 21.212 timesteps/s
-99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 8.4236     | 8.4236     | 8.4236     |   0.0 | 98.74
+Pair    | 4.612      | 4.612      | 4.612      |   0.0 | 97.83
-Neigh   | 0.065852   | 0.065852   | 0.065852   |   0.0 |  0.77
+Neigh   | 0.060618   | 0.060618   | 0.060618   |   0.0 |  1.29
-Comm    | 0.0078607  | 0.0078607  | 0.0078607  |   0.0 |  0.09
+Comm    | 0.008847   | 0.008847   | 0.008847   |   0.0 |  0.19
-Output  | 0.00010872 | 0.00010872 | 0.00010872 |   0.0 |  0.00
+Output  | 9.9659e-05 | 9.9659e-05 | 9.9659e-05 |   0.0 |  0.00
-Modify  | 0.023968   | 0.023968   | 0.023968   |   0.0 |  0.28
+Modify  | 0.023234   | 0.023234   | 0.023234   |   0.0 |  0.49
-Other   |            | 0.009521   |            |       |  0.11
+Other   |            | 0.00941    |            |       |  0.20
-Nlocal:    32000 ave 32000 max 32000 min
+Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    11537 ave 11537 max 11537 min
+Nghost:        11537.0 ave       11537 max       11537 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
-FullNghs:  530500 ave 530500 max 530500 min
+FullNghs:     530500.0 ave      530500 max      530500 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 530500
-Ave neighs/atom = 16.5781
+Ave neighs/atom = 16.578125
 Neighbor list builds = 2
 Dangerous builds = 0
-Total wall time: 0:00:08
+Total wall time: 0:00:04
--- a/bench/POTENTIALS/log.16Mar18.tersoff.4
+++ b/bench/POTENTIALS/log.16Mar18.tersoff.4
@ -1,5 +1,4 @@
-LAMMPS (16 Mar 2018)
+LAMMPS (9 Oct 2020)
 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
  using 1 OpenMP thread(s) per MPI task
 # bulk Si via Tersoff
@ -7,17 +6,18 @@ units		metal
 atom_style	atomic
 lattice		diamond 5.431
-Lattice spacing in x,y,z = 5.431 5.431 5.431
+Lattice spacing in x,y,z = 5.4310000 5.4310000 5.4310000
 region		box block 0 20 0 20 0 10
 create_box	1 box
-Created orthogonal box = (0 0 0) to (108.62 108.62 54.31)
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (108.62000 108.62000 54.310000)
  2 by 2 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
-  Time spent = 0.000605822 secs
+  create_atoms CPU = 0.001 seconds
 pair_style	tersoff
 pair_coeff	* * Si.tersoff Si
 Reading tersoff potential file Si.tersoff with DATE: 2007-10-25
 mass            1 28.06
 velocity	all create 1000.0 376847 loop geom
@ -42,36 +42,36 @@ Neighbor list info ...
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 3.753 | 3.753 | 3.753 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 3.754 | 3.754 | 3.754 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0         1000   -148173.19            0   -144036.99    7019.4434 
     100    430.57813   -145815.61            0   -144034.65   -14550.734 
-Loop time of 2.16161 on 4 procs for 100 steps with 32000 atoms
+Loop time of 1.32053 on 4 procs for 100 steps with 32000 atoms
-Performance: 3.997 ns/day, 6.004 hours/ns, 46.262 timesteps/s
+Performance: 6.543 ns/day, 3.668 hours/ns, 75.727 timesteps/s
-99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+97.5% CPU use with 4 MPI tasks x 1 OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 2.1122     | 2.1165     | 2.122      |   0.2 | 97.91
+Pair    | 1.1729     | 1.2118     | 1.2453     |   2.3 | 91.77
-Neigh   | 0.016894   | 0.016915   | 0.016955   |   0.0 |  0.78
+Neigh   | 0.015989   | 0.016319   | 0.016708   |   0.3 |  1.24
-Comm    | 0.012348   | 0.017858   | 0.022105   |   2.7 |  0.83
+Comm    | 0.046884   | 0.078767   | 0.11602    |   8.9 |  5.96
-Output  | 4.7684e-05 | 6.2048e-05 | 9.9421e-05 |   0.0 |  0.00
+Output  | 3.9816e-05 | 7.0453e-05 | 0.00015831 |   0.0 |  0.01
-Modify  | 0.0064063  | 0.0064579  | 0.0065169  |   0.0 |  0.30
+Modify  | 0.0070612  | 0.0071967  | 0.0073555  |   0.1 |  0.54
-Other   |            | 0.003793   |            |       |  0.18
+Other   |            | 0.006331   |            |       |  0.48
-Nlocal:    8000 ave 8005 max 7993 min
+Nlocal:        8000.00 ave        8005 max        7993 min
 Histogram: 1 0 0 0 0 1 0 1 0 1
 Nghost:        4580.25 ave        4593 max        4567 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
-Neighs:    0 ave 0 max 0 min
+Neighs:        0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
-FullNghs:  132625 ave 132785 max 132562 min
+FullNghs:     132625.0 ave      132785 max      132562 min
 Histogram: 2 1 0 0 0 0 0 0 0 1
 Total # of neighbors = 530500
-Ave neighs/atom = 16.5781
+Ave neighs/atom = 16.578125
 Neighbor list builds = 2
 Dangerous builds = 0
-Total wall time: 0:00:02
+Total wall time: 0:00:01
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -25,7 +25,7 @@ set(LAMMPS_POTENTIALS_DIR ${LAMMPS_DIR}/potentials)
 find_package(Git)
 # by default, install into $HOME/.local (not /usr/local), so that no root access (and sudo!!) is needed
-if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "default install path" FORCE )
 endif()
@ -33,7 +33,7 @@ endif()
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules)
 # make sure LIBRARY_PATH is set if environment variable is set
-if (DEFINED ENV{LIBRARY_PATH})
+if(DEFINED ENV{LIBRARY_PATH})
  list(APPEND CMAKE_LIBRARY_PATH "$ENV{LIBRARY_PATH}")
  message(STATUS "Appending $ENV{LIBRARY_PATH} to CMAKE_LIBRARY_PATH: ${CMAKE_LIBRARY_PATH}")
 endif()
@ -107,13 +107,15 @@ option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF)
 set(STANDARD_PACKAGES ASPHERE BODY CLASS2 COLLOID COMPRESS DIPOLE
  GRANULAR KSPACE LATTE MANYBODY MC MESSAGE MISC MLIAP MOLECULE PERI POEMS
  QEQ REPLICA RIGID SHOCK SPIN SNAP SRD KIM PYTHON MSCG MPIIO VORONOI
-  USER-ATC USER-AWPMD USER-BOCS USER-CGDNA USER-MESODPD USER-CGSDK USER-COLVARS
+  USER-ADIOS USER-ATC USER-AWPMD USER-BOCS USER-CGDNA USER-MESODPD USER-CGSDK
-  USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF USER-FEP USER-H5MD USER-LB
+  USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF USER-FEP USER-H5MD
-  USER-MANIFOLD USER-MEAMC USER-MESONT USER-MGPT USER-MISC USER-MOFFF USER-MOLFILE
+  USER-LB USER-MANIFOLD USER-MEAMC USER-MESONT USER-MGPT USER-MISC USER-MOFFF
-  USER-NETCDF USER-PHONON USER-PLUMED USER-PTM USER-QTB USER-REACTION
+  USER-MOLFILE USER-NETCDF USER-PHONON USER-PLUMED USER-PTM USER-QTB
-  USER-REAXC USER-SCAFACOS USER-SDPD USER-SMD USER-SMTBQ USER-SPH USER-TALLY
+  USER-REACTION USER-REAXC USER-SCAFACOS USER-SDPD USER-SMD USER-SMTBQ USER-SPH
-  USER-UEF USER-VTK USER-QUIP USER-QMMM USER-YAFF USER-ADIOS)
+  USER-TALLY USER-UEF USER-VTK USER-QUIP USER-QMMM USER-YAFF)
-set(SUFFIX_PACKAGES CORESHELL USER-OMP KOKKOS OPT USER-INTEL GPU)
+
 set(SUFFIX_PACKAGES CORESHELL GPU KOKKOS OPT USER-INTEL USER-OMP)
 foreach(PKG ${STANDARD_PACKAGES} ${SUFFIX_PACKAGES})
  option(PKG_${PKG} "Build ${PKG} Package" OFF)
 endforeach()
@ -154,8 +156,7 @@ if(BUILD_MPI)
    endif()
  endif()
 else()
-  enable_language(C)
+  file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.cpp)
  file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c)
  add_library(mpi_stubs STATIC ${MPI_SOURCES})
  set_target_properties(mpi_stubs PROPERTIES OUTPUT_NAME lammps_mpi_stubs${LAMMPS_MACHINE})
  target_include_directories(mpi_stubs PUBLIC $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
@ -220,6 +221,7 @@ if(BUILD_OMP)
  endif()
  if (((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0)) OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "PGI") OR
      ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0)) OR
      ((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19.0)))
    # GCC 9.x and later plus Clang 10.x and later implement strict OpenMP 4.0 semantics for consts.
@ -372,13 +374,20 @@ else()
  set(CUDA_REQUEST_PIC)
 endif()
-foreach(PKG_WITH_INCL KSPACE PYTHON VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
+foreach(PKG_WITH_INCL KSPACE PYTHON MLIAP VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
        USER-QUIP USER-SCAFACOS USER-SMD USER-VTK KIM LATTE MESSAGE MSCG COMPRESS)
  if(PKG_${PKG_WITH_INCL})
    include(Packages/${PKG_WITH_INCL})
  endif()
 endforeach()
 # optionally enable building script wrappers using swig
 option(WITH_SWIG "Build scripting language wrappers with SWIG" OFF)
 if(WITH_SWIG)
  get_filename_component(LAMMPS_SWIG_DIR ${LAMMPS_SOURCE_DIR}/../tools/swig ABSOLUTE)
  add_subdirectory(${LAMMPS_SWIG_DIR} swig)
 endif()
 set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler and machine specific optimization flags (compilation only)")
 separate_arguments(CMAKE_TUNE_FLAGS)
 foreach(_FLAG ${CMAKE_TUNE_FLAGS})
@ -572,7 +581,7 @@ add_dependencies(lammps gitversion)
 ############################################
 get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
 list (FIND LANGUAGES "Fortran" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
 endif()
 set(LAMMPS_CXX_HEADERS angle.h atom.h bond.h citeme.h comm.h compute.h dihedral.h domain.h error.h fix.h force.h group.h improper.h
@ -584,7 +593,7 @@ endif()
 set_target_properties(lammps PROPERTIES OUTPUT_NAME lammps${LAMMPS_MACHINE})
 set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION})
-target_include_directories(lammps PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+target_include_directories(lammps PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/lammps>)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps)
 foreach(_HEADER ${LAMMPS_CXX_HEADERS})
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/${_HEADER} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_SOURCE_DIR}/${_HEADER} ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/${_HEADER} DEPENDS ${LAMMPS_SOURCE_DIR}/${_HEADER})
@ -613,48 +622,7 @@ if(BUILD_SHARED_LIBS)
 endif()
 install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1)
-if(BUILD_TOOLS)
+include(Tools)
  add_executable(binary2txt ${LAMMPS_TOOLS_DIR}/binary2txt.cpp)
  target_compile_definitions(binary2txt PRIVATE -DLAMMPS_${LAMMPS_SIZES})
  install(TARGETS binary2txt DESTINATION ${CMAKE_INSTALL_BINDIR})
  include(CheckGeneratorSupport)
  if(CMAKE_GENERATOR_SUPPORT_FORTRAN)
    include(CheckLanguage)
    check_language(Fortran)
    if(CMAKE_Fortran_COMPILER)
      enable_language(Fortran)
      add_executable(chain.x ${LAMMPS_TOOLS_DIR}/chain.f)
      target_link_libraries(chain.x PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
      install(TARGETS chain.x DESTINATION ${CMAKE_INSTALL_BINDIR})
    else()
      message(WARNING "No suitable Fortran compiler found, skipping building 'chain.x'")
    endif()
  else()
    message(WARNING "CMake build doesn't support fortran, skipping building 'chain.x'")
  endif()
  enable_language(C)
  get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE)
  file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c)
  add_executable(msi2lmp ${MSI2LMP_SOURCES})
  target_link_libraries(msi2lmp PRIVATE ${MATH_LIBRARIES})
  install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR})
  install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 endif()
 if(BUILD_LAMMPS_SHELL)
  find_package(PkgConfig REQUIRED)
  pkg_check_modules(READLINE IMPORTED_TARGET REQUIRED readline)
  if(NOT LAMMPS_EXCEPTIONS)
    message(WARNING "The LAMMPS shell needs LAMMPS_EXCEPTIONS enabled for full functionality")
  endif()
  add_executable(lammps-shell ${LAMMPS_TOOLS_DIR}/lammps-shell/lammps-shell.cpp)
  target_compile_definitions(lammps-shell PRIVATE -DLAMMPS_LIB_NO_MPI)
  target_link_libraries(lammps-shell PRIVATE lammps PkgConfig::READLINE)
  install(TARGETS lammps-shell EXPORT LAMMPS_Targets DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
 include(Documentation)
 ###############################################################################
@ -683,7 +651,7 @@ install(
 if(BUILD_SHARED_LIBS)
  if(CMAKE_VERSION VERSION_LESS 3.12)
    # adjust so we find Python 3 versions before Python 2 on old systems with old CMake
-    set(Python_ADDITIONAL_VERSIONS 3.8 3.7 3.6 3.5)
+    set(Python_ADDITIONAL_VERSIONS 3.9 3.8 3.7 3.6 3.5)
    find_package(PythonInterp) # Deprecated since version 3.12
    if(PYTHONINTERP_FOUND)
        set(Python_EXECUTABLE ${PYTHON_EXECUTABLE})
@ -693,10 +661,10 @@ if(BUILD_SHARED_LIBS)
  endif()
  if (Python_EXECUTABLE)
    add_custom_target(
-      install-python
+      install-python ${CMAKE_COMMAND} -E remove_directory build
-      ${Python_EXECUTABLE} install.py -v ${LAMMPS_SOURCE_DIR}/version.h
+      COMMAND ${Python_EXECUTABLE} install.py -v ${LAMMPS_SOURCE_DIR}/version.h
-      -m ${LAMMPS_PYTHON_DIR}/lammps.py
+      -p ${LAMMPS_PYTHON_DIR}/lammps
-      -l ${CMAKE_BINARY_DIR}/liblammps${CMAKE_SHARED_LIBRARY_SUFFIX}
+      -l ${CMAKE_BINARY_DIR}/liblammps${LAMMPS_MACHINE}${CMAKE_SHARED_LIBRARY_SUFFIX}
      WORKING_DIRECTORY  ${LAMMPS_PYTHON_DIR}
      COMMENT "Installing LAMMPS Python module")
  else()
@ -725,11 +693,8 @@ if(BUILD_SHARED_LIBS OR PKG_PYTHON)
    find_package(Python COMPONENTS Interpreter)
  endif()
  if (Python_EXECUTABLE)
-    execute_process(COMMAND ${Python_EXECUTABLE}
+    file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python)
-      -c "import distutils.sysconfig as cg; print(cg.get_python_lib(1,0,prefix='${CMAKE_INSTALL_PREFIX}'))"
+    install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} setup.py build -b ${CMAKE_BINARY_DIR}/python install --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/ WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR})")
      OUTPUT_VARIABLE PYTHON_DEFAULT_INSTDIR OUTPUT_STRIP_TRAILING_WHITESPACE)
    set(PYTHON_INSTDIR ${PYTHON_DEFAULT_INSTDIR} CACHE PATH "Installation folder for LAMMPS Python module")
    install(FILES ${LAMMPS_PYTHON_DIR}/lammps.py DESTINATION ${PYTHON_INSTDIR})
  endif()
 endif()
@ -737,23 +702,31 @@ include(Testing)
 include(CodeCoverage)
 include(CodingStandard)
 ###############################################################################
 # Print package summary
 ###############################################################################
 foreach(PKG ${STANDARD_PACKAGES} ${SUFFIX_PACKAGES})
  if(PKG_${PKG})
    message(STATUS "Building package: ${PKG}")
  endif()
 endforeach()
 get_target_property(DEFINES lammps COMPILE_DEFINITIONS)
 include(FeatureSummary)
 feature_summary(DESCRIPTION "The following tools and libraries have been found and configured:" WHAT PACKAGES_FOUND)
 message(STATUS "<<< Build configuration >>>
   Operating System: ${CMAKE_SYSTEM_NAME}
   Build type:       ${CMAKE_BUILD_TYPE}
   Install path:     ${CMAKE_INSTALL_PREFIX}
-   Generator:        ${CMAKE_GENERATOR} using ${CMAKE_MAKE_PROGRAM}
+   Generator:        ${CMAKE_GENERATOR} using ${CMAKE_MAKE_PROGRAM}")
-- <<< Compilers and Flags: >>>
+###############################################################################
 # Print package summary
 ###############################################################################
 set(ENABLED_PACKAGES)
 foreach(PKG ${STANDARD_PACKAGES} ${SUFFIX_PACKAGES})
  if(PKG_${PKG})
    list(APPEND ENABLED_PACKAGES ${PKG})
  endif()
 endforeach()
 if(ENABLED_PACKAGES)
  list(SORT ENABLED_PACKAGES)
 else()
  set(ENABLED_PACKAGES "<None>")
 endif()
 message(STATUS "Enabled packages: ${ENABLED_PACKAGES}")
 message(STATUS "<<< Compilers and Flags: >>>
 -- C++ Compiler:     ${CMAKE_CXX_COMPILER}
      Type:          ${CMAKE_CXX_COMPILER_ID}
      Version:       ${CMAKE_CXX_COMPILER_VERSION}
@ -765,14 +738,14 @@ if(OPTIONS)
 endif()
 get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
 list (FIND LANGUAGES "Fortran" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  message(STATUS "Fortran Compiler: ${CMAKE_Fortran_COMPILER}
      Type:          ${CMAKE_Fortran_COMPILER_ID}
      Version:       ${CMAKE_Fortran_COMPILER_VERSION}
      Fortran Flags:${CMAKE_Fortran_FLAGS} ${CMAKE_Fortran_FLAGS_${BTYPE}}")
 endif()
 list (FIND LANGUAGES "C" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  message(STATUS "C compiler:       ${CMAKE_C_COMPILER}
      Type:          ${CMAKE_C_COMPILER_ID}
      Version:       ${CMAKE_C_COMPILER_VERSION}
@ -804,9 +777,7 @@ if(PKG_GPU)
  message(STATUS "<<< GPU package settings >>>
 -- GPU API:          ${GPU_API}")
  if(GPU_API STREQUAL "CUDA")
-    message(STATUS "GPU architecture: ${GPU_ARCH}")
+    message(STATUS "GPU default architecture: ${GPU_ARCH}")
  elseif(GPU_API STREQUAL "OPENCL")
    message(STATUS "OpenCL tuning:    ${OCL_TUNE}")
  elseif(GPU_API STREQUAL "HIP")
    message(STATUS "HIP platform:     ${HIP_PLATFORM}")
    message(STATUS "HIP architecture: ${HIP_ARCH}")
@ -849,3 +820,15 @@ endif()
 if(BUILD_DOC)
  message(STATUS "<<< Building HTML Manual >>>")
 endif()
 if(BUILD_TOOLS)
  message(STATUS "<<< Building Tools >>>")
 endif()
 if(BUILD_LAMMPS_SHELL)
  message(STATUS "<<< Building LAMMPS Shell >>>")
 endif()
 if(ENABLE_TESTING)
  message(STATUS "<<< Building Unit Tests >>>")
  if(ENABLE_COVERAGE)
    message(STATUS "Collecting code coverage data")
  endif()
 endif()
--- a/cmake/Modules/CodingStandard.cmake
+++ b/cmake/Modules/CodingStandard.cmake
@ -8,7 +8,7 @@ else()
    find_package(Python3 COMPONENTS Interpreter QUIET)
 endif()
-if (Python3_EXECUTABLE)
+if(Python3_EXECUTABLE)
    if(Python3_VERSION VERSION_GREATER_EQUAL 3.5)
        add_custom_target(
          check-whitespace
--- a/cmake/Modules/Documentation.cmake
+++ b/cmake/Modules/Documentation.cmake
@ -50,27 +50,21 @@ if(BUILD_DOC)
    OUTPUT ${DOC_BUILD_DIR}/requirements.txt
    DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
    COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade pip
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
  )
  # download mathjax distribution and unpack to folder "mathjax"
  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
-    file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
+    file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.1.2.tar.gz"
      "${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
-      EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
+      EXPECTED_MD5 a4a6a093a89bc2ccab1452d766b98e53)
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
    execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
  endif()
  # for increased browser compatibility
  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
    file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
      "${DOC_BUILD_STATIC_DIR}/polyfill.js")
  endif()
  # set up doxygen and add targets to run it
  file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
  file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR}/lammps-logo.png)
--- a/cmake/Modules/FindCythonize.cmake
+++ b/cmake/Modules/FindCythonize.cmake
@ -0,0 +1,30 @@
 # Locate the "cythonize" command line tool.
 #
 # Result variables:
 #
 #  Cythonize_EXECUTABLE - path of the cythonize program (cache entry created by find_program)
 #  Cythonize_FOUND      - set by find_package_handle_standard_args()
 #
 # adapted from https://github.com/cmarshall108/cython-cmake-example/blob/master/cmake/FindCython.cmake
 #=============================================================================
 include(FindPackageHandleStandardArgs)
 # Look for a Python 3.6+ interpreter first; its folder is used as search hint below.
 if(NOT CMAKE_VERSION VERSION_LESS 3.12)
    find_package(Python3 3.6 COMPONENTS Interpreter QUIET)
 else()
    # FindPythonInterp is deprecated since CMake 3.12, so it is only used with older versions
    find_package(PythonInterp 3.6 QUIET)
    if(PYTHONINTERP_FOUND)
        # provide the same variable name that the FindPython3 module would set
        set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
    endif()
 endif()
 # Give preference to a cythonize executable located in the same folder
 # as the Python interpreter (e.g. for a local installation).
 if(Python3_EXECUTABLE)
  get_filename_component(_cythonize_hint_dir "${Python3_EXECUTABLE}" DIRECTORY)
  find_program(Cythonize_EXECUTABLE
    NAMES cythonize3 cythonize cythonize.bat
    HINTS "${_cythonize_hint_dir}")
 endif()
 find_package_handle_standard_args(Cythonize REQUIRED_VARS Cythonize_EXECUTABLE)
 mark_as_advanced(Cythonize_EXECUTABLE)
--- a/cmake/Modules/GTest.cmake
+++ b/cmake/Modules/GTest.cmake
@ -20,10 +20,10 @@ ExternalProject_Add(googletest
                                    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                                    -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                                    -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-                    BUILD_BYPRODUCTS <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest${GTEST_LIB_POSTFIX}.a
+                    BUILD_BYPRODUCTS <BINARY_DIR>/lib/libgtest${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgmock${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgtest_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgmock_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
                    LOG_DOWNLOAD ON
                    LOG_CONFIGURE ON
                    LOG_BUILD ON
@ -39,10 +39,10 @@ file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIR})
 file(MAKE_DIRECTORY ${GMOCK_INCLUDE_DIR})
 ExternalProject_Get_Property(googletest BINARY_DIR)
-set(GTEST_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest${GTEST_LIB_POSTFIX}.a)
+set(GTEST_LIBRARY_PATH ${BINARY_DIR}/lib/libgtest${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GMOCK_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock${GTEST_LIB_POSTFIX}.a)
+set(GMOCK_LIBRARY_PATH ${BINARY_DIR}/lib/libgmock${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GTEST_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main${GTEST_LIB_POSTFIX}.a)
+set(GTEST_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/libgtest_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GMOCK_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main${GTEST_LIB_POSTFIX}.a)
+set(GMOCK_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/libgmock_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
 # Prevent GoogleTest from overriding our compiler/linker options
 # when building with Visual Studio
--- a/cmake/Modules/LAMMPSUtils.cmake
+++ b/cmake/Modules/LAMMPSUtils.cmake
@ -50,6 +50,7 @@ function(check_for_autogen_files source_dir)
    file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
    file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
    list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
    list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp)
    foreach(_SRC ${SRC_AUTOGEN_FILES})
      get_filename_component(FILENAME "${_SRC}" NAME)
      if(EXISTS ${source_dir}/${FILENAME})
--- a/cmake/Modules/MPI4WIN.cmake
+++ b/cmake/Modules/MPI4WIN.cmake
@ -1,7 +1,7 @@
 # Download and configure custom MPICH files for Windows
 message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
 include(ExternalProject)
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  ExternalProject_Add(mpi4win_build
    URL https://download.lammps.org/thirdparty/mpich2-win64-devel.tar.gz
    URL_MD5 4939fdb59d13182fd5dd65211e469f14
--- a/cmake/Modules/OpenCLLoader.cmake
+++ b/cmake/Modules/OpenCLLoader.cmake
@ -0,0 +1,54 @@
 # Download and build an OpenCL loader library as an external project and
 # make it available as the imported target OpenCL::OpenCL.
 #
 # Variables set by this file:
 #  OPENCL_LOADER_URL          - advanced cache entry with the location of the loader tarball
 #  OPENCL_LOADER_LIB_POSTFIX  - postfix of the library name ("d" for Debug builds, else empty)
 #  OPENCL_LOADER_INCLUDE_DIR  - "inc" folder inside the unpacked loader sources
 #  OPENCL_LOADER_LIBRARY_PATH - full path of the library produced by the external build
 #  OPENCL_LOADER_DEP_LIBS     - libraries that consumers of OpenCL::OpenCL must link to as well
 message(STATUS "Downloading and building OpenCL loader library")
 # NOTE(review): CMAKE_BUILD_TYPE is empty with multi-config generators, so the
 # postfix is then never applied - confirm whether those generators need handling.
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  set(OPENCL_LOADER_LIB_POSTFIX d)
 else()
  set(OPENCL_LOADER_LIB_POSTFIX)
 endif()
 include(ExternalProject)
 set(OPENCL_LOADER_URL "https://download.lammps.org/thirdparty/opencl-loader-2020.12.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
 mark_as_advanced(OPENCL_LOADER_URL)
 # The library is used directly from the build folder, hence no install step.
 ExternalProject_Add(opencl_loader
                    URL ${OPENCL_LOADER_URL}
                    URL_MD5         011cdcbd41030be94f3fced6d763a52a
                    SOURCE_DIR      "${CMAKE_BINARY_DIR}/opencl_loader-src"
                    BINARY_DIR      "${CMAKE_BINARY_DIR}/opencl_loader-build"
                    CMAKE_ARGS      ${CMAKE_REQUEST_PIC} ${CMAKE_EXTRA_OPENCL_LOADER_OPTS}
                                    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                                    -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                                    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                                    -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                                    -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
                    BUILD_BYPRODUCTS <BINARY_DIR>/libOpenCL${OPENCL_LOADER_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
                    LOG_DOWNLOAD ON
                    LOG_CONFIGURE ON
                    LOG_BUILD ON
                    INSTALL_COMMAND ""
                    TEST_COMMAND    "")
 ExternalProject_Get_Property(opencl_loader SOURCE_DIR)
 set(OPENCL_LOADER_INCLUDE_DIR ${SOURCE_DIR}/inc)
 # workaround for CMake 3.10 on ubuntu 18.04
 file(MAKE_DIRECTORY ${OPENCL_LOADER_INCLUDE_DIR})
 ExternalProject_Get_Property(opencl_loader BINARY_DIR)
 set(OPENCL_LOADER_LIBRARY_PATH "${BINARY_DIR}/libOpenCL${OPENCL_LOADER_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
 find_package(Threads QUIET)
 if(NOT WIN32)
  # Threads::Threads only exists when the (quiet) search above succeeded. Stop at
  # configure time with a clear message rather than with an unknown-target error later.
  if(NOT TARGET Threads::Threads)
    message(FATAL_ERROR "Building the OpenCL loader library requires a thread library, but none was found")
  endif()
  set(OPENCL_LOADER_DEP_LIBS "Threads::Threads;${CMAKE_DL_LIBS}")
 else()
  set(OPENCL_LOADER_DEP_LIBS "cfgmgr32;runtimeobject")
 endif()
 add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
 # make sure the external project is built before anything that links to the imported target
 add_dependencies(OpenCL::OpenCL opencl_loader)
 set_target_properties(OpenCL::OpenCL PROPERTIES
  IMPORTED_LOCATION "${OPENCL_LOADER_LIBRARY_PATH}"
  INTERFACE_INCLUDE_DIRECTORIES "${OPENCL_LOADER_INCLUDE_DIR}"
  INTERFACE_LINK_LIBRARIES "${OPENCL_LOADER_DEP_LIBS}")
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -1,7 +1,10 @@
 set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
 set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
                ${GPU_SOURCES_DIR}/fix_gpu.h
-                ${GPU_SOURCES_DIR}/fix_gpu.cpp)
+                ${GPU_SOURCES_DIR}/fix_gpu.cpp
                ${GPU_SOURCES_DIR}/fix_nh_gpu.h
                ${GPU_SOURCES_DIR}/fix_nh_gpu.cpp)
 target_compile_definitions(lammps PRIVATE -DLMP_GPU)
 set(GPU_API "opencl" CACHE STRING "API used by GPU package")
 set(GPU_API_VALUES opencl cuda hip)
@ -35,6 +38,9 @@ if(GPU_API STREQUAL "CUDA")
  option(CUDPP_OPT "Enable CUDPP_OPT" ON)
  option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF)
  # CUDA MPS support cannot be combined with CUDPP_OPT: refuse to configure when
  # both are enabled, otherwise record the -DCUDA_PROXY flag in GPU_CUDA_MPS_FLAGS.
  if(CUDA_MPS_SUPPORT)
    if(CUDPP_OPT)
      # refer to the option by the name it is declared with, so the hint can be used verbatim
      message(FATAL_ERROR "Must use -DCUDPP_OPT=OFF with -DCUDA_MPS_SUPPORT=ON")
    endif()
    set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY")
  endif()
@ -97,6 +103,10 @@ if(GPU_API STREQUAL "CUDA")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
  endif()
  # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
  endif()
  if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
    message(WARNING "Unsupported CUDA version. Use at your own risk.")
  endif()
@ -135,27 +145,13 @@ if(GPU_API STREQUAL "CUDA")
  target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
 elseif(GPU_API STREQUAL "OPENCL")
-  if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+  option(USE_STATIC_OPENCL_LOADER "Download and include a static OpenCL ICD loader" ON)
-    # download and unpack support binaries for compilation of windows binaries.
+  mark_as_advanced(USE_STATIC_OPENCL_LOADER)
-    set(LAMMPS_THIRDPARTY_URL "https://download.lammps.org/thirdparty")
+  if (USE_STATIC_OPENCL_LOADER)
-    file(DOWNLOAD "${LAMMPS_THIRDPARTY_URL}/opencl-win-devel.tar.gz" "${CMAKE_CURRENT_BINARY_DIR}/opencl-win-devel.tar.gz"
+    include(OpenCLLoader)
            EXPECTED_MD5 2c00364888d5671195598b44c2e0d44d)
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf opencl-win-devel.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
    if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
      set_target_properties(OpenCL::OpenCL PROPERTIES IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/lib_win32/libOpenCL.dll")
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
      set_target_properties(OpenCL::OpenCL PROPERTIES IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/lib_win64/libOpenCL.dll")
    endif()
    set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/include")
  else()
    find_package(OpenCL REQUIRED)
  endif()
  set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning")
  set(OCL_TUNE_VALUES intel fermi kepler cypress generic)
  set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES})
  validate_option(OCL_TUNE OCL_TUNE_VALUES)
  string(TOUPPER ${OCL_TUNE} OCL_TUNE)
  include(OpenCLUtils)
  set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
@ -199,7 +195,7 @@ elseif(GPU_API STREQUAL "OPENCL")
  add_library(gpu STATIC ${GPU_LIB_SOURCES})
  target_link_libraries(gpu PRIVATE OpenCL::OpenCL)
  target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu)
-  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
+  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT)
  target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
  target_link_libraries(lammps PRIVATE gpu)
@ -207,6 +203,7 @@ elseif(GPU_API STREQUAL "OPENCL")
  add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
  target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL)
  add_dependencies(ocl_get_devices OpenCL::OpenCL)
 elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
@ -389,13 +386,10 @@ elseif(GPU_API STREQUAL "HIP")
  target_link_libraries(lammps PRIVATE gpu)
 endif()
 # GPU package
 FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX)
 set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
-
+# detect styles which have a GPU version
 # detects styles which have GPU version
 RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
 RegisterFixStyle(${GPU_SOURCES_DIR}/fix_gpu.h)
 get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
--- a/cmake/Modules/Packages/KIM.cmake
+++ b/cmake/Modules/Packages/KIM.cmake
@ -19,6 +19,8 @@ if(CURL_FOUND)
    target_compile_definitions(lammps PRIVATE -DLMP_NO_SSL_CHECK)
  endif()
 endif()
 set(KIM_EXTRA_UNITTESTS OFF CACHE STRING "Set extra unit tests verbose mode on/off. If on, extra tests are included.")
 mark_as_advanced(KIM_EXTRA_UNITTESTS)
 find_package(PkgConfig QUIET)
 set(DOWNLOAD_KIM_DEFAULT ON)
 if(PKG_CONFIG_FOUND)
@ -34,8 +36,8 @@ if(DOWNLOAD_KIM)
  enable_language(C)
  enable_language(Fortran)
  ExternalProject_Add(kim_build
-    URL https://s3.openkim.org/kim-api/kim-api-2.1.3.txz
+    URL https://s3.openkim.org/kim-api/kim-api-2.2.1.txz
-    URL_MD5 6ee829a1bbba5f8b9874c88c4c4ebff8
+    URL_MD5 ae1ddda2ef7017ea07934e519d023dca
    BINARY_DIR build
    CMAKE_ARGS ${CMAKE_REQUEST_PIC}
               -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
@ -53,11 +55,28 @@ if(DOWNLOAD_KIM)
  add_library(LAMMPS::KIM UNKNOWN IMPORTED)
  set_target_properties(LAMMPS::KIM PROPERTIES
    IMPORTED_LOCATION "${INSTALL_DIR}/lib/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX}"
-    INTERFACE_INCLUDE_DIRECTORIES "${INSTALL_DIR}/include/kim-api")
+    INTERFACE_INCLUDE_DIRECTORIES "${INSTALL_DIR}/include/kim-api"
-  target_link_libraries(lammps PRIVATE LAMMPS::KIM)
+    )
  add_dependencies(LAMMPS::KIM kim_build)
  target_link_libraries(lammps PRIVATE LAMMPS::KIM)
  # Set rpath so lammps build directory is relocatable
  if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
    set(_rpath_prefix "@loader_path")
  else()
    set(_rpath_prefix "$ORIGIN")
  endif()
  set_target_properties(lmp PROPERTIES
    BUILD_RPATH "${_rpath_prefix}/kim_build-prefix/lib"
    )
 else()
  if(KIM-API_FOUND AND KIM-API_VERSION VERSION_GREATER_EQUAL 2.2.0)
    # For kim-api >= 2.2.0
    find_package(KIM-API 2.2.0 CONFIG REQUIRED)
    target_link_libraries(lammps PRIVATE KIM-API::kim-api)
  else()
    # For kim-api 2.1.3 (consistent with previous version of this file)
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(KIM-API REQUIRED IMPORTED_TARGET libkim-api>=${KIM-API_MIN_VERSION})
    target_link_libraries(lammps PRIVATE PkgConfig::KIM-API)
  endif()
 endif()
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -1,4 +1,7 @@
 ########################################################################
 # As of version 3.3.0 Kokkos requires C++14
 set(CMAKE_CXX_STANDARD 14)
 ########################################################################
 # consistency checks and Kokkos options/settings required by LAMMPS
 if(Kokkos_ENABLE_CUDA)
  message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support")
@ -35,8 +38,8 @@ if(DOWNLOAD_KOKKOS)
  list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
  include(ExternalProject)
  ExternalProject_Add(kokkos_build
-    URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
+    URL https://github.com/kokkos/kokkos/archive/3.3.01.tar.gz
-    URL_MD5 81569170fe232e5e64ab074f7cca5e50
+    URL_MD5 08201d1c7cf5bc458ce0f5b44a629d5a
    CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
    BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
  )
@ -50,7 +53,7 @@ if(DOWNLOAD_KOKKOS)
  target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
  add_dependencies(LAMMPS::KOKKOS kokkos_build)
 elseif(EXTERNAL_KOKKOS)
-  find_package(Kokkos 3.2.00 REQUIRED CONFIG)
+  find_package(Kokkos 3.3.01 REQUIRED CONFIG)
  target_link_libraries(lammps PRIVATE Kokkos::kokkos)
 else()
  set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
--- a/cmake/Modules/Packages/MESSAGE.cmake
+++ b/cmake/Modules/Packages/MESSAGE.cmake
@ -2,8 +2,7 @@ if(LAMMPS_SIZES STREQUAL BIGBIG)
  message(FATAL_ERROR "The MESSAGE Package is not compatible with -DLAMMPS_BIGBIG")
 endif()
 option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF)
-file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F
+file(GLOB_RECURSE cslib_SOURCES
    ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c
        ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp)
 add_library(cslib STATIC ${cslib_SOURCES})
--- a/cmake/Modules/Packages/MLIAP.cmake
+++ b/cmake/Modules/Packages/MLIAP.cmake
@ -0,0 +1,40 @@
 # Optional Python support for the MLIAP package.
 #
 # The MLIAP_ENABLE_PYTHON option defaults to ON only when the PYTHON package is
 # enabled and the cythonize tool was found. When the option is ON, the Cython
 # source of the Python coupling is translated to C++ inside the build folder
 # and the generated source is compiled into the "lammps" target.
 set(MLIAP_ENABLE_PYTHON_DEFAULT OFF)
 if(PKG_PYTHON)
  find_package(Cythonize QUIET)
  if(Cythonize_FOUND)
    set(MLIAP_ENABLE_PYTHON_DEFAULT ON)
  endif()
 endif()
 option(MLIAP_ENABLE_PYTHON "Build MLIAP package with Python support" ${MLIAP_ENABLE_PYTHON_DEFAULT})
 if(MLIAP_ENABLE_PYTHON)
  # the option may have been switched on manually, so all requirements are checked again
  find_package(Cythonize REQUIRED)
  if(NOT PKG_PYTHON)
    message(FATAL_ERROR "Must enable PYTHON package for including Python support in MLIAP")
  endif()
  # NOTE(review): the version variables tested below are not set in this file;
  # presumably the PYTHON package configuration provides them - confirm.
  if(CMAKE_VERSION VERSION_LESS 3.12)
    if(PYTHONLIBS_VERSION_STRING VERSION_LESS 3.6)
      message(FATAL_ERROR "Python support in MLIAP requires Python 3.6 or later")
    endif()
  else()
    if(Python_VERSION VERSION_LESS 3.6)
      message(FATAL_ERROR "Python support in MLIAP requires Python 3.6 or later")
    endif()
  endif()
  set(MLIAP_BINARY_DIR ${CMAKE_BINARY_DIR}/cython)
  set(MLIAP_CYTHON_SRC ${LAMMPS_SOURCE_DIR}/MLIAP/mliap_model_python_couple.pyx)
  get_filename_component(MLIAP_CYTHON_BASE ${MLIAP_CYTHON_SRC} NAME_WE)
  file(MAKE_DIRECTORY ${MLIAP_BINARY_DIR})
  # Copy the .pyx file into the build folder and run cythonize on the copy, so
  # that the generated files are created in the build folder as well.
  # VERBATIM makes the escaping of the command arguments independent of the platform.
  add_custom_command(OUTPUT  "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp" "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.h"
          COMMAND            ${CMAKE_COMMAND} -E copy_if_different "${MLIAP_CYTHON_SRC}" "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx"
          COMMAND            ${Cythonize_EXECUTABLE} -3 "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx"
          WORKING_DIRECTORY  "${MLIAP_BINARY_DIR}"
          MAIN_DEPENDENCY    "${MLIAP_CYTHON_SRC}"
          COMMENT "Generating C++ sources with cythonize..."
          VERBATIM)
  target_compile_definitions(lammps PRIVATE -DMLIAP_PYTHON)
  target_sources(lammps PRIVATE ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp)
  target_include_directories(lammps PRIVATE ${MLIAP_BINARY_DIR})
 endif()
--- a/cmake/Modules/Packages/USER-PLUMED.cmake
+++ b/cmake/Modules/Packages/USER-PLUMED.cmake
@ -55,8 +55,8 @@ if(DOWNLOAD_PLUMED)
  endif()
  include(ExternalProject)
  ExternalProject_Add(plumed_build
-    URL https://github.com/plumed/plumed2/releases/download/v2.6.1/plumed-src-2.6.1.tgz
+    URL https://github.com/plumed/plumed2/releases/download/v2.7.0/plumed-src-2.7.0.tgz
-    URL_MD5 89a9a450fc6025299fe16af235957163
+    URL_MD5 95f29dd0c067577f11972ff90dfc7d12
    BUILD_IN_SOURCE 1
    CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
                                             ${CONFIGURE_REQUEST_PIC}
--- a/cmake/Modules/StyleHeaderUtils.cmake
+++ b/cmake/Modules/StyleHeaderUtils.cmake
@ -59,7 +59,6 @@ function(CreateStyleHeader path filename)
            set(temp "${temp}#include \"${FNAME}\"\n")
        endforeach()
    endif()
    message(STATUS "Generating ${filename}...")
    file(WRITE "${path}/${filename}.tmp" "${temp}" )
    execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${path}/${filename}.tmp" "${path}/${filename}")
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${path}/${filename}")
@ -142,6 +141,7 @@ function(RegisterStylesExt search_path extension sources)
 endfunction(RegisterStylesExt)
 function(GenerateStyleHeaders output_path)
    message(STATUS "Generating style headers...")
    GenerateStyleHeader(${output_path} ANGLE      angle     ) # force
    GenerateStyleHeader(${output_path} ATOM_VEC   atom      ) # atom      atom_vec_hybrid
    GenerateStyleHeader(${output_path} BODY       body      ) # atom_vec_body
@ -232,7 +232,6 @@ function(CreatePackagesHeader path filename)
      set(temp "${temp}#include \"${DNAME}/${FNAME}\"\n")
    endforeach()
  endif()
  message(STATUS "Generating ${filename}...")
  file(WRITE "${path}/${filename}.tmp" "${temp}" )
  execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${path}/${filename}.tmp" "${path}/${filename}")
  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${path}/${filename}")
@ -244,6 +243,7 @@ function(GeneratePackagesHeader path property style)
 endfunction(GeneratePackagesHeader)
 function(GeneratePackagesHeaders output_path)
    message(STATUS "Generating package headers...")
    GeneratePackagesHeader(${output_path} PKGANGLE      angle     ) # force
    GeneratePackagesHeader(${output_path} PKGATOM_VEC   atom      ) # atom      atom_vec_hybrid
    GeneratePackagesHeader(${output_path} PKGBODY       body      ) # atom_vec_body
--- a/cmake/Modules/Tools.cmake
+++ b/cmake/Modules/Tools.cmake
@ -0,0 +1,57 @@
 # Build and install the stand-alone helper programs when BUILD_TOOLS is enabled:
 # binary2txt (C++), chain.x (Fortran, only if Fortran is usable) and msi2lmp (C).
 if(BUILD_TOOLS)
  # binary2txt is compiled with the same LAMMPS_<size> define that is selected via LAMMPS_SIZES
  add_executable(binary2txt ${LAMMPS_TOOLS_DIR}/binary2txt.cpp)
  target_compile_definitions(binary2txt PRIVATE -DLAMMPS_${LAMMPS_SIZES})
  install(TARGETS binary2txt DESTINATION ${CMAKE_INSTALL_BINDIR})
  # NOTE(review): CheckGeneratorSupport is a project module that is not shown here;
  # presumably it sets CMAKE_GENERATOR_SUPPORT_FORTRAN - confirm.
  include(CheckGeneratorSupport)
  if(CMAKE_GENERATOR_SUPPORT_FORTRAN)
    # probe for a Fortran compiler first, so that a missing compiler
    # results in a warning and not in a failed enable_language() call
    include(CheckLanguage)
    check_language(Fortran)
    if(CMAKE_Fortran_COMPILER)
      enable_language(Fortran)
      add_executable(chain.x ${LAMMPS_TOOLS_DIR}/chain.f)
      target_link_libraries(chain.x PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
      install(TARGETS chain.x DESTINATION ${CMAKE_INSTALL_BINDIR})
    else()
      message(WARNING "No suitable Fortran compiler found, skipping build of 'chain.x'")
    endif()
  else()
    message(WARNING "CMake build doesn't support fortran, skipping build of 'chain.x'")
  endif()
  # msi2lmp consists of C sources, thus the C language must be enabled
  enable_language(C)
  get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE)
  # collect all .c files whose name does not start with a dot
  file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c)
  add_executable(msi2lmp ${MSI2LMP_SOURCES})
  target_link_libraries(msi2lmp PRIVATE ${MATH_LIBRARIES})
  install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR})
  # the manual page is installed together with the executable
  install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 endif()
 # Build and install the LAMMPS shell executable when BUILD_LAMMPS_SHELL is enabled.
 # It links to the "lammps" target and to the readline library found via pkg-config.
 if(BUILD_LAMMPS_SHELL)
  # readline is mandatory: configuration stops here if pkg-config or readline is missing
  find_package(PkgConfig REQUIRED)
  pkg_check_modules(READLINE IMPORTED_TARGET REQUIRED readline)
  # missing exception support is only reported, the shell is built regardless
  if(NOT LAMMPS_EXCEPTIONS)
    message(WARNING "The LAMMPS shell needs LAMMPS_EXCEPTIONS enabled for full functionality")
  endif()
  # include resource compiler to embed icons into the executable on Windows
  if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    enable_language(RC)
    set(ICON_RC_FILE ${LAMMPS_TOOLS_DIR}/lammps-shell/lmpicons.rc)
  endif()
  # ICON_RC_FILE is only set in the Windows branch above and expands to nothing otherwise
  add_executable(lammps-shell ${LAMMPS_TOOLS_DIR}/lammps-shell/lammps-shell.cpp ${ICON_RC_FILE})
  target_include_directories(lammps-shell PRIVATE ${LAMMPS_TOOLS_DIR}/lammps-shell)
  # workaround for broken readline pkg-config file on FreeBSD
  if(CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
    target_include_directories(lammps-shell PRIVATE /usr/local/include)
  endif()
  target_link_libraries(lammps-shell PRIVATE lammps PkgConfig::READLINE)
  # install the executable (as part of the LAMMPS_Targets export set), the icons folder, and the desktop file
  install(TARGETS lammps-shell EXPORT LAMMPS_Targets DESTINATION ${CMAKE_INSTALL_BINDIR})
  install(DIRECTORY ${LAMMPS_TOOLS_DIR}/lammps-shell/icons DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/)
  install(FILES ${LAMMPS_TOOLS_DIR}/lammps-shell/lammps-shell.desktop DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/applications/)
 endif()
--- a/cmake/Modules/YAML.cmake
+++ b/cmake/Modules/YAML.cmake
@ -0,0 +1,32 @@
 # Download libyaml, build it as a static library with its configure script,
 # and make the result available as the imported target Yaml::Yaml.
 include(ExternalProject)
 message(STATUS "Downloading and building YAML library")
 # the download location may be overridden through the (advanced) cache entry
 set(YAML_URL "https://pyyaml.org/download/libyaml/yaml-0.2.5.tar.gz" CACHE STRING "URL for libyaml tarball")
 mark_as_advanced(YAML_URL)
 ExternalProject_Add(libyaml
    URL ${YAML_URL}
    URL_MD5 bb15429d8fb787e7d3f1c83ae129a999
    SOURCE_DIR "${CMAKE_BINARY_DIR}/yaml-src"
    BINARY_DIR "${CMAKE_BINARY_DIR}/yaml-build"
    CONFIGURE_COMMAND
        <SOURCE_DIR>/configure
        ${CONFIGURE_REQUEST_PIC}
        CXX=${CMAKE_CXX_COMPILER}
        CC=${CMAKE_C_COMPILER}
        --prefix=<INSTALL_DIR>
        --disable-shared
    BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libyaml${CMAKE_STATIC_LIBRARY_SUFFIX}
    TEST_COMMAND "")
 # derive the locations of headers and library from the install prefix of the external project
 ExternalProject_Get_Property(libyaml INSTALL_DIR)
 set(YAML_INCLUDE_DIR ${INSTALL_DIR}/include)
 set(YAML_LIBRARY_DIR ${INSTALL_DIR}/lib)
 set(YAML_LIBRARY_PATH ${YAML_LIBRARY_DIR}/libyaml${CMAKE_STATIC_LIBRARY_SUFFIX})
 # workaround for CMake 3.10 on ubuntu 18.04
 file(MAKE_DIRECTORY ${YAML_INCLUDE_DIR} ${YAML_LIBRARY_DIR})
 add_library(Yaml::Yaml UNKNOWN IMPORTED)
 # the imported library only exists after the external project has been built
 add_dependencies(Yaml::Yaml libyaml)
 set_target_properties(Yaml::Yaml
    PROPERTIES
        IMPORTED_LOCATION ${YAML_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${YAML_INCLUDE_DIR})
--- a/cmake/Modules/generate_lmpgitversion.cmake
+++ b/cmake/Modules/generate_lmpgitversion.cmake
@ -7,17 +7,20 @@ set(temp_git_info "false")
 message(STATUS "Git Directory: ${LAMMPS_DIR}/.git")
 if(GIT_FOUND AND EXISTS ${LAMMPS_DIR}/.git)
  set(temp_git_info "true")
-  execute_process(COMMAND ${GIT_EXECUTABLE} -C ${LAMMPS_DIR} rev-parse HEAD
+  execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
    OUTPUT_VARIABLE temp_git_commit
    ERROR_QUIET
    WORKING_DIRECTORY ${LAMMPS_DIR}
    OUTPUT_STRIP_TRAILING_WHITESPACE)
-  execute_process(COMMAND ${GIT_EXECUTABLE} -C ${LAMMPS_DIR} rev-parse --abbrev-ref HEAD
+  execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
    OUTPUT_VARIABLE temp_git_branch
    ERROR_QUIET
    WORKING_DIRECTORY ${LAMMPS_DIR}
    OUTPUT_STRIP_TRAILING_WHITESPACE)
-  execute_process(COMMAND ${GIT_EXECUTABLE} -C ${LAMMPS_DIR} describe --dirty=-modified
+  execute_process(COMMAND ${GIT_EXECUTABLE} describe --dirty=-modified
    OUTPUT_VARIABLE temp_git_describe
    ERROR_QUIET
    WORKING_DIRECTORY ${LAMMPS_DIR}
    OUTPUT_STRIP_TRAILING_WHITESPACE)
 endif()
--- a/cmake/presets/all_off.cmake
+++ b/cmake/presets/all_off.cmake
@ -2,9 +2,9 @@
 # an existing package selection without losing any other settings
 set(ALL_PACKAGES ASPHERE BODY CLASS2 COLLOID COMPRESS CORESHELL DIPOLE GPU
-        GRANULAR KIM KOKKOS KSPACE LATTE MANYBODY MC MISC MESSAGE MOLECULE
+        GRANULAR KIM KOKKOS KSPACE LATTE MANYBODY MC MISC MESSAGE MLIAP
-        MPIIO MSCG OPT PERI POEMS PYTHON QEQ REPLICA RIGID SHOCK SNAP SPIN
+        MOLECULE MPIIO MSCG OPT PERI POEMS PYTHON QEQ REPLICA RIGID SHOCK
-        SRD VORONOI
+        SNAP SPIN SRD VORONOI
        USER-ADIOS USER-ATC USER-AWPMD USER-BOCS USER-CGDNA USER-CGSDK
        USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF USER-FEP
        USER-H5MD USER-INTEL USER-LB USER-MANIFOLD USER-MEAMC USER-MESODPD
--- a/cmake/presets/all_on.cmake
+++ b/cmake/presets/all_on.cmake
@ -4,9 +4,9 @@
 # with just a working C++ compiler and an MPI library.
 set(ALL_PACKAGES ASPHERE BODY CLASS2 COLLOID COMPRESS CORESHELL DIPOLE GPU
-        GRANULAR KIM KOKKOS KSPACE LATTE MANYBODY MC MISC MESSAGE MOLECULE
+        GRANULAR KIM KOKKOS KSPACE LATTE MANYBODY MC MISC MESSAGE MLIAP
-        MPIIO MSCG OPT PERI POEMS PYTHON QEQ REPLICA RIGID SHOCK SNAP SPIN
+        MOLECULE MPIIO MSCG OPT PERI POEMS PYTHON QEQ REPLICA RIGID SHOCK
-        SRD VORONOI
+        SNAP SPIN SRD VORONOI
        USER-ADIOS USER-ATC USER-AWPMD USER-BOCS USER-CGDNA USER-CGSDK
        USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF USER-FEP
        USER-H5MD USER-INTEL USER-LB USER-MANIFOLD USER-MEAMC USER-MESODPD
--- a/cmake/presets/clang.cmake
+++ b/cmake/presets/clang.cmake
@ -2,7 +2,8 @@
 set(CMAKE_CXX_COMPILER "clang++" CACHE STRING "" FORCE)
 set(CMAKE_C_COMPILER "clang" CACHE STRING "" FORCE)
-set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
 set(MPI_CXX "clang++" CACHE STRING "" FORCE)
 set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 unset(HAVE_OMP_H_INCLUDE CACHE)
@ -14,4 +15,3 @@ set(OpenMP_CXX "clang++" CACHE STRING "" FORCE)
 set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)
--- a/Show More
+++ b/Show More