Merge branch 'master' of https://github.com/lammps/lammps into snap-launch-bounds

2021-02-23 20:55:39 -05:00
parent 87fad6b82e 358c0a0c04
commit d2d6b63820
460 changed files with 18976 additions and 10791 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -156,8 +156,7 @@ if(BUILD_MPI)
    endif()
  endif()
 else()
-  enable_language(C)
+  file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.cpp)
  file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c)
  add_library(mpi_stubs STATIC ${MPI_SOURCES})
  set_target_properties(mpi_stubs PROPERTIES OUTPUT_NAME lammps_mpi_stubs${LAMMPS_MACHINE})
  target_include_directories(mpi_stubs PUBLIC $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
@ -778,9 +777,7 @@ if(PKG_GPU)
  message(STATUS "<<< GPU package settings >>>
 -- GPU API:          ${GPU_API}")
  if(GPU_API STREQUAL "CUDA")
-    message(STATUS "GPU architecture: ${GPU_ARCH}")
+    message(STATUS "GPU default architecture: ${GPU_ARCH}")
  elseif(GPU_API STREQUAL "OPENCL")
    message(STATUS "OpenCL tuning:    ${OCL_TUNE}")
  elseif(GPU_API STREQUAL "HIP")
    message(STATUS "HIP platform:     ${HIP_PLATFORM}")
    message(STATUS "HIP architecture: ${HIP_ARCH}")
--- a/cmake/Modules/Documentation.cmake
+++ b/cmake/Modules/Documentation.cmake
@ -50,9 +50,9 @@ if(BUILD_DOC)
    OUTPUT ${DOC_BUILD_DIR}/requirements.txt
    DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
    COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade pip
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
+    COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
  )
  # download mathjax distribution and unpack to folder "mathjax"
--- a/cmake/Modules/GTest.cmake
+++ b/cmake/Modules/GTest.cmake
@ -20,10 +20,10 @@ ExternalProject_Add(googletest
                                    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                                    -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                                    -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-                    BUILD_BYPRODUCTS <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest${GTEST_LIB_POSTFIX}.a
+                    BUILD_BYPRODUCTS <BINARY_DIR>/lib/libgtest${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgmock${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgtest_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
-                                     <BINARY_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main${GTEST_LIB_POSTFIX}.a
+                                     <BINARY_DIR>/lib/libgmock_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
                    LOG_DOWNLOAD ON
                    LOG_CONFIGURE ON
                    LOG_BUILD ON
@ -39,10 +39,10 @@ file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIR})
 file(MAKE_DIRECTORY ${GMOCK_INCLUDE_DIR})
 ExternalProject_Get_Property(googletest BINARY_DIR)
-set(GTEST_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest${GTEST_LIB_POSTFIX}.a)
+set(GTEST_LIBRARY_PATH ${BINARY_DIR}/lib/libgtest${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GMOCK_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock${GTEST_LIB_POSTFIX}.a)
+set(GMOCK_LIBRARY_PATH ${BINARY_DIR}/lib/libgmock${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GTEST_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main${GTEST_LIB_POSTFIX}.a)
+set(GTEST_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/libgtest_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(GMOCK_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main${GTEST_LIB_POSTFIX}.a)
+set(GMOCK_MAIN_LIBRARY_PATH ${BINARY_DIR}/lib/libgmock_main${GTEST_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
 # Prevent GoogleTest from overriding our compiler/linker options
 # when building with Visual Studio
--- a/cmake/Modules/OpenCLLoader.cmake
+++ b/cmake/Modules/OpenCLLoader.cmake
@ -0,0 +1,54 @@
 # Download and build the OpenCL loader library as an external project and
 # make it available to the rest of the build as the imported target
 # OpenCL::OpenCL (library location, include directory, dependent libraries).
 message(STATUS "Downloading and building OpenCL loader library")
 # The library file name carries a "d" postfix when CMAKE_BUILD_TYPE is Debug.
 # NOTE(review): CMAKE_BUILD_TYPE is empty with multi-config generators, so the
 # postfix would never be set there - confirm whether that case matters.
 if(CMAKE_BUILD_TYPE STREQUAL Debug)
  set(OPENCL_LOADER_LIB_POSTFIX d)
 else()
  set(OPENCL_LOADER_LIB_POSTFIX)
 endif()
 include(ExternalProject)
 # The tarball location is a cache variable so it can be overridden by the
 # user; it is marked as advanced to keep it out of the default option view.
 set(OPENCL_LOADER_URL "https://download.lammps.org/thirdparty/opencl-loader-2020.12.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
 mark_as_advanced(OPENCL_LOADER_URL)
 # Configure and build the loader inside the build tree.  The compiler, build
 # type, make program and toolchain file of this build are forwarded to the
 # sub-build.  Install and test commands are empty strings, and the library is
 # referenced directly in the external project's binary directory (see
 # BUILD_BYPRODUCTS).
 ExternalProject_Add(opencl_loader
                    URL ${OPENCL_LOADER_URL}
                    URL_MD5         011cdcbd41030be94f3fced6d763a52a
                    SOURCE_DIR      "${CMAKE_BINARY_DIR}/opencl_loader-src"
                    BINARY_DIR      "${CMAKE_BINARY_DIR}/opencl_loader-build"
                    CMAKE_ARGS      ${CMAKE_REQUEST_PIC} ${CMAKE_EXTRA_OPENCL_LOADER_OPTS}
                                    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                                    -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                                    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                                    -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                                    -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
                    BUILD_BYPRODUCTS <BINARY_DIR>/libOpenCL${OPENCL_LOADER_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
                    LOG_DOWNLOAD ON
                    LOG_CONFIGURE ON
                    LOG_BUILD ON
                    INSTALL_COMMAND ""
                    TEST_COMMAND    "")
 # The headers are taken from the "inc" folder of the unpacked sources.
 ExternalProject_Get_Property(opencl_loader SOURCE_DIR)
 set(OPENCL_LOADER_INCLUDE_DIR ${SOURCE_DIR}/inc)
 # workaround for CMake 3.10 on ubuntu 18.04
 # (presumably the include directory of an imported target has to exist at
 # configure time, i.e. before the sources are downloaded - TODO confirm)
 file(MAKE_DIRECTORY ${OPENCL_LOADER_INCLUDE_DIR})
 # Full path of the library file; must match the BUILD_BYPRODUCTS entry above.
 ExternalProject_Get_Property(opencl_loader BINARY_DIR)
 set(OPENCL_LOADER_LIBRARY_PATH "${BINARY_DIR}/libOpenCL${OPENCL_LOADER_LIB_POSTFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
 # Libraries that consumers of the loader have to link in addition:
 # the thread library and ${CMAKE_DL_LIBS} on non-Windows platforms,
 # cfgmgr32 and runtimeobject on Windows.
 # NOTE(review): find_package(Threads QUIET) does not stop when no thread
 # library is found, yet Threads::Threads is used unconditionally below for
 # non-Windows builds - consider REQUIRED for a clearer error message.
 find_package(Threads QUIET)
 if(NOT WIN32)
  set(OPENCL_LOADER_DEP_LIBS "Threads::Threads;${CMAKE_DL_LIBS}")
 else()
  set(OPENCL_LOADER_DEP_LIBS "cfgmgr32;runtimeobject")
 endif()
 # Imported target wrapping the library built by the external project.  The
 # add_dependencies() call makes the opencl_loader project a build dependency
 # of the imported target.
 add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
 add_dependencies(OpenCL::OpenCL opencl_loader)
 set_target_properties(OpenCL::OpenCL PROPERTIES
  IMPORTED_LOCATION ${OPENCL_LOADER_LIBRARY_PATH}
  INTERFACE_INCLUDE_DIRECTORIES ${OPENCL_LOADER_INCLUDE_DIR}
  INTERFACE_LINK_LIBRARIES "${OPENCL_LOADER_DEP_LIBS}")
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -1,7 +1,9 @@
 set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
 set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
                ${GPU_SOURCES_DIR}/fix_gpu.h
-                ${GPU_SOURCES_DIR}/fix_gpu.cpp)
+                ${GPU_SOURCES_DIR}/fix_gpu.cpp
                ${GPU_SOURCES_DIR}/fix_nh_gpu.h
                ${GPU_SOURCES_DIR}/fix_nh_gpu.cpp)
 target_compile_definitions(lammps PRIVATE -DLMP_GPU)
 set(GPU_API "opencl" CACHE STRING "API used by GPU package")
@ -97,9 +99,13 @@ if(GPU_API STREQUAL "CUDA")
  if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_75,code=[sm_75,compute_75]")
  endif()
-  # Ampere (GPU Arch 8.0 and 8.6) is supported by CUDA 11 and later
+  # Ampere (GPU Arch 8.0) is supported by CUDA 11 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
-    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80] -gencode arch=compute_86,code=[sm_86,compute_86]")
+    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_80,code=[sm_80,compute_80]")
  endif()
  # Ampere (GPU Arch 8.6) is supported by CUDA 11.1 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
  endif()
  if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
    message(WARNING "Unsupported CUDA version. Use at your own risk.")
@ -139,27 +145,13 @@ if(GPU_API STREQUAL "CUDA")
  target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
 elseif(GPU_API STREQUAL "OPENCL")
-  if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+  option(USE_STATIC_OPENCL_LOADER "Download and include a static OpenCL ICD loader" ON)
-    # download and unpack support binaries for compilation of windows binaries.
+  mark_as_advanced(USE_STATIC_OPENCL_LOADER)
-    set(LAMMPS_THIRDPARTY_URL "https://download.lammps.org/thirdparty")
+  if (USE_STATIC_OPENCL_LOADER)
-    file(DOWNLOAD "${LAMMPS_THIRDPARTY_URL}/opencl-win-devel.tar.gz" "${CMAKE_CURRENT_BINARY_DIR}/opencl-win-devel.tar.gz"
+    include(OpenCLLoader)
            EXPECTED_MD5 2c00364888d5671195598b44c2e0d44d)
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf opencl-win-devel.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
    if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
      set_target_properties(OpenCL::OpenCL PROPERTIES IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/lib_win32/libOpenCL.dll")
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
      set_target_properties(OpenCL::OpenCL PROPERTIES IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/lib_win64/libOpenCL.dll")
    endif()
    set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}/OpenCL/include")
  else()
    find_package(OpenCL REQUIRED)
  endif()
  set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning")
  set(OCL_TUNE_VALUES intel fermi kepler cypress generic)
  set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES})
  validate_option(OCL_TUNE OCL_TUNE_VALUES)
  string(TOUPPER ${OCL_TUNE} OCL_TUNE)
  include(OpenCLUtils)
  set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
@ -203,7 +195,7 @@ elseif(GPU_API STREQUAL "OPENCL")
  add_library(gpu STATIC ${GPU_LIB_SOURCES})
  target_link_libraries(gpu PRIVATE OpenCL::OpenCL)
  target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu)
-  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
+  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT)
  target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
  target_link_libraries(lammps PRIVATE gpu)
@ -211,6 +203,7 @@ elseif(GPU_API STREQUAL "OPENCL")
  add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
  target_link_libraries(ocl_get_devices PRIVATE OpenCL::OpenCL)
  add_dependencies(ocl_get_devices OpenCL::OpenCL)
 elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
@ -393,13 +386,10 @@ elseif(GPU_API STREQUAL "HIP")
  target_link_libraries(lammps PRIVATE gpu)
 endif()
 # GPU package
 FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX)
 set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
-
+# detect styles which have a GPU version
 # detects styles which have GPU version
 RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
 RegisterFixStyle(${GPU_SOURCES_DIR}/fix_gpu.h)
 get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
--- a/cmake/Modules/Packages/KIM.cmake
+++ b/cmake/Modules/Packages/KIM.cmake
@ -69,14 +69,14 @@ if(DOWNLOAD_KIM)
    BUILD_RPATH "${_rpath_prefix}/kim_build-prefix/lib"
    )
 else()
-  if(KIM-API_FOUND AND KIM_API_VERSION VERSION_GREATER_EQUAL 2.2.0)
+  if(KIM-API_FOUND AND KIM-API_VERSION VERSION_GREATER_EQUAL 2.2.0)
    # For kim-api >= 2.2.0
-    find_package(KIM-API ${KIM-API_MIN_VERSION} CONFIG REQUIRED)
+    find_package(KIM-API 2.2.0 CONFIG REQUIRED)
    target_link_libraries(lammps PRIVATE KIM-API::kim-api)
  else()
    # For kim-api 2.1.3 (consistent with previous version of this file)
    find_package(PkgConfig REQUIRED)
-    pkg_check_modules(KIM-API REQUIRED IMPORTED_TARGET libkim-api>=KIM-API_MIN_VERSION)
+    pkg_check_modules(KIM-API REQUIRED IMPORTED_TARGET libkim-api>=${KIM-API_MIN_VERSION})
    target_link_libraries(lammps PRIVATE PkgConfig::KIM-API)
  endif()
 endif()
--- a/cmake/Modules/Packages/MESSAGE.cmake
+++ b/cmake/Modules/Packages/MESSAGE.cmake
@ -2,8 +2,7 @@ if(LAMMPS_SIZES STREQUAL BIGBIG)
  message(FATAL_ERROR "The MESSAGE Package is not compatible with -DLAMMPS_BIGBIG")
 endif()
 option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF)
-file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F
+file(GLOB_RECURSE cslib_SOURCES
    ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c
        ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp)
 add_library(cslib STATIC ${cslib_SOURCES})
--- a/cmake/Modules/YAML.cmake
+++ b/cmake/Modules/YAML.cmake
@ -12,7 +12,7 @@ ExternalProject_Add(libyaml
                                      CXX=${CMAKE_CXX_COMPILER}
                                      CC=${CMAKE_C_COMPILER}
                                      --prefix=<INSTALL_DIR> --disable-shared
-                    BUILD_BYPRODUCTS  <INSTALL_DIR>/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a
+                    BUILD_BYPRODUCTS  <INSTALL_DIR>/lib/libyaml${CMAKE_STATIC_LIBRARY_SUFFIX}
                    TEST_COMMAND      "")
 ExternalProject_Get_Property(libyaml INSTALL_DIR)
@ -23,7 +23,7 @@ set(YAML_LIBRARY_DIR ${INSTALL_DIR}/lib)
 file(MAKE_DIRECTORY ${YAML_INCLUDE_DIR})
 file(MAKE_DIRECTORY ${YAML_LIBRARY_DIR})
-set(YAML_LIBRARY_PATH ${INSTALL_DIR}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}yaml.a)
+set(YAML_LIBRARY_PATH ${INSTALL_DIR}/lib/libyaml${CMAKE_STATIC_LIBRARY_SUFFIX})
 add_library(Yaml::Yaml UNKNOWN IMPORTED)
 set_target_properties(Yaml::Yaml PROPERTIES
--- a/doc/Makefile
+++ b/doc/Makefile
@ -47,6 +47,8 @@ HAS_PDFLATEX = YES
 endif
 endif
 # override settings for PIP commands
 # PIP_OPTIONS = --cert /etc/pki/ca-trust/extracted/openssl/ca-bundle.trust.crt --proxy http://proxy.mydomain.org
 #SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")
@ -228,13 +230,13 @@ $(VENV):
 	@( \
 		$(VIRTUALENV) -p $(PYTHON) $(VENV); \
 		. $(VENV)/bin/activate; \
-		pip install --upgrade pip; \
+		pip $(PIP_OPTIONS) install --upgrade pip; \
-		pip install -r $(BUILDDIR)/utils/requirements.txt; \
+		pip $(PIP_OPTIONS) install -r $(BUILDDIR)/utils/requirements.txt; \
 		deactivate;\
 	)
 $(MATHJAX):
-	@git clone --depth 1 https://github.com/mathjax/MathJax.git $@
+	@git clone --depth 1 git://github.com/mathjax/MathJax.git $@
 $(TXT2RST) $(ANCHORCHECK): $(VENV)
 	@( \
--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@ -95,7 +95,7 @@ standard. A more detailed discussion of that is below.
      .. note::
-         The file ``src/STUBS/mpi.c`` provides a CPU timer function
+         The file ``src/STUBS/mpi.cpp`` provides a CPU timer function
         called ``MPI_Wtime()`` that calls ``gettimeofday()``.  If your
         operating system does not support ``gettimeofday()``, you will
         need to insert code to call another timer.  Note that the
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -120,8 +120,6 @@ CMake build
   -D GPU_API=value             # value = opencl (default) or cuda or hip
   -D GPU_PREC=value            # precision setting
                                # value = double or mixed (default) or single
   -D OCL_TUNE=value            # hardware choice for GPU_API=opencl
                                # generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
   -D GPU_ARCH=value            # primary GPU hardware choice for GPU_API=cuda
                                # value = sm_XX, see below
                                # default is sm_50
@ -135,6 +133,8 @@ CMake build
                                # value = yes (default) or no
   -D CUDA_MPS_SUPPORT=value    # enables some tweaks required to run with active nvidia-cuda-mps daemon
                                # value = yes or no (default)
   -D USE_STATIC_OPENCL_LOADER=value  # downloads/includes OpenCL ICD loader library, no local OpenCL headers/libs needed
                                      # value = yes (default) or no
 :code:`GPU_ARCH` settings for different GPU hardware are as follows:
@ -161,6 +161,12 @@ When building with CMake, you **must NOT** build the GPU library in ``lib/gpu``
 using the traditional build procedure. CMake will detect files generated by that
 process and will terminate with an error and a suggestion for how to remove them.
 If you are compiling for OpenCL, the default setting is to download, build, and
 link with a static OpenCL ICD loader library and standard OpenCL headers.  This
 way no local OpenCL development headers or library needs to be present and only
 OpenCL compatible drivers need to be installed to use OpenCL.  If this is not
 desired, you can set :code:`USE_STATIC_OPENCL_LOADER` to :code:`no`.
 If you are compiling with HIP, note that before running CMake you will have to
 set appropriate environment variables. Some variables such as
 :code:`HCC_AMDGPU_TARGET` or :code:`CUDA_PATH` are necessary for :code:`hipcc`
@ -258,18 +264,18 @@ To build with this package, the KIM library with API v2 must be downloaded
 and built on your system. It must include the KIM models that you want to
 use with LAMMPS.
-If you would like to use the :doc:`kim_query <kim_commands>`
+If you would like to use the :doc:`kim query <kim_commands>`
 command, you also need to have libcurl installed with the matching
 development headers and the curl-config tool.
-If you would like to use the :doc:`kim_property <kim_commands>`
+If you would like to use the :doc:`kim property <kim_commands>`
 command, you need to build LAMMPS with the PYTHON package installed
 and linked to Python 3.6 or later. See the :ref:`PYTHON package build info <python>`
 for more details on this. After successfully building LAMMPS with Python, you
-also need to install the kim-property Python package, which can be easily done using
+also need to install the ``kim-property`` Python package, which can be easily
-*pip* as ``pip install kim-property``, or from the *conda-forge* channel as
+done using *pip* as ``pip install kim-property``, or from the *conda-forge*
-``conda install kim-property`` if LAMMPS is built in Conda. More detailed
+channel as ``conda install kim-property`` if LAMMPS is built in Conda. More
-information is available at:
+detailed information is available at:
 `kim-property installation <https://github.com/openkim/kim-property#installing-kim-property>`_.
 In addition to installing the KIM API, it is also necessary to install the
@ -309,7 +315,7 @@ minutes to hours) to build.  Of course you only need to do that once.)
      You can download and build the KIM library manually if you prefer;
      follow the instructions in ``lib/kim/README``.  You can also do
-      this in one step from the lammps/src dir, using a command like
+      this in one step from the lammps/src directory, using a command like
      these, which simply invoke the ``lib/kim/Install.py`` script with
      the specified args.
@ -377,10 +383,11 @@ Enabling the extra unit tests have some requirements,
  Conda. More detailed information is available at:
  `kim-property installation <https://github.com/openkim/kim-property#installing-kim-property>`_.
 * It is also necessary to install
-  ``EAM_Dynamo_Mendelev_2007_Zr__MO_848899341753_000``, and
+  ``EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000``,
-  ``EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005`` KIM models.
+  ``EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005``, and
  ``LennardJones612_UniversalShifted__MO_959249795837_003`` KIM models.
  See `Obtaining KIM Models <http://openkim.org/doc/usage/obtaining-models>`_
-  to learn how to install a pre-build binary of the OpenKIM Repository of
+  to learn how to install a pre-built binary of the OpenKIM Repository of
  Models or see
  `Installing KIM Models <https://openkim.org/doc/usage/obtaining-models/#installing_models>`_
  to learn how to install the specific KIM models.
--- a/doc/src/Build_link.rst
+++ b/doc/src/Build_link.rst
@ -20,16 +20,8 @@ the suffix ``.so.0`` (or some other number).
 .. note::
   Care should be taken to use the same MPI library for the calling code
-   and the LAMMPS library.  The ``library.h`` file includes ``mpi.h``
+   and the LAMMPS library unless LAMMPS is to be compiled without (real)
-   and uses definitions from it so those need to be available and
+   MPI support using the included STUBS MPI library.
   consistent.  When LAMMPS is compiled with the included STUBS MPI
   library, then its ``mpi.h`` file needs to be included.  While it is
   technically possible to use a full MPI library in the calling code
   and link to a serial LAMMPS library compiled with MPI STUBS, it is
   recommended to use the *same* MPI library for both, and then use
   ``MPI_Comm_split()`` in the calling code to pass a suitable
   communicator with a subset of MPI ranks to the function creating the
   LAMMPS instance.
 Link with LAMMPS as a static library
 ------------------------------------
@ -110,7 +102,7 @@ executable, that are also required to link the LAMMPS executable.
      .. code-block:: bash
-         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
+         gcc -c -O -I${HOME}/lammps/src -caller.c
         g++ -o caller caller.o -L${HOME}/lammps/lib/poems \
                      -L${HOME}/lammps/src/STUBS -L${HOME}/lammps/src \
                      -llammps_serial -lpoems -lmpi_stubs
@ -174,7 +166,7 @@ the POEMS package installed becomes:
      .. code-block:: bash
-         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
+         gcc -c -O -I${HOME}/lammps/src -caller.c
         g++ -o caller caller.o -L${HOME}/lammps/src -llammps_serial
 Locating liblammps.so at runtime
--- a/doc/src/Build_manual.rst
+++ b/doc/src/Build_manual.rst
@ -74,7 +74,11 @@ For the documentation build a python virtual environment is set up in
 the folder ``doc/docenv`` and various python packages are installed into
 that virtual environment via the ``pip`` tool.  For rendering embedded
 LaTeX code also the `MathJax <https://www.mathjax.org/>`_ JavaScript
-engine needs to be downloaded.
+engine needs to be downloaded.  If you need to pass additional options
 to the pip commands for them to work (e.g. to use a web proxy or to point
 to additional SSL certificates) you can set them via the ``PIP_OPTIONS``
 environment variable or uncomment and edit the ``PIP_OPTIONS`` setting
 at the beginning of the makefile.
 The actual translation is then done via ``make`` commands in the doc
 folder.  The following ``make`` commands are available:
@ -108,7 +112,10 @@ installation of the HTML manual pages into the "install" step when
 installing LAMMPS after the CMake build via ``cmake --build . --target
 install``.  The documentation build is included in the default build
 target, but can also be requested independently with
-``cmake --build . --target doc``.
+``cmake --build . --target doc``.  If you need to pass additional options
 to the pip commands for them to work (e.g. to use a web proxy or to point
 to additional SSL certificates) you can set them via the ``PIP_OPTIONS``
 environment variable.
 .. code-block:: bash
--- a/doc/src/Commands_all.rst
+++ b/doc/src/Commands_all.rst
@ -60,11 +60,7 @@ An alphabetic list of all general LAMMPS commands.
   * :doc:`include <include>`
   * :doc:`info <info>`
   * :doc:`jump <jump>`
-   * :doc:`kim_init <kim_commands>`
+   * :doc:`kim <kim_commands>`
   * :doc:`kim_interactions <kim_commands>`
   * :doc:`kim_param <kim_commands>`
   * :doc:`kim_property <kim_commands>`
   * :doc:`kim_query <kim_commands>`
   * :doc:`kspace_modify <kspace_modify>`
   * :doc:`kspace_style <kspace_style>`
   * :doc:`label <label>`
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@ -114,7 +114,7 @@ OPT.
   * :doc:`nph/eff <fix_nh_eff>`
   * :doc:`nph/sphere (o) <fix_nph_sphere>`
   * :doc:`nphug <fix_nphug>`
-   * :doc:`npt (iko) <fix_nh>`
+   * :doc:`npt (giko) <fix_nh>`
   * :doc:`npt/asphere (o) <fix_npt_asphere>`
   * :doc:`npt/body <fix_npt_body>`
   * :doc:`npt/cauchy <fix_npt_cauchy>`
@ -122,8 +122,8 @@ OPT.
   * :doc:`npt/sphere (o) <fix_npt_sphere>`
   * :doc:`npt/uef <fix_nh_uef>`
   * :doc:`numdiff <fix_numdiff>`
-   * :doc:`nve (iko) <fix_nve>`
+   * :doc:`nve (giko) <fix_nve>`
-   * :doc:`nve/asphere (i) <fix_nve_asphere>`
+   * :doc:`nve/asphere (gi) <fix_nve_asphere>`
   * :doc:`nve/asphere/noforce <fix_nve_asphere_noforce>`
   * :doc:`nve/awpmd <fix_nve_awpmd>`
   * :doc:`nve/body <fix_nve_body>`
@ -138,7 +138,7 @@ OPT.
   * :doc:`nve/spin <fix_nve_spin>`
   * :doc:`nve/tri <fix_nve_tri>`
   * :doc:`nvk <fix_nvk>`
-   * :doc:`nvt (iko) <fix_nh>`
+   * :doc:`nvt (giko) <fix_nh>`
   * :doc:`nvt/asphere (o) <fix_nvt_asphere>`
   * :doc:`nvt/body <fix_nvt_body>`
   * :doc:`nvt/eff <fix_nh_eff>`
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -122,7 +122,7 @@ OPT.
   * :doc:`lebedeva/z <pair_lebedeva_z>`
   * :doc:`lennard/mdf <pair_mdf>`
   * :doc:`line/lj <pair_line_lj>`
-   * :doc:`lj/charmm/coul/charmm (iko) <pair_charmm>`
+   * :doc:`lj/charmm/coul/charmm (giko) <pair_charmm>`
   * :doc:`lj/charmm/coul/charmm/implicit (ko) <pair_charmm>`
   * :doc:`lj/charmm/coul/long (gikot) <pair_charmm>`
   * :doc:`lj/charmm/coul/long/soft (o) <pair_fep_soft>`
--- a/doc/src/Install_tarball.rst
+++ b/doc/src/Install_tarball.rst
@ -33,22 +33,19 @@ in its name, e.g. lammps-23Jun18.
 ----------
-You can also download a zip file via the "Clone or download" button on
+You can also download a compressed tar or zip archive from the
-the `LAMMPS GitHub site <git_>`_.  The file name will be lammps-master.zip
+"Assets" sections of the `LAMMPS GitHub releases site <git_>`_.
-which can be unzipped with the following command, to create
+The file name will be lammps-<version>.zip which can be unzipped
-a lammps-master dir:
+with the following command, to create a lammps-<version> dir:
 .. code-block:: bash
   $ unzip lammps*.zip
-This version is the most up-to-date LAMMPS development version.  It
+This version corresponds to the selected LAMMPS patch or stable
-will have the date of the most recent patch release (see the file
+release.
 src/version.h).  But it will also include any new bug-fixes or
 features added since the last patch release.  They will be included in
 the next patch release tarball.
-.. _git: https://github.com/lammps/lammps
+.. _git: https://github.com/lammps/lammps/releases
 ----------
--- a/doc/src/Intro_features.rst
+++ b/doc/src/Intro_features.rst
@ -85,7 +85,7 @@ commands)
 * water potentials: TIP3P, TIP4P, SPC
 * implicit solvent potentials: hydrodynamic lubrication, Debye
 * force-field compatibility with common CHARMM, AMBER, DREIDING,     OPLS, GROMACS, COMPASS options
-* access to the `OpenKIM Repository <http://openkim.org>`_ of potentials via     :doc:`kim_init, kim_interactions, and kim_query <kim_commands>` commands
+* access to the `OpenKIM Repository <http://openkim.org>`_ of potentials via     :doc:`kim command <kim_commands>`
 * hybrid potentials: multiple pair, bond, angle, dihedral, improper     potentials can be used in one simulation
 * overlaid potentials: superposition of multiple pair potentials
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@ -367,17 +367,19 @@ KIM package
 **Contents:**
-This package contains a set of commands that serve as a wrapper on the
+This package contains a command with a set of subcommands that serve as a
 wrapper on the
 `Open Knowledgebase of Interatomic Models (OpenKIM) <https://openkim.org>`_
 repository of interatomic models (IMs) enabling compatible ones to be used in
 LAMMPS simulations.
-This includes :doc:`kim_init <kim_commands>`, and
+
-:doc:`kim_interactions <kim_commands>` commands to select, initialize and
+This includes :doc:`kim init <kim_commands>`, and
-instantiate the IM, a :doc:`kim_query <kim_commands>` command to perform web
+:doc:`kim interactions <kim_commands>` commands to select, initialize and
 instantiate the IM, a :doc:`kim query <kim_commands>` command to perform web
 queries for material property predictions of OpenKIM IMs, a
-:doc:`kim_param <kim_commands>` command to access KIM Model Parameters from
+:doc:`kim param <kim_commands>` command to access KIM Model Parameters from
-LAMMPS, and a :doc:`kim_property <kim_commands>` command to write material
+LAMMPS, and a :doc:`kim property <kim_commands>` command to write material
 properties computed in LAMMPS to standard KIM property instance format.
 Support for KIM IMs that conform to the
@ -386,8 +388,8 @@ is provided by the :doc:`pair_style kim <pair_kim>` command.
 .. note::
-   The command *pair_style kim* is called by *kim_interactions* and
+   The command *pair_style kim* is called by *kim interactions* and is not
-   is not recommended to be directly used in input scripts.
+   recommended to be directly used in input scripts.
 To use this package you must have the KIM API library available on your
 system. The KIM API is available for download on the
@ -404,7 +406,7 @@ and is funded by the `National Science Foundation <https://www.nsf.gov/>`_.
 API and the *pair_style kim* command. Yaser Afshar (U Minnesota),
 Axel Kohlmeyer (Temple U), Ellad Tadmor (U Minnesota), and
 Daniel Karls (U Minnesota) contributed to the
-:doc:`kim_commands <kim_commands>` interface in close collaboration with
+:doc:`kim command <kim_commands>` interface in close collaboration with
 Ryan Elliott.
 **Install:**
@ -414,7 +416,7 @@ This package has :ref:`specific installation instructions <kim>` on the
 **Supporting info:**
-* :doc:`kim_commands <kim_commands>`
+* :doc:`kim command <kim_commands>`
 * :doc:`pair_style kim <pair_kim>`
 * src/KIM: filenames -> commands
 * src/KIM/README
--- a/doc/src/Python_atoms.rst
+++ b/doc/src/Python_atoms.rst
@ -50,7 +50,7 @@ against invalid accesses.
      **Numpy Methods**:
-      * :py:meth:`numpy.extract_atom() <lammps.numpy_wrapper.extract_atom()>`: extract a per-atom quantity as numpy array
+      * :py:meth:`numpy.extract_atom() <lammps.numpy_wrapper.numpy_wrapper.extract_atom()>`: extract a per-atom quantity as numpy array
   .. tab:: PyLammps/IPyLammps API
--- a/doc/src/Python_module.rst
+++ b/doc/src/Python_module.rst
@ -61,7 +61,7 @@ functions. Below is a detailed documentation of the API.
 .. autoclass:: lammps.lammps
   :members:
-.. autoclass:: lammps.numpy::numpy_wrapper
+.. autoclass:: lammps.numpy_wrapper::numpy_wrapper
   :members:
 ----------
@ -134,8 +134,8 @@ Style Constants
   to request from computes or fixes. See :cpp:enum:`_LMP_STYLE_CONST`
   for the equivalent constants in the C library interface. Used in
   :py:func:`lammps.extract_compute`, :py:func:`lammps.extract_fix`, and their NumPy variants
-   :py:func:`lammps.numpy.extract_compute() <lammps.numpy.numpy_wrapper.extract_compute>` and
+   :py:func:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.numpy_wrapper.extract_compute>` and
-   :py:func:`lammps.numpy.extract_fix() <lammps.numpy.numpy_wrapper.extract_fix>`.
+   :py:func:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.numpy_wrapper.extract_fix>`.
 .. _py_type_constants:
@ -149,8 +149,8 @@ Type Constants
   to request  from computes  or fixes.  See :cpp:enum:`_LMP_TYPE_CONST`
   for the equivalent constants in the C library interface. Used in
   :py:func:`lammps.extract_compute`, :py:func:`lammps.extract_fix`, and their NumPy variants
-   :py:func:`lammps.numpy.extract_compute() <lammps.numpy.numpy_wrapper.extract_compute>` and
+   :py:func:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.numpy_wrapper.extract_compute>` and
-   :py:func:`lammps.numpy.extract_fix() <lammps.numpy.numpy_wrapper.extract_fix>`.
+   :py:func:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.numpy_wrapper.extract_fix>`.
 .. _py_vartype_constants:
@ -170,6 +170,6 @@ Classes representing internal objects
   :members:
   :no-undoc-members:
-.. autoclass:: lammps.numpy::NumPyNeighList
+.. autoclass:: lammps.numpy_wrapper::NumPyNeighList
   :members:
   :no-undoc-members:
--- a/doc/src/Python_neighbor.rst
+++ b/doc/src/Python_neighbor.rst
@ -14,5 +14,5 @@ Neighbor list access
 **NumPy Methods:**
-* :py:meth:`lammps.numpy.get_neighlist() <lammps.numpy_wrapper.get_neighlist()>`: Get neighbor list for given index, which uses NumPy arrays for its element neighbor arrays
+* :py:meth:`lammps.numpy.get_neighlist() <lammps.numpy_wrapper.numpy_wrapper.get_neighlist()>`: Get neighbor list for given index, which uses NumPy arrays for its element neighbor arrays
-* :py:meth:`lammps.numpy.get_neighlist_element_neighbors() <lammps.numpy_wrapper.get_neighlist_element_neighbors()>`: Get element in neighbor list and its neighbors (as numpy array)
+* :py:meth:`lammps.numpy.get_neighlist_element_neighbors() <lammps.numpy_wrapper.numpy_wrapper.get_neighlist_element_neighbors()>`: Get element in neighbor list and its neighbors (as numpy array)
--- a/doc/src/Python_objects.rst
+++ b/doc/src/Python_objects.rst
@ -36,9 +36,9 @@ computes, fixes, or variables in LAMMPS using the :py:mod:`lammps` module.
      Python subscripting. The values will be zero for atoms not in the
      specified group.
-      :py:meth:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.extract_compute()>`,
+      :py:meth:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.numpy_wrapper.extract_compute()>`,
-      :py:meth:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.extract_fix()>`, and
+      :py:meth:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.numpy_wrapper.extract_fix()>`, and
-      :py:meth:`lammps.numpy.extract_variable() <lammps.numpy_wrapper.extract_variable()>` are
+      :py:meth:`lammps.numpy.extract_variable() <lammps.numpy_wrapper.numpy_wrapper.extract_variable()>` are
      equivalent NumPy implementations that return NumPy arrays instead of ``ctypes`` pointers.
      The :py:meth:`lammps.set_variable() <lammps.lammps.set_variable()>` method sets an
@ -54,9 +54,9 @@ computes, fixes, or variables in LAMMPS using the :py:mod:`lammps` module.
      **NumPy Methods**:
-      * :py:meth:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.extract_compute()>`: extract value(s) from a compute, return arrays as numpy arrays
+      * :py:meth:`lammps.numpy.extract_compute() <lammps.numpy_wrapper.numpy_wrapper.extract_compute()>`: extract value(s) from a compute, return arrays as numpy arrays
-      * :py:meth:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.extract_fix()>`: extract value(s) from a fix, return arrays as numpy arrays
+      * :py:meth:`lammps.numpy.extract_fix() <lammps.numpy_wrapper.numpy_wrapper.extract_fix()>`: extract value(s) from a fix, return arrays as numpy arrays
-      * :py:meth:`lammps.numpy.extract_variable() <lammps.numpy_wrapper.extract_variable()>`: extract value(s) from a variable, return arrays as numpy arrays
+      * :py:meth:`lammps.numpy.extract_variable() <lammps.numpy_wrapper.numpy_wrapper.extract_variable()>`: extract value(s) from a variable, return arrays as numpy arrays
   .. tab:: PyLammps/IPyLammps API
--- a/doc/src/Speed_gpu.rst
+++ b/doc/src/Speed_gpu.rst
@ -1,11 +1,14 @@
 GPU package
 ===========
-The GPU package was developed by Mike Brown while at SNL and ORNL
+The GPU package was developed by Mike Brown while at SNL and ORNL (now
-and his collaborators, particularly Trung Nguyen (now at Northwestern).
+at Intel Corp.) and his collaborators, particularly Trung Nguyen (now at
-It provides GPU versions of many pair styles and for parts of the
+Northwestern).  Support for AMD GPUs via HIP was added by Vsevolod Nikolskiy
-:doc:`kspace_style pppm <kspace_style>` for long-range Coulombics.
+and coworkers at HSE University.
-It has the following general features:
+
 The GPU package provides GPU versions of many pair styles and for
 parts of the :doc:`kspace_style pppm <kspace_style>` for long-range
 Coulombics.  It has the following general features:
 * It is designed to exploit common GPU hardware configurations where one
  or more GPUs are coupled to many cores of one or more multi-core CPUs,
@ -24,8 +27,9 @@ It has the following general features:
  force vectors.
 * LAMMPS-specific code is in the GPU package.  It makes calls to a
  generic GPU library in the lib/gpu directory.  This library provides
-  NVIDIA support as well as more general OpenCL support, so that the
+  either Nvidia support, AMD support, or more general OpenCL support
-  same functionality is supported on a variety of hardware.
+  (for Nvidia GPUs, AMD GPUs, Intel GPUs, and multi-core CPUs),
  so that the same functionality is supported on a variety of hardware.
 **Required hardware/software:**
@ -45,12 +49,23 @@ to have the OpenCL headers and the (vendor neutral) OpenCL library installed.
 In OpenCL mode, the acceleration depends on having an `OpenCL Installable Client Driver (ICD) <https://www.khronos.org/news/permalink/opencl-installable-client-driver-icd-loader>`_
 installed. There can be multiple of them for the same or different hardware
 (GPUs, CPUs, Accelerators) installed at the same time. OpenCL refers to those
-as 'platforms'.  The GPU library will select the **first** suitable platform,
+as 'platforms'.  The GPU library will try to auto-select the most suitable platform,
-but this can be overridden using the device option of the :doc:`package <package>`
+but this can be overridden using the platform option of the :doc:`package <package>`
 command. Run lammps/lib/gpu/ocl_get_devices to get a list of available
 platforms and devices with a suitable ICD available.
-To compute and use this package in HIP mode, you have to have the AMD ROCm
+To compile and use this package for Intel GPUs, OpenCL or the Intel oneAPI
 HPC Toolkit can be installed using Linux package managers. The latter also
 provides optimized C++, MPI, and many other libraries and tools. See:
 * https://software.intel.com/content/www/us/en/develop/tools/oneapi/hpc-toolkit/download.html
 If you do not have a discrete GPU card installed, this package can still provide
 significant speedups on some CPUs that include integrated GPUs. Additionally, for
 many Macs, OpenCL is already included with the OS and Makefiles are available
 in the lib/gpu directory.
 To compile and use this package in HIP mode, you have to have the AMD ROCm
 software installed. Versions of ROCm older than 3.5 are currently deprecated
 by AMD.
@ -75,10 +90,20 @@ automatically if you create more MPI tasks/node than there are
 GPUs/node.  E.g. with 8 MPI tasks/node and 2 GPUs, each GPU will be
 shared by 4 MPI tasks.
 The GPU package also has limited support for OpenMP for both
 multi-threading and vectorization of routines that are run on the CPUs.
 This requires that the GPU library and LAMMPS are built with flags to
 enable OpenMP support (e.g. -fopenmp). Some styles for time integration
 are also available in the GPU package. These run completely on the CPUs
 in full double precision, but exploit multi-threading and vectorization
 for faster performance.
 Use the "-sf gpu" :doc:`command-line switch <Run_options>`, which will
 automatically append "gpu" to styles that support it.  Use the "-pk
 gpu Ng" :doc:`command-line switch <Run_options>` to set Ng = # of
-GPUs/node to use.
+GPUs/node to use. If Ng is 0, the number is selected automatically as
 the number of matching GPUs that have the highest number of compute
 cores.
 .. code-block:: bash
@ -87,8 +112,8 @@ GPUs/node to use.
   mpirun -np 48 -ppn 12 lmp_machine -sf gpu -pk gpu 2 -in in.script   # ditto on 4 16-core nodes
 Note that if the "-sf gpu" switch is used, it also issues a default
-:doc:`package gpu 1 <package>` command, which sets the number of
+:doc:`package gpu 0 <package>` command, which will result in
-GPUs/node to 1.
+automatic selection of the number of GPUs to use.
 Using the "-pk" switch explicitly allows for setting of the number of
 GPUs/node to use and additional options.  Its syntax is the same as
@ -138,6 +163,13 @@ Likewise, you should experiment with the precision setting for the GPU
 library to see if single or mixed precision will give accurate
 results, since they will typically be faster.
 MPI parallelism typically outperforms OpenMP parallelism, but in some
 cases using fewer MPI tasks and multiple OpenMP threads with the GPU
 package can give better performance. 3-body potentials can often perform
 better with multiple OMP threads because the inter-process communication
 is higher for these styles with the GPU package in order to allow
 deterministic results.
 **Guidelines for best performance:**
 * Using multiple MPI tasks per GPU will often give the best performance,
@ -161,6 +193,12 @@ results, since they will typically be faster.
  :doc:`angle <angle_style>`, :doc:`dihedral <dihedral_style>`,
  :doc:`improper <improper_style>`, and :doc:`long-range <kspace_style>`
  calculations will not be included in the "Pair" time.
 * Since only part of the pppm kspace style is GPU accelerated, it
  may be faster to only use GPU acceleration for Pair styles with
  long-range electrostatics.  See the "pair/only" keyword of the
  package command for a shortcut to do that.  The work between kspace
  on the CPU and non-bonded interactions on the GPU can be balanced
  through adjusting the Coulomb cutoff without loss of accuracy.
 * When the *mode* setting for the package gpu command is force/neigh,
  the time for neighbor list calculations on the GPU will be added into
  the "Pair" time, not the "Neigh" time.  An additional breakdown of the
--- a/doc/src/Speed_packages.rst
+++ b/doc/src/Speed_packages.rst
@ -16,7 +16,7 @@ These are the accelerator packages currently in LAMMPS, either as
 standard or user packages:
 +-----------------------------------------+-------------------------------------------------------+
-| :doc:`GPU Package <Speed_gpu>`          | for NVIDIA GPUs as well as OpenCL support             |
+| :doc:`GPU Package <Speed_gpu>`          | for GPUs via CUDA, OpenCL, or ROCm HIP                |
 +-----------------------------------------+-------------------------------------------------------+
 | :doc:`USER-INTEL Package <Speed_intel>` | for Intel CPUs and Intel Xeon Phi                     |
 +-----------------------------------------+-------------------------------------------------------+
@ -43,7 +43,7 @@ three kinds of hardware, via the listed packages:
 +-----------------+-----------------------------------------------------------------------------------------------------------------------------+
 | Many-core CPUs  | :doc:`USER-INTEL <Speed_intel>`, :doc:`KOKKOS <Speed_kokkos>`, :doc:`USER-OMP <Speed_omp>`, :doc:`OPT <Speed_opt>` packages |
 +-----------------+-----------------------------------------------------------------------------------------------------------------------------+
-| NVIDIA/AMD GPUs | :doc:`GPU <Speed_gpu>`, :doc:`KOKKOS <Speed_kokkos>` packages                                                               |
+| GPUs            | :doc:`GPU <Speed_gpu>`, :doc:`KOKKOS <Speed_kokkos>` packages                                                               |
 +-----------------+-----------------------------------------------------------------------------------------------------------------------------+
 | Intel Phi/AVX   | :doc:`USER-INTEL <Speed_intel>`, :doc:`KOKKOS <Speed_kokkos>` packages                                                      |
 +-----------------+-----------------------------------------------------------------------------------------------------------------------------+
@ -154,8 +154,8 @@ Here is a brief summary of what the various packages provide.  Details
 are in the individual accelerator sections.
 * Styles with a "gpu" suffix are part of the GPU package and can be run
-  on NVIDIA or AMD GPUs.  The speed-up on a GPU depends on a variety of
+  on Intel, NVIDIA, or AMD GPUs.  The speed-up on a GPU depends on a
-  factors, discussed in the accelerator sections.
+  variety of factors, discussed in the accelerator sections.
 * Styles with an "intel" suffix are part of the USER-INTEL
  package. These styles support vectorized single and mixed precision
  calculations, in addition to full double precision.  In extreme cases,
--- a/doc/src/compute_temp_chunk.rst
+++ b/doc/src/compute_temp_chunk.rst
@ -153,7 +153,7 @@ temp/chunk calculation to a file is to use the :doc:`fix ave/time <fix_ave_time>
   compute cc1 all chunk/atom molecule
   compute myChunk all temp/chunk cc1 temp
-   fix 1 all ave/time 100 1 100 c_myChunk file tmp.out mode vector
+   fix 1 all ave/time 100 1 100 c_myChunk[1] file tmp.out mode vector
 ----------
--- a/doc/src/fix_nh.rst
+++ b/doc/src/fix_nh.rst
@ -1,8 +1,10 @@
 .. index:: fix nvt
 .. index:: fix nvt/gpu
 .. index:: fix nvt/intel
 .. index:: fix nvt/kk
 .. index:: fix nvt/omp
 .. index:: fix npt
 .. index:: fix npt/gpu
 .. index:: fix npt/intel
 .. index:: fix npt/kk
 .. index:: fix npt/omp
@ -13,12 +15,12 @@
 fix nvt command
 ===============
-Accelerator Variants: *nvt/intel*, *nvt/kk*, *nvt/omp*
+Accelerator Variants: *nvt/gpu*, *nvt/intel*, *nvt/kk*, *nvt/omp*
 fix npt command
 ===============
-Accelerator Variants: *npt/intel*, *npt/kk*, *npt/omp*
+Accelerator Variants: *npt/gpu*, *npt/intel*, *npt/kk*, *npt/omp*
 fix nph command
 ===============
--- a/doc/src/fix_nve.rst
+++ b/doc/src/fix_nve.rst
@ -1,4 +1,5 @@
 .. index:: fix nve
 .. index:: fix nve/gpu
 .. index:: fix nve/intel
 .. index:: fix nve/kk
 .. index:: fix nve/omp
@ -6,7 +7,7 @@
 fix nve command
 ===============
-Accelerator Variants: *nve/intel*, *nve/kk*, *nve/omp*
+Accelerator Variants: *nve/gpu*, *nve/intel*, *nve/kk*, *nve/omp*
 Syntax
 """"""
--- a/doc/src/fix_nve_asphere.rst
+++ b/doc/src/fix_nve_asphere.rst
@ -1,10 +1,11 @@
 .. index:: fix nve/asphere
 .. index:: fix nve/asphere/gpu
 .. index:: fix nve/asphere/intel
 fix nve/asphere command
 =======================
-Accelerator Variants: *nve/asphere/intel*
+Accelerator Variants: *nve/asphere/gpu*, *nve/asphere/intel*
 Syntax
 """"""
--- a/doc/src/kim_commands.rst
+++ b/doc/src/kim_commands.rst
--- a/doc/src/package.rst
+++ b/doc/src/package.rst
@ -18,7 +18,7 @@ Syntax
       *gpu* args = Ngpu keyword value ...
         Ngpu = # of GPUs per node
         zero or more keyword/value pairs may be appended
-         keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *device* or *blocksize*
+         keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *blocksize* or *omp* or *platform* or *device_type* or *ocl_args*
           *neigh* value = *yes* or *no*
             yes = neighbor list build on GPU (default)
             no = neighbor list build on CPU
@ -32,17 +32,20 @@ Syntax
             size = bin size for neighbor list construction (distance units)
           *split* = fraction
             fraction = fraction of atoms assigned to GPU (default = 1.0)
-           *gpuID* values = first last
+           *tpa* value = Nlanes
-             first = ID of first GPU to be used on each node
+             Nlanes = # of GPU vector lanes (CUDA threads) used per atom
             last = ID of last GPU to be used on each node
           *tpa* value = Nthreads
             Nthreads = # of GPU threads used per atom
           *device* value = device_type or platform_id:device_type or platform_id:custom,val1,val2,val3,..,val13
             platform_id = numerical OpenCL platform id (default: -1)
             device_type = *kepler* or *fermi* or *cypress* or *intel* or *phi* or *generic* or *custom*
             val1,val2,... = custom OpenCL tune parameters (see below for details)
           *blocksize* value = size
             size = thread block size for pair force computation
           *omp* value = Nthreads
             Nthreads = number of OpenMP threads to use on CPU (default = 0)
           *platform* value = id
             id = For OpenCL, platform ID for the GPU or accelerator
           *gpuID* values = id
             id = ID of first GPU to be used on each node
           *device_type* value = *intelgpu* or *nvidiagpu* or *amdgpu* or *applegpu* or *generic* or *custom,val1,val2,...*
             val1,val2,... = custom OpenCL accelerator configuration parameters (see below for details)
           *ocl_args* value = args
             args = List of additional OpenCL compiler arguments delimited by colons
       *intel* args = NPhi keyword value ...
         Nphi = # of co-processors per node
         zero or more keyword/value pairs may be appended
@ -100,7 +103,7 @@ Syntax
             off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
             on  = use device acceleration only for pair styles (and host acceleration for others)
       *omp* args = Nthreads keyword value ...
-         Nthread = # of OpenMP threads to associate with each MPI process
+         Nthreads = # of OpenMP threads to associate with each MPI process
         zero or more keyword/value pairs may be appended
         keywords = *neigh*
           *neigh* value = *yes* or *no*
@ -112,12 +115,10 @@ Examples
 .. code-block:: LAMMPS
-   package gpu 1
+   package gpu 0
   package gpu 1 split 0.75
   package gpu 2 split -1.0
-   package gpu 1 device kepler
+   package gpu 0 omp 2 device_type intelgpu
   package gpu 1 device 2:generic
   package gpu 1 device custom,32,4,8,256,11,128,256,128,32,64,8,128,128
   package kokkos neigh half comm device
   package omp 0 neigh no
   package omp 4
@ -174,10 +175,18 @@ simulations.
 The *gpu* style invokes settings associated with the use of the GPU
 package.
-The *Ngpu* argument sets the number of GPUs per node.  There must be
+The *Ngpu* argument sets the number of GPUs per node. If *Ngpu* is 0
-at least as many MPI tasks per node as GPUs, as set by the mpirun or
+and no other keywords are specified, GPU or accelerator devices are
-mpiexec command.  If there are more MPI tasks (per node)
+auto-selected. In this process, all platforms are searched for
-than GPUs, multiple MPI tasks will share each GPU.
+accelerator devices and GPUs are chosen if available. The device with
 the highest number of compute cores is selected. The number of devices
 is increased to be the number of matching accelerators with the same
 number of compute cores. If there are more devices than MPI tasks,
 the additional devices will be unused. The auto-selection of
 GPUs/accelerator devices and platforms can be restricted by specifying
 a non-zero value for *Ngpu* and / or using the *gpuID*, *platform*,
 and *device_type* keywords as described below. If there are more MPI
 tasks (per node) than GPUs, multiple MPI tasks will share each GPU.
 Optional keyword/value pairs can also be specified.  Each has a
 default value as listed below.
@ -212,18 +221,8 @@ overlapped with all other computations on the CPU.
 The *binsize* keyword sets the size of bins used to bin atoms in
 neighbor list builds performed on the GPU, if *neigh* = *yes* is set.
-If *binsize* is set to 0.0 (the default), then bins = the size of the
+If *binsize* is set to 0.0 (the default), then the binsize is set
-pairwise cutoff + neighbor skin distance.  This is 2x larger than the
+automatically using heuristics in the GPU package.
 LAMMPS default used for neighbor list building on the CPU.  This will
 be close to optimal for the GPU, so you do not normally need to use
 this keyword.  Note that if you use a longer-than-usual pairwise
 cutoff, e.g. to allow for a smaller fraction of KSpace work with a
 :doc:`long-range Coulombic solver <kspace_style>` because the GPU is
 faster at performing pairwise interactions, then it may be optimal to
 make the *binsize* smaller than the default.  For example, with a
 cutoff of 20\*sigma in LJ :doc:`units <units>` and a neighbor skin
 distance of sigma, a *binsize* = 5.25\*sigma can be more efficient than
 the default.
 The *split* keyword can be used for load balancing force calculations
 between CPU and GPU cores in GPU-enabled pair styles. If 0 < *split* <
@ -257,63 +256,79 @@ cores would perform force calculations for some fraction of the
 particles at the same time the GPUs performed force calculation for
 the other particles.
-The *gpuID* keyword allows selection of which GPUs on each node will
+The *gpuID* keyword is used to specify the first ID for the GPU or
-be used for a simulation.  The *first* and *last* values specify the
+other accelerator that LAMMPS will use. For example, if the ID is
-GPU IDs to use (from 0 to Ngpu-1).  By default, first = 0 and last =
+1 and *Ngpu* is 3, GPUs 1-3 will be used. Device IDs should be
-Ngpu-1, so that all GPUs are used, assuming Ngpu is set to the number
+determined from the output of nvc_get_devices, ocl_get_devices,
-of physical GPUs.  If you only wish to use a subset, set Ngpu to a
+or hip_get_devices
-smaller number and first/last to a sub-range of the available GPUs.
+as provided in the lib/gpu directory. When using OpenCL with
 accelerators that have main memory NUMA, the accelerators can be
 split into smaller virtual accelerators for more efficient use
 with MPI.
-The *tpa* keyword sets the number of GPU thread per atom used to
+The *tpa* keyword sets the number of GPU vector lanes per atom used to
 perform force calculations.  With a default value of 1, the number of
-threads will be chosen based on the pair style, however, the value can
+lanes will be chosen based on the pair style; however, the value can
 be set explicitly with this keyword to fine-tune performance.  For
 large cutoffs or with a small number of particles per GPU, increasing
-the value can improve performance. The number of threads per atom must
+the value can improve performance. The number of lanes per atom must
-be a power of 2 and currently cannot be greater than 32.
+be a power of 2 and currently cannot be greater than the SIMD width
-
+for the GPU / accelerator. In the case it exceeds the SIMD width, it
-The *device* keyword can be used to tune parameters optimized for a
+will automatically be decreased to meet the restriction.
 specific accelerator and platform when using OpenCL. OpenCL supports
 the concept of a **platform**\ , which represents one or more devices that
 share the same driver (e.g. there would be a different platform for
 GPUs from different vendors or for CPU based accelerator support).
 In LAMMPS only one platform can be active at a time and by default
 the first platform with an accelerator is selected. This is equivalent
 to using a platform ID of -1. The platform ID is a number corresponding
 to the output of the ocl_get_devices tool. The platform ID is passed
 to the GPU library, by prefixing the *device* keyword with that number
 separated by a colon. For CUDA, the *device* keyword is ignored.
 Currently, the device tuning support is limited to NVIDIA Kepler, NVIDIA
 Fermi, AMD Cypress, Intel x86_64 CPU, Intel Xeon Phi, or a generic device.
 More devices may be added later.  The default device type can be
 specified when building LAMMPS with the GPU library, via setting a
 variable in the lib/gpu/Makefile that is used.
 In addition, a device type *custom* is available, which is followed by
 13 comma separated numbers, which allows to set those tweakable parameters
 from the package command. It can be combined with the (colon separated)
 platform id. The individual settings are:
 * MEM_THREADS
 * THREADS_PER_ATOM
 * THREADS_PER_CHARGE
 * BLOCK_PAIR
 * MAX_SHARED_TYPES
 * BLOCK_NBOR_BUILD
 * BLOCK_BIO_PAIR
 * BLOCK_ELLIPSE
 * WARP_SIZE
 * PPPM_BLOCK_1D
 * BLOCK_CELL_2D
 * BLOCK_CELL_ID
 * MAX_BIO_SHARED_TYPES
 The *blocksize* keyword allows you to tweak the number of threads used
 per thread block. This number should be a multiple of 32 (for GPUs)
 and its maximum depends on the specific GPU hardware. Typical choices
 are 64, 128, or 256. A larger block size increases occupancy of
 individual GPU cores, but reduces the total number of thread blocks,
-thus may lead to load imbalance.
+thus may lead to load imbalance. On modern hardware, the sensitivity
 to the blocksize is typically low.
 The *Nthreads* value for the *omp* keyword sets the number of OpenMP
 threads allocated for each MPI task. This setting controls OpenMP
 parallelism only for routines run on the CPUs. For more details on
 setting the number of OpenMP threads, see the discussion of the
 *Nthreads* setting on this doc page for the "package omp" command.
 The meaning of *Nthreads* is exactly the same for the GPU, USER-INTEL,
 and USER-OMP packages.
 The *platform* keyword is only used with OpenCL to specify the ID for
 an OpenCL platform. See the output from ocl_get_devices in the lib/gpu
 directory. In LAMMPS only one platform can be active at a time and by
 default (id=-1) the platform is auto-selected to find the GPU with the
 most compute cores. When *Ngpu* or other keywords are specified, the
 auto-selection is appropriately restricted. For example, if *Ngpu* is
 3, only platforms with at least 3 accelerators are considered. Similar
 restrictions can be enforced by the *gpuID* and *device_type* keywords.
 The *device_type* keyword can be used for OpenCL to specify the type of
 GPU to use or specify a custom configuration for an accelerator. In most
 cases this selection will be automatic and there is no need to use the
 keyword. The *applegpu* type is not specific to a particular GPU vendor,
 but is separate due to the more restrictive Apple OpenCL implementation.
 For expert users, to specify a custom configuration, the *custom* keyword
 followed by the following parameters can be specified:
 CONFIG_ID, SIMD_SIZE, MEM_THREADS, SHUFFLE_AVAIL, FAST_MATH,
 THREADS_PER_ATOM, THREADS_PER_CHARGE, THREADS_PER_THREE, BLOCK_PAIR,
 BLOCK_BIO_PAIR, BLOCK_ELLIPSE, PPPM_BLOCK_1D, BLOCK_NBOR_BUILD,
 BLOCK_CELL_2D, BLOCK_CELL_ID, MAX_SHARED_TYPES, MAX_BIO_SHARED_TYPES,
 PPPM_MAX_SPLINE.
 CONFIG_ID can be 0. SHUFFLE_AVAIL in {0,1} indicates that inline-PTX
 (NVIDIA) or OpenCL extensions (Intel) should be used for horizontal
 vector operations. FAST_MATH in {0,1} indicates that OpenCL fast math
 optimizations are used during the build and hardware-accelerated
 transcendental functions are used when available. THREADS_PER_* give the
 default *tpa* values for ellipsoidal models, styles using charge, and
 any other styles. The BLOCK_* parameters specify the block sizes for
 various kernel calls and the MAX_*SHARED*_ parameters are used to
 determine the amount of local shared memory to use for storing model
 parameters.
 For OpenCL, the routines are compiled at runtime for the specified GPU
 or accelerator architecture. The *ocl_args* keyword can be used to
 specify additional flags for the runtime build.
 ----------
@ -331,44 +346,13 @@ built with co-processor support.
 Optional keyword/value pairs can also be specified.  Each has a
 default value as listed below.
-The *omp* keyword determines the number of OpenMP threads allocated
+The *Nthreads* value for the *omp* keyword sets the number of OpenMP
-for each MPI task when any portion of the interactions computed by a
+threads allocated for each MPI task. This setting controls OpenMP
-USER-INTEL pair style are run on the CPU.  This can be the case even
+parallelism only for routines run on the CPUs. For more details on
-if LAMMPS was built with co-processor support; see the *balance*
+setting the number of OpenMP threads, see the discussion of the
-keyword discussion below.  If you are running with less MPI tasks/node
+*Nthreads* setting on this doc page for the "package omp" command.
-than there are CPUs, it can be advantageous to use OpenMP threading on
+The meaning of *Nthreads* is exactly the same for the GPU, USER-INTEL,
-the CPUs.
+and USER-OMP packages.
 .. note::
   The *omp* keyword has nothing to do with co-processor threads on
   the Xeon Phi; see the *tpc* and *tptask* keywords below for a
   discussion of co-processor threads.
 The *Nthread* value for the *omp* keyword sets the number of OpenMP
 threads allocated for each MPI task.  Setting *Nthread* = 0 (the
 default) instructs LAMMPS to use whatever value is the default for the
 given OpenMP environment. This is usually determined via the
 *OMP_NUM_THREADS* environment variable or the compiler runtime, which
 is usually a value of 1.
 For more details, including examples of how to set the OMP_NUM_THREADS
 environment variable, see the discussion of the *Nthreads* setting on
 this doc page for the "package omp" command.  Nthreads is a required
 argument for the USER-OMP package.  Its meaning is exactly the same
 for the USER-INTEL package.
 .. note::
   If you build LAMMPS with both the USER-INTEL and USER-OMP
   packages, be aware that both packages allow setting of the *Nthreads*
   value via their package commands, but there is only a single global
   *Nthreads* value used by OpenMP.  Thus if both package commands are
   invoked, you should ensure the two values are consistent.  If they are
   not, the last one invoked will take precedence, for both packages.
   Also note that if the :doc:`-sf hybrid intel omp command-line switch <Run_options>` is used, it invokes a "package intel"
   command, followed by a "package omp" command, both with a setting of
   *Nthreads* = 0.
 The *mode* keyword determines the precision mode to use for
 computing pair style forces, either on the CPU or on the co-processor,
@ -574,7 +558,7 @@ result in better performance for certain configurations and system sizes.
 The *omp* style invokes settings associated with the use of the
 USER-OMP package.
-The *Nthread* argument sets the number of OpenMP threads allocated for
+The *Nthreads* argument sets the number of OpenMP threads allocated for
 each MPI task.  For example, if your system has nodes with dual
 quad-core processors, it has a total of 8 cores per node.  You could
 use two MPI tasks per node (e.g. using the -ppn option of the mpirun
@ -583,7 +567,7 @@ This would use all 8 cores on each node.  Note that the product of MPI
 tasks \* threads/task should not exceed the physical number of cores
 (on a node), otherwise performance will suffer.
-Setting *Nthread* = 0 instructs LAMMPS to use whatever value is the
+Setting *Nthreads* = 0 instructs LAMMPS to use whatever value is the
 default for the given OpenMP environment. This is usually determined
 via the *OMP_NUM_THREADS* environment variable or the compiler
 runtime.  Note that in most cases the default for OpenMP capable
@ -614,6 +598,24 @@ input.  Not all features of LAMMPS support OpenMP threading via the
 USER-OMP package and the parallel efficiency can be very different,
 too.
 .. note::
   If you build LAMMPS with the GPU, USER-INTEL, and / or USER-OMP
   packages, be aware these packages all allow setting of the *Nthreads*
   value via their package commands, but there is only a single global
   *Nthreads* value used by OpenMP.  Thus if multiple package commands are
   invoked, you should ensure the values are consistent.  If they are
   not, the last one invoked will take precedence, for all packages.
   Also note that if the :doc:`-sf hybrid intel omp command-line switch <Run_options>` is used, it invokes a "package intel" command, followed by a
   "package omp" command, both with a setting of *Nthreads* = 0. Likewise
   for a hybrid suffix for gpu and omp. Note that KOKKOS also supports
   setting the number of OpenMP threads from the command line using the
   "-k on" :doc:`command-line switch <Run_options>`. The default for
   KOKKOS is 1 thread per MPI task, so any other number of threads should
   be explicitly set using the "-k on" command-line switch (and this
   setting should be consistent with settings from any other packages
   used).
 Optional keyword/value pairs can also be specified.  Each has a
 default value as listed below.
@ -658,9 +660,9 @@ Related commands
 Default
 """""""
-For the GPU package, the default is Ngpu = 1 and the option defaults
+For the GPU package, the default is Ngpu = 0 and the option defaults
 are neigh = yes, newton = off, binsize = 0.0, split = 1.0, gpuID = 0
-to Ngpu-1, tpa = 1, and device = not used.  These settings are made
+to Ngpu-1, tpa = 1, omp = 0, and platform = -1.  These settings are made
 automatically if the "-sf gpu" :doc:`command-line switch <Run_options>`
 is used.  If it is not used, you must invoke the package gpu command
 in your input script or via the "-pk gpu" :doc:`command-line switch <Run_options>`.
--- a/doc/src/pair_adp.rst
+++ b/doc/src/pair_adp.rst
@ -59,7 +59,7 @@ command to specify them.
 * The OpenKIM Project at
  `https://openkim.org/browse/models/by-type <https://openkim.org/browse/models/by-type>`_
  provides ADP potentials that can be used directly in LAMMPS with the
-  :doc:`kim_commands <kim_commands>` interface.
+  :doc:`kim command <kim_commands>` interface.
 ----------
--- a/doc/src/pair_charmm.rst
+++ b/doc/src/pair_charmm.rst
@ -1,4 +1,5 @@
 .. index:: pair_style lj/charmm/coul/charmm
 .. index:: pair_style lj/charmm/coul/charmm/gpu
 .. index:: pair_style lj/charmm/coul/charmm/intel
 .. index:: pair_style lj/charmm/coul/charmm/kk
 .. index:: pair_style lj/charmm/coul/charmm/omp
@ -19,7 +20,7 @@
 pair_style lj/charmm/coul/charmm command
 ========================================
-Accelerator Variants: *lj/charmm/coul/charmm/intel*, *lj/charmm/coul/charmm/kk*, *lj/charmm/coul/charmm/omp*
+Accelerator Variants: *lj/charmm/coul/charmm/gpu*, *lj/charmm/coul/charmm/intel*, *lj/charmm/coul/charmm/kk*, *lj/charmm/coul/charmm/omp*
 pair_style lj/charmm/coul/charmm/implicit command
 =================================================
--- a/doc/src/pair_eam.rst
+++ b/doc/src/pair_eam.rst
@ -141,7 +141,7 @@ interatomic potentials and file formats.
 The OpenKIM Project at
 `https://openkim.org/browse/models/by-type <https://openkim.org/browse/models/by-type>`_
 provides EAM potentials that can be used directly in LAMMPS with the
-:doc:`kim_commands <kim_commands>` interface.
+:doc:`kim command <kim_commands>` interface.
 ----------
--- a/doc/src/pair_kim.rst
+++ b/doc/src/pair_kim.rst
@ -23,29 +23,30 @@ Examples
 Description
 """""""""""
-This pair style is a wrapper on the `Open Knowledgebase of Interatomic Models (OpenKIM) <https://openkim.org>`_ repository of interatomic
+This pair style is a wrapper on the
-potentials to enable their use in LAMMPS scripts.
+`Open Knowledgebase of Interatomic Models (OpenKIM) <https://openkim.org>`_
 repository of interatomic potentials to enable their use in LAMMPS scripts.
 The preferred interface for using interatomic models archived in
-OpenKIM is the :doc:`kim_commands interface <kim_commands>`. That
+OpenKIM is the :doc:`kim command <kim_commands>` interface. That
 interface supports both "KIM Portable Models" (PMs) that conform to the
 KIM API Portable Model Interface (PMI) and can be used by any
 simulation code that conforms to the KIM API/PMI, and
-"KIM Simulator Models" that are natively implemented within a single
+"KIM Simulator Models" (SMs) that are natively implemented within a single
 simulation code (like LAMMPS) and can only be used with it.
 The *pair_style kim* command is limited to KIM PMs. It is
-used by the :doc:`kim_commands interface <kim_commands>` as needed.
+used by the :doc:`kim command <kim_commands>` interface as needed.
 .. note::
-   Since *pair_style kim* is called by *kim_interactions* as needed,
+   Since *pair_style kim* is called by *kim interactions* as needed,
-   is not recommended to be directly used in input scripts.
+   it is not recommended to be directly used in input scripts.
 ----------
 The argument *model* is the name of the KIM PM.
 For potentials archived in OpenKIM
-this is the extended KIM ID (see :doc:`kim_commands <kim_commands>`
+this is the extended KIM ID (see :doc:`kim command <kim_commands>`
 for details). LAMMPS can invoke any KIM PM, however there can
 be incompatibilities (for example due to unit matching issues).
 In the event of an incompatibility, the code will terminate with
@ -106,7 +107,7 @@ Restrictions
 """"""""""""
 This pair style is part of the KIM package. See details on
-restrictions in :doc:`kim_commands <kim_commands>`.
+restrictions in :doc:`kim command <kim_commands>`.
 This current version of pair_style kim is compatible with the
 kim-api package version 2.0.0 and higher.
@ -114,7 +115,7 @@ kim-api package version 2.0.0 and higher.
 Related commands
 """"""""""""""""
-:doc:`pair_coeff <pair_coeff>`, :doc:`kim_commands <kim_commands>`
+:doc:`pair_coeff <pair_coeff>`, :doc:`kim command <kim_commands>`
 Default
 """""""
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1,6 +1,6 @@
 Sphinx
 sphinxcontrib-spelling
-git+https://github.com/akohlmey/sphinx-fortran@parallel-read
+git+git://github.com/akohlmey/sphinx-fortran@parallel-read
 sphinx_tabs
 breathe
 Pygments
--- a/doc/utils/sphinx-config/LAMMPSLexer.py
+++ b/doc/utils/sphinx-config/LAMMPSLexer.py
@ -8,8 +8,8 @@ LAMMPS_COMMANDS = ("angle_coeff", "angle_style", "atom_modify", "atom_style",
 "delete_bonds", "dielectric", "dihedral_coeff", "dihedral_style", "dimension",
 "displace_atoms", "dump_modify", "dynamical_matrix", "echo",
 "fix_modify", "group2ndx", "hyper", "if", "improper_coeff",
-"improper_style", "include", "info", "jump", "kim_init", "kim_interactions",
+"improper_style", "include", "info", "jump", "kim",
-"kim_param", "kim_query", "kspace_modify", "kspace_style", "label", "lattice",
+"kspace_modify", "kspace_style", "label", "lattice",
 "log", "mass", "message", "minimize", "min_modify", "min_style", "molecule",
 "ndx2group", "neb", "neb/spin", "neighbor", "neigh_modify", "newton", "next",
 "package", "pair_coeff", "pair_modify", "pair_style", "pair_write",
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -2297,6 +2297,7 @@ omegaz
 Omelyan
 omp
 OMP
 oneAPI
 onelevel
 oneway
 onn
@ -2528,6 +2529,7 @@ ptm
 PTM
 ptol
 ptr
 PTX
 pu
 purdue
 Purohit
--- a/examples/USER/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability
+++ b/examples/USER/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability
@ -22,7 +22,7 @@ improper_style class2
 read_data tiny_nylon.data
 variable runsteps equal 1000
-variable prob1 equal step/v_runsteps*2
+variable prob1 equal step/v_runsteps*2+0.1
 variable prob2 equal (step/v_runsteps)>0.5
 velocity all create 300.0 4928459 dist gaussian
--- a/examples/kim/in.kim-ex.melt
+++ b/examples/kim/in.kim-ex.melt
@ -1,8 +1,8 @@
 # 3d Lennard-Jones melt
 #
 # This example requires that the example models provided with
-# the kim-api package are installed.  see the ./lib/kim/README or
+# the kim-api package are installed.  see the `./lib/kim/README` or
-# ./lib/kim/Install.py files for details on how to install these
+# `./lib/kim/Install.py` files for details on how to install these
 # example models.
 #
@ -14,14 +14,14 @@ variable	xx equal 20*$x
 variable     yy equal 20*$y
 variable     zz equal 20*$z
-kim_init	LennardJones_Ar real
+kim          init LennardJones_Ar real
 lattice      fcc 4.4300
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 create_box   1 box
 create_atoms 1 box
-kim_interactions Ar
+kim          interactions Ar
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
--- a/examples/kim/in.kim-pm-property
+++ b/examples/kim/in.kim-pm-property
@ -1,7 +1,7 @@
-# kim-property example
+# kim property example
 #
 # For detailed information of this example please refer to:
-# https://openkim.org/doc/evaluation/tutorial-lammps/
+# `https://openkim.org/doc/evaluation/tutorial-lammps/`
 #
 # Description:
 #
@ -10,10 +10,10 @@
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004` model for
 # argon. The material properties computed in LAMMPS are represented as a
 # standard KIM property instance format. (See
-# https://openkim.org/doc/schema/properties-framework/ and 
+# `https://openkim.org/doc/schema/properties-framework/` and
-# https://lammps.sandia.gov/doc/kim_commands.html for further details).
+# `https://lammps.sandia.gov/doc/kim_commands.html` for further details).
-# Then the created property instance is written to a file named results.edn 
+# Then the created property instance is written to a file named `results.edn`
-# using the `kim_property dump` commands.  
+# using the `kim property dump` command.
 #
 # Requirement:
 #
@ -28,7 +28,7 @@
 # This example requires that the KIM Portable Model (PM)
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004`
 # is installed.  This can be done with the command
-# `kim-api-collections-management install user LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004`
+#   kim-api-collections-management install user LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
@ -38,14 +38,14 @@
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
-# Or, see https://openkim.org/doc/obtaining-models for alternative options.
+# Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 # Initialize interatomic potential (KIM model) and units
 atom_style atomic
 # Set the OpenKIM model that will be used
-kim_init LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004 metal
+kim init LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004 metal
 # the equilibrium lattice constant for the fcc structure
 variable lattice_constant equal 5.248509056866169
@ -62,7 +62,7 @@ create_atoms 1 box
 mass         1 39.948
 # Specify the KIM interactions
-kim_interactions Ar
+kim interactions Ar
 # Compute energy
 run 0
@ -72,10 +72,10 @@ variable natoms       equal "count(all)"
 variable ecohesive    equal "-pe/v_natoms"
 # Create a property instance
-kim_property create 1 cohesive-potential-energy-cubic-crystal
+kim property create 1 cohesive-potential-energy-cubic-crystal
 # Set all the key-value pairs for this property instance
-kim_property modify 1 key short-name source-value 1 fcc                          &
+kim property modify 1 key short-name source-value 1 fcc                          &
                      key species source-value 1 Ar                              &
                      key a source-value ${lattice_constant}                     &
                            source-unit angstrom                                 &
@ -88,4 +88,4 @@ kim_property modify 1 key short-name source-value 1 fcc
                                                    source-unit eV
 # Dump the results in a file
-kim_property dump "results.edn"
+kim property dump "results.edn"
--- a/examples/kim/in.kim-pm-query.melt
+++ b/examples/kim/in.kim-pm-query.melt
@ -1,7 +1,7 @@
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
-# SW_StillingerWeber_1985_Si__MO_405512056662_005
+# `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
@ -13,7 +13,7 @@
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
-# Or, see https://openkim.org/doc/obtaining-models for alternative options.
+# Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
@ -24,15 +24,15 @@ variable	xx equal 20*$x
 variable     yy equal 20*$y
 variable     zz equal 20*$z
-kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
+kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
-kim_query       a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
+kim          query a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
 lattice      fcc ${a0}
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 create_box   1 box
 create_atoms 1 box
-kim_interactions Si
+kim          interactions Si
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
--- a/examples/kim/in.kim-pm.melt
+++ b/examples/kim/in.kim-pm.melt
@ -1,7 +1,7 @@
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
-# SW_StillingerWeber_1985_Si__MO_405512056662_005
+# `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
@ -13,7 +13,7 @@
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
-# Or, see https://openkim.org/doc/obtaining-models for alternative options.
+# Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
@ -24,14 +24,14 @@ variable	xx equal 20*$x
 variable     yy equal 20*$y
 variable     zz equal 20*$z
-kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
+kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 lattice      fcc 4.4300
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 create_box   1 box
 create_atoms 1 box
-kim_interactions Si
+kim          interactions Si
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
--- a/examples/kim/in.kim-query
+++ b/examples/kim/in.kim-query
@ -0,0 +1,76 @@
 # kim query example
 #
 # Requirement:
 #
 # This example requires that LAMMPS is built with the KIM package. The KIM
 # package requires the KIM API library, which must be downloaded from the
 # OpenKIM website and installed before LAMMPS is compiled. The 'kim query'
 # command requires the libcurl library to be installed. See the
 # `https://lammps.sandia.gov/doc/Build_extras.html#kim` doc page for further
 # details.
 #
 # This example requires that the KIM Models
 # `EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005`
 # and
 # `EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000`
 # are installed.
 #
 # This can be done with the commands
 #   kim-api-collections-management install user EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005
 #   kim-api-collections-management install user EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000
 #
 # If these commands do not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 # -----------------------------------------------
 # Get an equilibrium fcc crystal lattice constant
 # -----------------------------------------------
 kim   init EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005 metal
 kim   query latconst_1 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom]
 print "FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005) = ${latconst_1}"
 # Get the lattice constant from a different model
 kim   query latconst_2 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005]
 print "FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = ${latconst_2}"
 clear
 # -----------------------------------------------
 # Get an equilibrium fcc crystal lattice constant
 # -----------------------------------------------
 kim   query latconst_1 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005]
 kim   query latconst_2 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005]
 print "FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005)   = ${latconst_1}"
 print "FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = ${latconst_2}"
 clear
 # -----------------------------------------------
 # Get an equilibrium hcp crystal lattice constant
 # -----------------------------------------------
 kim   init EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000 metal
 kim   query latconst split get_lattice_constant_hexagonal crystal=["hcp"] species=["Zr"] units=["angstrom"]
 print "HCP lattice constants = ${latconst_1}, ${latconst_2}"
 clear
 # -----------------------------------------------
 # Query for KIM models from openkim.org
 # Get all the EAM models that support Al
 # -----------------------------------------------
 kim   query model index get_available_models species=[Al] potential_type=[eam]
 label model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 print "FCC lattice constant (${model}) = ${latconst}"
 next  model
 jump  SELF model_loop
 clear
--- a/examples/kim/in.kim-sm.melt
+++ b/examples/kim/in.kim-sm.melt
@ -1,7 +1,7 @@
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Simulator Model (SM)
-# Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
+# `Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000`
 # is installed. This can be done with the command
 #   kim-api-collections-management install user Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # If this command does not work, you may need to setup your PATH to find the utility.
@ -13,7 +13,7 @@
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
-# See https://openkim.org/doc/obtaining-models for alternative options.
+# See `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
@ -24,14 +24,14 @@ variable	xx equal 20*$x
 variable     yy equal 20*$y
 variable     zz equal 20*$z
-kim_init	Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
+kim          init Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
 lattice      fcc 4.4300
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 create_box   1 box
 create_atoms 1 box
-kim_interactions O
+kim          interactions O
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
--- a/examples/kim/log.10Feb21.in.kim-ex.melt.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-ex.melt.clang.1
@ -0,0 +1,107 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the example models provided with
 # the kim-api package are installed.  see the `./lib/kim/README` or
 # `./lib/kim/Install.py` files for details on how to install these
 # example models.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init LennardJones_Ar real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has No mutable parameters.
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.003 seconds
 kim          interactions Ar
 #=== BEGIN kim interactions ==================================
 pair_style kim LennardJones_Ar
 WARNING: KIM Model does not provide 'partialParticleEnergy'; energy per atom will be zero (src/KIM/pair_kim.cpp:1139)
 WARNING: KIM Model does not provide 'partialParticleVirial'; virial per atom will be zero (src/KIM/pair_kim.cpp:1145)
 pair_coeff * * Ar
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 8.450000000000001
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 28.12 | 28.12 | 28.12 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    145069.63            0    164146.22    128015.94 
     100    95.179703    154939.42            0    164017.94    131602.75 
 Loop time of 2.8463 on 1 procs for 100 steps with 32000 atoms
 Performance: 3.036 ns/day, 7.906 hours/ns, 35.133 timesteps/s
 99.9% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 2.5046     | 2.5046     | 2.5046     |   0.0 | 88.00
 Neigh   | 0.29437    | 0.29437    | 0.29437    |   0.0 | 10.34
 Comm    | 0.01182    | 0.01182    | 0.01182    |   0.0 |  0.42
 Output  | 7e-05      | 7e-05      | 7e-05      |   0.0 |  0.00
 Modify  | 0.024522   | 0.024522   | 0.024522   |   0.0 |  0.86
 Other   |            | 0.01091    |            |       |  0.38
 Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        19911.0 ave       19911 max       19911 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  4.25375e+06 ave 4.25375e+06 max 4.25375e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 4253750
 Ave neighs/atom = 132.92969
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:02
--- a/examples/kim/log.10Feb21.in.kim-ex.melt.clang.4
+++ b/examples/kim/log.10Feb21.in.kim-ex.melt.clang.4
@ -0,0 +1,107 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the example models provided with
 # the kim-api package are installed.  see the `./lib/kim/README` or
 # `./lib/kim/Install.py` files for details on how to install these
 # example models.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init LennardJones_Ar real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has No mutable parameters.
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.001 seconds
 kim          interactions Ar
 #=== BEGIN kim interactions ==================================
 pair_style kim LennardJones_Ar
 WARNING: KIM Model does not provide 'partialParticleEnergy'; energy per atom will be zero (src/KIM/pair_kim.cpp:1139)
 WARNING: KIM Model does not provide 'partialParticleVirial'; virial per atom will be zero (src/KIM/pair_kim.cpp:1145)
 pair_coeff * * Ar
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 8.450000000000001
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 9.791 | 9.791 | 9.791 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    145069.63            0    164146.22    128015.94 
     100    95.179703    154939.42            0    164017.94    131602.75 
 Loop time of 0.857614 on 4 procs for 100 steps with 32000 atoms
 Performance: 10.074 ns/day, 2.382 hours/ns, 116.603 timesteps/s
 99.6% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.73048    | 0.73398    | 0.73855    |   0.3 | 85.58
 Neigh   | 0.083739   | 0.083964   | 0.084335   |   0.1 |  9.79
 Comm    | 0.017996   | 0.022912   | 0.026515   |   2.1 |  2.67
 Output  | 2.7e-05    | 3.5e-05    | 4.5e-05    |   0.0 |  0.00
 Modify  | 0.010073   | 0.010158   | 0.010271   |   0.1 |  1.18
 Other   |            | 0.006571   |            |       |  0.77
 Nlocal:        8000.00 ave        8018 max        7967 min
 Histogram: 1 0 0 0 0 0 1 0 0 2
 Nghost:        9131.00 ave        9164 max        9113 min
 Histogram: 2 0 0 1 0 0 0 0 0 1
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  1.06344e+06 ave 1.06594e+06 max 1.05881e+06 min
 Histogram: 1 0 0 0 0 0 1 0 0 2
 Total # of neighbors = 4253750
 Ave neighs/atom = 132.92969
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/examples/kim/log.10Feb21.in.kim-pm-property.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-pm-property.clang.1
@ -0,0 +1,223 @@
 LAMMPS (10 Feb 2021)
 # kim property example
 #
 # For detailed information of this example please refer to:
 # `https://openkim.org/doc/evaluation/tutorial-lammps/`
 #
 # Description:
 #
 # This example is designed to calculate the cohesive energy corresponding to
 # the equilibrium FCC lattice constant for
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004` model for
 # argon. The material properties computed in LAMMPS are represented as a
 # standard KIM property instance format. (See
 # `https://openkim.org/doc/schema/properties-framework/` and
 # `https://lammps.sandia.gov/doc/kim_commands.html` for further details).
 # Then the created property instance is written to a file named `results.edn`
 # using the `kim property dump` command.
 #
 # Requirement:
 #
 # This example requires LAMMPS built with the Python 3.6 or later package
 # installed. See the `https://lammps.sandia.gov/doc/python.html` doc page for
 # more info on building LAMMPS with the version of Python on your system.
 # After successfully building LAMMPS with Python, you need to install the
 # kim-property Python package, See the
 # `https://lammps.sandia.gov/doc/Build_extras.html#kim` doc page for
 # further details.
 #
 # This example requires that the KIM Portable Model (PM)
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 # Initialize interatomic potential (KIM model) and units
 atom_style atomic
 # Set the OpenKIM model that will be used
 kim init LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004 metal
 #=== BEGIN kim init ==========================================
 units metal
 neighbor 2.0 bin   # Angstroms
 timestep 1.0e-3    # picoseconds
 This model has 3 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | cutoff             | "Double"   | 1
 2        | epsilon            | "Double"   | 1
 3        | sigma              | "Double"   | 1
 #=== END kim init ============================================
 # the equilibrium lattice constant for the fcc structure
 variable lattice_constant equal 5.248509056866169
 # Periodic boundary conditions along all three dimensions
 boundary p p p
 # Create an FCC lattice with the lattice spacing
 # using a single conventional (orthogonal) unit cell
 lattice      fcc ${lattice_constant}
 lattice      fcc 5.24850905686617
 Lattice spacing in x,y,z = 5.2485091 5.2485091 5.2485091
 region box   block 0 1 0 1 0 1 units lattice
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (5.2485091 5.2485091 5.2485091)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 4 atoms
  create_atoms CPU = 0.000 seconds
 mass         1 39.948
 # Specify the KIM interactions
 kim interactions Ar
 #=== BEGIN kim interactions ==================================
 pair_style kim LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004
 pair_coeff * * Ar
 #=== END kim interactions ====================================
 # Compute energy
 run 0
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_126566794224_004a} is archived in OpenKIM~\cite{MO_126566794224_004, MD_498634107543_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_126566794224_004.bib}
 \end{document}
 }
@Misc{MO_126566794224_004,
  author       = {Ellad Tadmor},
  title        = {{L}ennard-{J}ones model (shifted) for {A}r with parameters from {B}ernardes (1958) (medium precision cutoff) v004},
  doi          = {10.25950/9f98b989},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/9f98b989}},
  keywords     = {OpenKIM, Model, MO_126566794224_004},
  publisher    = {OpenKIM},
  year         = 2020,
 }
@Misc{MD_498634107543_004,
  author       = {Ellad Tadmor},
  title        = {{D}river for the {L}ennard-{J}ones model uniformly shifted to have zero energy at the cutoff radius v004},
  doi          = {10.25950/bdffd6a6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/9f98b989}},
  keywords     = {OpenKIM, Model Driver, MD_498634107543_004},
  publisher    = {OpenKIM},
  year         = 2020,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_126566794224_004a,
  author = {Newton Bernardes},
  doi = {10.1103/PhysRev.112.1534},
  issue = {5},
  journal = {Physical Review},
  pages = {1534--1539},
  publisher = {American Physical Society},
  title = {Theory of Solid {N}e, {A}, {K}r, and {X}e at 0{K}},
  volume = {112},
  year = {1958},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 15.5
  ghost atom cutoff = 15.5
  binsize = 7.75, bins = 1 1 1
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 15.5
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 3.119 | 3.119 | 3.119 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0            0  -0.34602203            0  -0.34602203 0.00061471244 
 Loop time of 0 on 1 procs for 0 steps with 4 atoms
 0.0% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0          | 0          | 0          |   0.0 |  0.00
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0          | 0          | 0          |   0.0 |  0.00
 Output  | 0          | 0          | 0          |   0.0 |  0.00
 Modify  | 0          | 0          | 0          |   0.0 |  0.00
 Other   |            | 0          |            |       |  0.00
 Nlocal:        4.00000 ave           4 max           4 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        1094.00 ave        1094 max        1094 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:      1712.00 ave        1712 max        1712 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1712
 Ave neighs/atom = 428.00000
 Neighbor list builds = 0
 Dangerous builds = 0
 # Get cohesive energy
 variable natoms       equal "count(all)"
 variable ecohesive    equal "-pe/v_natoms"
 # Create a property instance
 kim property create 1 cohesive-potential-energy-cubic-crystal
 #=== kim property ===========================================
 # Set all the key-value pairs for this property instance
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value ${lattice_constant}                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value ${ecohesive}                                                        source-unit eV
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value 5.24850905686617                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value ${ecohesive}                                                        source-unit eV
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value 5.24850905686617                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value 0.0865055084950546                                                        source-unit eV
 #=== kim property ===========================================
 # Dump the results in a file
 kim property dump "results.edn"
 #=== kim property ===========================================
 Total wall time: 0:00:00
--- a/examples/kim/log.10Feb21.in.kim-pm-property.clang.4
+++ b/examples/kim/log.10Feb21.in.kim-pm-property.clang.4
@ -0,0 +1,223 @@
 LAMMPS (10 Feb 2021)
 # kim property example
 #
 # For detailed information of this example please refer to:
 # `https://openkim.org/doc/evaluation/tutorial-lammps/`
 #
 # Description:
 #
 # This example is designed to calculate the cohesive energy corresponding to
 # the equilibrium FCC lattice constant for
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004` model for
 # argon. The material properties computed in LAMMPS are represented as a
 # standard KIM property instance format. (See
 # `https://openkim.org/doc/schema/properties-framework/` and
 # `https://lammps.sandia.gov/doc/kim_commands.html` for further details).
 # Then the created property instance is written to a file named `results.edn`
 # using the `kim property dump` command.
 #
 # Requirement:
 #
 # This example requires LAMMPS built with the Python 3.6 or later package
 # installed. See the `https://lammps.sandia.gov/doc/python.html` doc page for
 # more info on building LAMMPS with the version of Python on your system.
 # After successfully building LAMMPS with Python, you need to install the
 # kim-property Python package, See the
 # `https://lammps.sandia.gov/doc/Build_extras.html#kim` doc page for
 # further details.
 #
 # This example requires that the KIM Portable Model (PM)
 # `LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 # Initialize interatomic potential (KIM model) and units
 atom_style atomic
 # Set the OpenKIM model that will be used
 kim init LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004 metal
 #=== BEGIN kim init ==========================================
 units metal
 neighbor 2.0 bin   # Angstroms
 timestep 1.0e-3    # picoseconds
 This model has 3 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | cutoff             | "Double"   | 1
 2        | epsilon            | "Double"   | 1
 3        | sigma              | "Double"   | 1
 #=== END kim init ============================================
 # the equilibrium lattice constant for the fcc structure
 variable lattice_constant equal 5.248509056866169
 # Periodic boundary conditions along all three dimensions
 boundary p p p
 # Create an FCC lattice with the lattice spacing
 # using a single conventional (orthogonal) unit cell
 lattice      fcc ${lattice_constant}
 lattice      fcc 5.24850905686617
 Lattice spacing in x,y,z = 5.2485091 5.2485091 5.2485091
 region box   block 0 1 0 1 0 1 units lattice
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (5.2485091 5.2485091 5.2485091)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 4 atoms
  create_atoms CPU = 0.000 seconds
 mass         1 39.948
 # Specify the KIM interactions
 kim interactions Ar
 #=== BEGIN kim interactions ==================================
 pair_style kim LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004
 pair_coeff * * Ar
 #=== END kim interactions ====================================
 # Compute energy
 run 0
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_126566794224_004a} is archived in OpenKIM~\cite{MO_126566794224_004, MD_498634107543_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_126566794224_004.bib}
 \end{document}
 }
@Misc{MO_126566794224_004,
  author       = {Ellad Tadmor},
  title        = {{L}ennard-{J}ones model (shifted) for {A}r with parameters from {B}ernardes (1958) (medium precision cutoff) v004},
  doi          = {10.25950/9f98b989},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/9f98b989}},
  keywords     = {OpenKIM, Model, MO_126566794224_004},
  publisher    = {OpenKIM},
  year         = 2020,
 }
@Misc{MD_498634107543_004,
  author       = {Ellad Tadmor},
  title        = {{D}river for the {L}ennard-{J}ones model uniformly shifted to have zero energy at the cutoff radius v004},
  doi          = {10.25950/bdffd6a6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/9f98b989}},
  keywords     = {OpenKIM, Model Driver, MD_498634107543_004},
  publisher    = {OpenKIM},
  year         = 2020,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_126566794224_004a,
  author = {Newton Bernardes},
  doi = {10.1103/PhysRev.112.1534},
  issue = {5},
  journal = {Physical Review},
  pages = {1534--1539},
  publisher = {American Physical Society},
  title = {Theory of Solid {N}e, {A}, {K}r, and {X}e at 0{K}},
  volume = {112},
  year = {1958},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 10 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 15.5
  ghost atom cutoff = 15.5
  binsize = 7.75, bins = 1 1 1
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 15.5
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 3.165 | 3.165 | 3.165 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0            0  -0.34602203            0  -0.34602203 0.00061471244 
 Loop time of 1.5e-06 on 4 procs for 0 steps with 4 atoms
 100.0% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0          | 0          | 0          |   0.0 |  0.00
 Neigh   | 0          | 0          | 0          |   0.0 |  0.00
 Comm    | 0          | 0          | 0          |   0.0 |  0.00
 Output  | 0          | 0          | 0          |   0.0 |  0.00
 Modify  | 0          | 0          | 0          |   0.0 |  0.00
 Other   |            | 1.5e-06    |            |       |100.00
 Nlocal:        1.00000 ave           1 max           1 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Nghost:        935.000 ave         935 max         935 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:      428.000 ave         428 max         428 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1712
 Ave neighs/atom = 428.00000
 Neighbor list builds = 0
 Dangerous builds = 0
 # Get cohesive energy
 variable natoms       equal "count(all)"
 variable ecohesive    equal "-pe/v_natoms"
 # Create a property instance
 kim property create 1 cohesive-potential-energy-cubic-crystal
 #=== kim property ===========================================
 # Set all the key-value pairs for this property instance
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value ${lattice_constant}                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value ${ecohesive}                                                        source-unit eV
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value 5.24850905686617                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value ${ecohesive}                                                        source-unit eV
 kim property modify 1 key short-name source-value 1 fcc                                                key species source-value 1 Ar                                                    key a source-value 5.24850905686617                                                 source-unit angstrom                                                       key basis-atom-coordinates source-value 1 1:3 0.0 0.0 0.0                                                   source-value 2 1:3 0.0 0.5 0.5                                                   source-value 3 1:3 0.5 0.0 0.5                                                   source-value 4 1:3 0.5 0.5 0.0                        key space-group source-value Fm-3m                                               key cohesive-potential-energy source-value 0.0865055084950538                                                        source-unit eV
 #=== kim property ===========================================
 # Dump the results in a file
 kim property dump "results.edn"
 #=== kim property ===========================================
 Total wall time: 0:00:00
--- a/examples/kim/log.10Feb21.in.kim-pm-query.melt.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-pm-query.melt.clang.1
@ -0,0 +1,210 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has 9 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | A                  | "Double"   | 1
 2        | B                  | "Double"   | 1
 3        | p                  | "Double"   | 1
 4        | q                  | "Double"   | 1
 5        | sigma              | "Double"   | 1
 6        | gamma              | "Double"   | 1
 7        | cutoff             | "Double"   | 1
 8        | lambda             | "Double"   | 1
 9        | costheta0          | "Double"   | 1
 #=== END kim init ============================================
 kim          query a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
 #=== BEGIN kim-query =========================================
 variable a0 string "4.146581932902336"
 #=== END kim-query ===========================================
 lattice      fcc ${a0}
 lattice      fcc 4.146581932902336
 Lattice spacing in x,y,z = 4.1465819 4.1465819 4.1465819
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (82.931639 82.931639 82.931639)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.004 seconds
 kim          interactions Si
 #=== BEGIN kim interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_405512056662_005a, MO_405512056662_005b} is archived in OpenKIM~\cite{MO_405512056662_005, MD_335816936951_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_405512056662_005.bib}
 \end{document}
 }
@Misc{MO_405512056662_005,
  author       = {Amit K Singh},
  title        = {{S}tillinger-{W}eber potential for {S}i due to {S}tillinger and {W}eber (1985) v005},
  doi          = {10.25950/c74b293f},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model, MO_405512056662_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_335816936951_004,
  author       = {Mingjian Wen},
  title        = {{S}tillinger-{W}eber ({SW}) {M}odel {D}river v004},
  doi          = {10.25950/f3abd2d6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model Driver, MD_335816936951_004},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_405512056662_005a,
  author = {Stillinger, Frank H. and Weber, Thomas A.},
  doi = {10.1103/PhysRevB.31.5262},
  issue = {8},
  journal = {Physical Review B},
  month = {Apr},
  pages = {5262--5271},
  publisher = {American Physical Society},
  title = {Computer simulation of local order in condensed phases of silicon},
  volume = {31},
  year = {1985},
 }
@Book{MO_405512056662_005b,
  author = {Tadmor, Ellad B. and Miller, Ronald E.},
  doi = {10.1017/CBO9781139003582},
  publisher = {Cambridge University Press},
  title = {Modeling Materials: {C}ontinuum, Atomistic and Multiscale Techniques},
  year = {2011},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 41 41 41
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 10.36 | 10.36 | 10.36 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -126084.25            0   -107007.66    1528.8768 
     100    94.450495   -116016.03            0   -107007.07    2282.2685 
 Loop time of 18.2886 on 1 procs for 100 steps with 32000 atoms
 Performance: 0.472 ns/day, 50.802 hours/ns, 5.468 timesteps/s
 99.9% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 18.155     | 18.155     | 18.155     |   0.0 | 99.27
 Neigh   | 0.087194   | 0.087194   | 0.087194   |   0.0 |  0.48
 Comm    | 0.009477   | 0.009477   | 0.009477   |   0.0 |  0.05
 Output  | 6.7e-05    | 6.7e-05    | 6.7e-05    |   0.0 |  0.00
 Modify  | 0.02616    | 0.02616    | 0.02616    |   0.0 |  0.14
 Other   |            | 0.0111     |            |       |  0.06
 Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        9667.00 ave        9667 max        9667 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:      450192.0 ave      450192 max      450192 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 450192
 Ave neighs/atom = 14.068500
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:21
--- a/examples/kim/log.10Feb21.in.kim-pm-query.melt.clang.4
+++ b/examples/kim/log.10Feb21.in.kim-pm-query.melt.clang.4
@ -0,0 +1,210 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has 9 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | A                  | "Double"   | 1
 2        | B                  | "Double"   | 1
 3        | p                  | "Double"   | 1
 4        | q                  | "Double"   | 1
 5        | sigma              | "Double"   | 1
 6        | gamma              | "Double"   | 1
 7        | cutoff             | "Double"   | 1
 8        | lambda             | "Double"   | 1
 9        | costheta0          | "Double"   | 1
 #=== END kim init ============================================
 kim          query a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
 #=== BEGIN kim-query =========================================
 variable a0 string "4.146581932902336"
 #=== END kim-query ===========================================
 lattice      fcc ${a0}
 lattice      fcc 4.146581932902336
 Lattice spacing in x,y,z = 4.1465819 4.1465819 4.1465819
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (82.931639 82.931639 82.931639)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.001 seconds
 kim          interactions Si
 #=== BEGIN kim interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_405512056662_005a, MO_405512056662_005b} is archived in OpenKIM~\cite{MO_405512056662_005, MD_335816936951_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_405512056662_005.bib}
 \end{document}
 }
@Misc{MO_405512056662_005,
  author       = {Amit K Singh},
  title        = {{S}tillinger-{W}eber potential for {S}i due to {S}tillinger and {W}eber (1985) v005},
  doi          = {10.25950/c74b293f},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model, MO_405512056662_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_335816936951_004,
  author       = {Mingjian Wen},
  title        = {{S}tillinger-{W}eber ({SW}) {M}odel {D}river v004},
  doi          = {10.25950/f3abd2d6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model Driver, MD_335816936951_004},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_405512056662_005a,
  author = {Stillinger, Frank H. and Weber, Thomas A.},
  doi = {10.1103/PhysRevB.31.5262},
  issue = {8},
  journal = {Physical Review B},
  month = {Apr},
  pages = {5262--5271},
  publisher = {American Physical Society},
  title = {Computer simulation of local order in condensed phases of silicon},
  volume = {31},
  year = {1985},
 }
@Book{MO_405512056662_005b,
  author = {Tadmor, Ellad B. and Miller, Ronald E.},
  doi = {10.1017/CBO9781139003582},
  publisher = {Cambridge University Press},
  title = {Modeling Materials: {C}ontinuum, Atomistic and Multiscale Techniques},
  year = {2011},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 41 41 41
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 3.489 | 3.489 | 3.489 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -126084.25            0   -107007.66    1528.8768 
     100    94.450495   -116016.03            0   -107007.07    2282.2685 
 Loop time of 5.00432 on 4 procs for 100 steps with 32000 atoms
 Performance: 1.727 ns/day, 13.901 hours/ns, 19.983 timesteps/s
 99.7% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 4.9281     | 4.9366     | 4.9447     |   0.3 | 98.65
 Neigh   | 0.02399    | 0.024135   | 0.024318   |   0.1 |  0.48
 Comm    | 0.020646   | 0.029014   | 0.037515   |   4.3 |  0.58
 Output  | 2.9e-05    | 3.325e-05  | 4.2e-05    |   0.0 |  0.00
 Modify  | 0.008808   | 0.0088445  | 0.00888    |   0.0 |  0.18
 Other   |            | 0.005691   |            |       |  0.11
 Nlocal:        8000.00 ave        8029 max        7968 min
 Histogram: 1 1 0 0 0 0 0 0 0 2
 Nghost:        4259.00 ave        4303 max        4202 min
 Histogram: 1 0 0 0 0 0 2 0 0 1
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:      112548.0 ave      113091 max      111995 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Total # of neighbors = 450192
 Ave neighs/atom = 14.068500
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:07
--- a/examples/kim/log.10Feb21.in.kim-pm.melt.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-pm.melt.clang.1
@ -0,0 +1,204 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has 9 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | A                  | "Double"   | 1
 2        | B                  | "Double"   | 1
 3        | p                  | "Double"   | 1
 4        | q                  | "Double"   | 1
 5        | sigma              | "Double"   | 1
 6        | gamma              | "Double"   | 1
 7        | cutoff             | "Double"   | 1
 8        | lambda             | "Double"   | 1
 9        | costheta0          | "Double"   | 1
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.002 seconds
 kim          interactions Si
 #=== BEGIN kim interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_405512056662_005a, MO_405512056662_005b} is archived in OpenKIM~\cite{MO_405512056662_005, MD_335816936951_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_405512056662_005.bib}
 \end{document}
 }
@Misc{MO_405512056662_005,
  author       = {Amit K Singh},
  title        = {{S}tillinger-{W}eber potential for {S}i due to {S}tillinger and {W}eber (1985) v005},
  doi          = {10.25950/c74b293f},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model, MO_405512056662_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_335816936951_004,
  author       = {Mingjian Wen},
  title        = {{S}tillinger-{W}eber ({SW}) {M}odel {D}river v004},
  doi          = {10.25950/f3abd2d6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model Driver, MD_335816936951_004},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_405512056662_005a,
  author = {Stillinger, Frank H. and Weber, Thomas A.},
  doi = {10.1103/PhysRevB.31.5262},
  issue = {8},
  journal = {Physical Review B},
  month = {Apr},
  pages = {5262--5271},
  publisher = {American Physical Society},
  title = {Computer simulation of local order in condensed phases of silicon},
  volume = {31},
  year = {1985},
 }
@Book{MO_405512056662_005b,
  author = {Tadmor, Ellad B. and Miller, Ronald E.},
  doi = {10.1017/CBO9781139003582},
  publisher = {Cambridge University Press},
  title = {Modeling Materials: {C}ontinuum, Atomistic and Multiscale Techniques},
  year = {2011},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 44 44 44
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 10.44 | 10.44 | 10.44 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -85249.847            0   -66173.259   -33302.387 
     100    253.43357    -90346.68            0   -66173.441   -14888.698 
 Loop time of 17.7449 on 1 procs for 100 steps with 32000 atoms
 Performance: 0.487 ns/day, 49.291 hours/ns, 5.635 timesteps/s
 99.9% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 17.64      | 17.64      | 17.64      |   0.0 | 99.41
 Neigh   | 0.060149   | 0.060149   | 0.060149   |   0.0 |  0.34
 Comm    | 0.008585   | 0.008585   | 0.008585   |   0.0 |  0.05
 Output  | 6.3e-05    | 6.3e-05    | 6.3e-05    |   0.0 |  0.00
 Modify  | 0.025324   | 0.025324   | 0.025324   |   0.0 |  0.14
 Other   |            | 0.01057    |            |       |  0.06
 Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        7760.00 ave        7760 max        7760 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:      402352.0 ave      402352 max      402352 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 402352
 Ave neighs/atom = 12.573500
 Neighbor list builds = 4
 Dangerous builds = 0
 Total wall time: 0:00:17
--- a/examples/kim/log.10Feb21.in.kim-pm.melt.clang.4
+++ b/examples/kim/log.10Feb21.in.kim-pm.melt.clang.4
@ -0,0 +1,204 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # `SW_StillingerWeber_1985_Si__MO_405512056662_005`
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim init ==========================================
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 This model has 9 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | A                  | "Double"   | 1
 2        | B                  | "Double"   | 1
 3        | p                  | "Double"   | 1
 4        | q                  | "Double"   | 1
 5        | sigma              | "Double"   | 1
 6        | gamma              | "Double"   | 1
 7        | cutoff             | "Double"   | 1
 8        | lambda             | "Double"   | 1
 9        | costheta0          | "Double"   | 1
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.001 seconds
 kim          interactions Si
 #=== BEGIN kim interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_405512056662_005a, MO_405512056662_005b} is archived in OpenKIM~\cite{MO_405512056662_005, MD_335816936951_004, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_405512056662_005.bib}
 \end{document}
 }
@Misc{MO_405512056662_005,
  author       = {Amit K Singh},
  title        = {{S}tillinger-{W}eber potential for {S}i due to {S}tillinger and {W}eber (1985) v005},
  doi          = {10.25950/c74b293f},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model, MO_405512056662_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_335816936951_004,
  author       = {Mingjian Wen},
  title        = {{S}tillinger-{W}eber ({SW}) {M}odel {D}river v004},
  doi          = {10.25950/f3abd2d6},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/c74b293f}},
  keywords     = {OpenKIM, Model Driver, MD_335816936951_004},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_405512056662_005a,
  author = {Stillinger, Frank H. and Weber, Thomas A.},
  doi = {10.1103/PhysRevB.31.5262},
  issue = {8},
  journal = {Physical Review B},
  month = {Apr},
  pages = {5262--5271},
  publisher = {American Physical Society},
  title = {Computer simulation of local order in condensed phases of silicon},
  volume = {31},
  year = {1985},
 }
@Book{MO_405512056662_005b,
  author = {Tadmor, Ellad B. and Miller, Ronald E.},
  doi = {10.1017/CBO9781139003582},
  publisher = {Cambridge University Press},
  title = {Modeling Materials: {C}ontinuum, Atomistic and Multiscale Techniques},
  year = {2011},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 44 44 44
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 3.517 | 3.517 | 3.517 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -85249.847            0   -66173.259   -33302.387 
     100    253.43357    -90346.68            0   -66173.441   -14888.698 
 Loop time of 4.87378 on 4 procs for 100 steps with 32000 atoms
 Performance: 1.773 ns/day, 13.538 hours/ns, 20.518 timesteps/s
 99.7% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 4.8075     | 4.816      | 4.8244     |   0.3 | 98.81
 Neigh   | 0.015902   | 0.015996   | 0.016077   |   0.1 |  0.33
 Comm    | 0.018078   | 0.026375   | 0.034752   |   4.2 |  0.54
 Output  | 3e-05      | 3.5e-05    | 4.4e-05    |   0.0 |  0.00
 Modify  | 0.009331   | 0.0094922  | 0.009588   |   0.1 |  0.19
 Other   |            | 0.005919   |            |       |  0.12
 Nlocal:        8000.00 ave        8014 max        7988 min
 Histogram: 1 1 0 0 0 0 1 0 0 1
 Nghost:        3374.75 ave        3389 max        3361 min
 Histogram: 1 0 1 0 0 0 0 1 0 1
 Neighs:         0.00000 ave           0 max           0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:      100588.0 ave      100856 max      100392 min
 Histogram: 1 0 1 0 1 0 0 0 0 1
 Total # of neighbors = 402352
 Ave neighs/atom = 12.573500
 Neighbor list builds = 4
 Dangerous builds = 0
 Total wall time: 0:00:04
--- a/examples/kim/log.10Feb21.in.kim-query.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-query.clang.1
@ -0,0 +1,655 @@
 LAMMPS (10 Feb 2021)
 # kim query example
 #
 # Requirement:
 #
 # This example requires LAMMPS is built with KIM package. A requirement for
 # the KIM package, is the KIM API library that must be downloaded from the
 # OpenKIM website and installed before LAMMPS is compiled. The 'kim query'
 # command requires the libcurl library to be installed. See the
 # `https://lammps.sandia.gov/doc/Build_extras.html#kim` doc page for further
 # details
 #
 # This example requires that the KIM Models
 # `EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005`
 # and
 # `EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000`
 # are installed.
 #
 # This can be done with the commands
 # `kim-api-collections-management install user EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005`
 # `kim-api-collections-management install user EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000`
 #
 # If these commands do not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see https://openkim.org/doc/obtaining-models for alternative options.
 #
 # -----------------------------------------------
 # Get an equilibrium fcc crystal lattice constant
 # -----------------------------------------------
 kim   init EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005 metal
 #=== BEGIN kim init ==========================================
 units metal
 neighbor 2.0 bin   # Angstroms
 timestep 1.0e-3    # picoseconds
 This model has 6 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | cutoff             | "Double"   | 1
 2        | deltaRho           | "Double"   | 1
 3        | deltaR             | "Double"   | 1
 4        | embeddingData      | "Double"   | 500
 5        | rPhiData           | "Double"   | 500
 6        | densityData        | "Double"   | 500
 #=== END kim init ============================================
 kim   query latconst_1 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom]
 #=== BEGIN kim-query =========================================
 variable latconst_1 string "4.032082033157349"
 #=== END kim-query ===========================================
 print "FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005) = ${latconst_1}"
 FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005) = 4.032082033157349
 # Get the lattice constant from a different model
 kim   query latconst_2 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005]
 #=== BEGIN kim-query =========================================
 variable latconst_2 string "4.024845376610756"
 #=== END kim-query ===========================================
 print "FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = ${latconst_2}"
 FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = 4.024845376610756
 clear
 # -----------------------------------------------
 # Get an equilibrium fcc crystal lattice constant
 # -----------------------------------------------
 kim   query latconst_1 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005]
 #=== BEGIN kim-query =========================================
 variable latconst_1 string "4.032082033157349"
 #=== END kim-query ===========================================
 kim   query latconst_2 get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005]
 #=== BEGIN kim-query =========================================
 variable latconst_2 string "4.024845376610756"
 #=== END kim-query ===========================================
 print "FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005)   = ${latconst_1}"
 FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005)   = 4.032082033157349
 print "FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = ${latconst_2}"
 FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = 4.024845376610756
 clear
 # -----------------------------------------------
 # Get an equilibrium hcp crystal lattice constant
 # -----------------------------------------------
 kim   init EAM_Dynamo_MendelevAckland_2007v3_Zr__MO_004835508849_000 metal
 #=== BEGIN kim init ==========================================
 units metal
 neighbor 2.0 bin   # Angstroms
 timestep 1.0e-3    # picoseconds
 This model has 6 mutable parameters. 
 No.      | Parameter name     | data type  | extent
 -----------------------------------------------------
 1        | cutoff             | "Double"   | 1
 2        | deltaRho           | "Double"   | 1
 3        | deltaR             | "Double"   | 1
 4        | embeddingData      | "Double"   | 10000
 5        | rPhiData           | "Double"   | 10000
 6        | densityData        | "Double"   | 10000
 #=== END kim init ============================================
 kim   query latconst split get_lattice_constant_hexagonal crystal=["hcp"] species=["Zr"] units=["angstrom"]
 #=== BEGIN kim-query =========================================
 variable latconst_1 string 3.234055244384789
 variable latconst_2 string  5.167650199630013
 #=== END kim-query ===========================================
 print "HCP lattice constants = ${latconst_1}, ${latconst_2}"
 HCP lattice constants = 3.234055244384789, 5.167650199630013
 clear
 # -----------------------------------------------
 # Query for KIM models from openkim.org
 # Get all the EAM models that support Al
 # -----------------------------------------------
 kim   query model index get_available_models species=[Al] potential_type=[eam]
 #=== BEGIN kim-query =========================================
 variable model index "EAM_CubicNaturalSpline_ErcolessiAdams_1994_Al__MO_800509458712_002"  "EAM_Dynamo_AngeloMoodyBaskes_1995_NiAlH__MO_418978237058_005"  "EAM_Dynamo_CaiYe_1996_AlCu__MO_942551040047_005"  "EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005"  "EAM_Dynamo_FarkasJones_1996_NbTiAl__MO_042691367780_000"  "EAM_Dynamo_JacobsenNorskovPuska_1987_Al__MO_411692133366_000"  "EAM_Dynamo_LandaWynblattSiegel_2000_AlPb__MO_699137396381_005"  "EAM_Dynamo_LiuAdams_1998_AlMg__MO_019873715786_000"  "EAM_Dynamo_LiuErcolessiAdams_2004_Al__MO_051157671505_000"  "EAM_Dynamo_LiuLiuBorucki_1999_AlCu__MO_020851069572_000"  "EAM_Dynamo_LiuOhotnickyAdams_1997_AlMg__MO_559870613549_000"  "EAM_Dynamo_MendelevAstaRahman_2009_AlMg__MO_658278549784_005"  "EAM_Dynamo_MendelevFangYe_2015_AlSm__MO_338600200739_000"  "EAM_Dynamo_MendelevKramerBecker_2008_Al__MO_106969701023_005"  "EAM_Dynamo_MendelevSrolovitzAckland_2005_AlFe__MO_577453891941_005"  "EAM_Dynamo_MishinFarkasMehl_1999_Al__MO_651801486679_005"  "EAM_Dynamo_MishinMehlPapaconstantopoulos_2002_NiAl__MO_109933561507_005"  "EAM_Dynamo_Mishin_2004_NiAl__MO_101214310689_005"  "EAM_Dynamo_PunMishin_2009_NiAl__MO_751354403791_005"  "EAM_Dynamo_PunYamakovMishin_2013_AlCo__MO_678952612413_000"  "EAM_Dynamo_PunYamakovMishin_2013_NiAlCo__MO_826591359508_000"  "EAM_Dynamo_SchopfBrommerFrigan_2012_AlMnPd__MO_137572817842_000"  "EAM_Dynamo_SturgeonLaird_2000_Al__MO_120808805541_005"  "EAM_Dynamo_VailheFarkas_1997_CoAl__MO_284963179498_005"  "EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005"  "EAM_Dynamo_Zhakhovsky_2009_Al__MO_519613893196_000"  "EAM_Dynamo_ZhouJohnsonWadley_2004NISTretabulation_Al__MO_060567868558_000"  "EAM_Dynamo_ZhouJohnsonWadley_2004_Al__MO_131650261510_005"  "EAM_Dynamo_ZhouWadleyJohnson_2001_Al__MO_049243498555_000"  "EAM_Dynamo_ZopeMishin_2003_Al__MO_664470114311_005"  "EAM_Dynamo_ZopeMishin_2003_TiAl__MO_117656786760_005"  "EAM_ErcolessiAdams_1994_Al__MO_324507536345_003"  
"EAM_IMD_BrommerGaehler_2006A_AlNiCo__MO_122703700223_003"  "EAM_IMD_BrommerGaehler_2006B_AlNiCo__MO_128037485276_003"  "EAM_IMD_SchopfBrommerFrigan_2012_AlMnPd__MO_878712978062_003"  "EAM_QuinticClampedSpline_ErcolessiAdams_1994_Al__MO_450093727396_002"  "EAM_QuinticHermiteSpline_ErcolessiAdams_1994_Al__MO_781138671863_002"  "EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_AlAgAuCuNiPdPt__MO_115316750986_001"  "EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_Al__MO_623376124862_001"
 #=== END kim-query ===========================================
 label model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_CubicNaturalSpline_ErcolessiAdams_1994_Al__MO_800509458712_002]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.032082748413087"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_CubicNaturalSpline_ErcolessiAdams_1994_Al__MO_800509458712_002) = 4.032082748413087
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_AngeloMoodyBaskes_1995_NiAlH__MO_418978237058_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050000071525574"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_AngeloMoodyBaskes_1995_NiAlH__MO_418978237058_005) = 4.050000071525574
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_CaiYe_1996_AlCu__MO_942551040047_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.049763545393944"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_CaiYe_1996_AlCu__MO_942551040047_005) = 4.049763545393944
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.032082033157349"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005) = 4.032082033157349
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_FarkasJones_1996_NbTiAl__MO_042691367780_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "3.869337007403374"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_FarkasJones_1996_NbTiAl__MO_042691367780_000) = 3.869337007403374
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_JacobsenNorskovPuska_1987_Al__MO_411692133366_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "3.987558534741402"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_JacobsenNorskovPuska_1987_Al__MO_411692133366_000) = 3.987558534741402
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_LandaWynblattSiegel_2000_AlPb__MO_699137396381_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.031036108732224"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_LandaWynblattSiegel_2000_AlPb__MO_699137396381_005) = 4.031036108732224
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_LiuAdams_1998_AlMg__MO_019873715786_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.03203821182251"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_LiuAdams_1998_AlMg__MO_019873715786_000) = 4.03203821182251
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_LiuErcolessiAdams_2004_Al__MO_051157671505_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "9.5"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_LiuErcolessiAdams_2004_Al__MO_051157671505_000) = 9.5
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_LiuLiuBorucki_1999_AlCu__MO_020851069572_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.032073378562927"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_LiuLiuBorucki_1999_AlCu__MO_020851069572_000) = 4.032073378562927
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_LiuOhotnickyAdams_1997_AlMg__MO_559870613549_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "8.5"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_LiuOhotnickyAdams_1997_AlMg__MO_559870613549_000) = 8.5
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MendelevAstaRahman_2009_AlMg__MO_658278549784_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.045270472764969"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MendelevAstaRahman_2009_AlMg__MO_658278549784_005) = 4.045270472764969
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MendelevFangYe_2015_AlSm__MO_338600200739_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.040926471352577"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MendelevFangYe_2015_AlSm__MO_338600200739_000) = 4.040926471352577
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MendelevKramerBecker_2008_Al__MO_106969701023_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.045259781181811"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MendelevKramerBecker_2008_Al__MO_106969701023_005) = 4.045259781181811
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MendelevSrolovitzAckland_2005_AlFe__MO_577453891941_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.03330184519291"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MendelevSrolovitzAckland_2005_AlFe__MO_577453891941_005) = 4.03330184519291
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MishinFarkasMehl_1999_Al__MO_651801486679_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050004702806472"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MishinFarkasMehl_1999_Al__MO_651801486679_005) = 4.050004702806472
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_MishinMehlPapaconstantopoulos_2002_NiAl__MO_109933561507_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.051526293158533"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_MishinMehlPapaconstantopoulos_2002_NiAl__MO_109933561507_005) = 4.051526293158533
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_Mishin_2004_NiAl__MO_101214310689_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.049999862909317"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_Mishin_2004_NiAl__MO_101214310689_005) = 4.049999862909317
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_PunMishin_2009_NiAl__MO_751354403791_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050000071525574"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_PunMishin_2009_NiAl__MO_751354403791_005) = 4.050000071525574
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_PunYamakovMishin_2013_AlCo__MO_678952612413_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.05000014603138"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_PunYamakovMishin_2013_AlCo__MO_678952612413_000) = 4.05000014603138
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_PunYamakovMishin_2013_NiAlCo__MO_826591359508_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.05000014603138"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_PunYamakovMishin_2013_NiAlCo__MO_826591359508_000) = 4.05000014603138
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_SchopfBrommerFrigan_2012_AlMnPd__MO_137572817842_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.210718545317654"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_SchopfBrommerFrigan_2012_AlMnPd__MO_137572817842_000) = 4.210718545317654
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_SturgeonLaird_2000_Al__MO_120808805541_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050010219216347"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_SturgeonLaird_2000_Al__MO_120808805541_005) = 4.050010219216347
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_VailheFarkas_1997_CoAl__MO_284963179498_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.049696564674378"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_VailheFarkas_1997_CoAl__MO_284963179498_005) = 4.049696564674378
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.024845376610756"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_WineyKubotaGupta_2010_Al__MO_149316865608_005) = 4.024845376610756
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_Zhakhovsky_2009_Al__MO_519613893196_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.031999975442885"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_Zhakhovsky_2009_Al__MO_519613893196_000) = 4.031999975442885
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ZhouJohnsonWadley_2004NISTretabulation_Al__MO_060567868558_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050199627876282"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ZhouJohnsonWadley_2004NISTretabulation_Al__MO_060567868558_000) = 4.050199627876282
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ZhouJohnsonWadley_2004_Al__MO_131650261510_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050180745124819"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ZhouJohnsonWadley_2004_Al__MO_131650261510_005) = 4.050180745124819
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ZhouWadleyJohnson_2001_Al__MO_049243498555_000]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.081654928624631"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ZhouWadleyJohnson_2001_Al__MO_049243498555_000) = 4.081654928624631
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ZopeMishin_2003_Al__MO_664470114311_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.050000011920929"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ZopeMishin_2003_Al__MO_664470114311_005) = 4.050000011920929
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_Dynamo_ZopeMishin_2003_TiAl__MO_117656786760_005]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.049999445676804"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_Dynamo_ZopeMishin_2003_TiAl__MO_117656786760_005) = 4.049999445676804
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_ErcolessiAdams_1994_Al__MO_324507536345_003]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.032082714140415"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_ErcolessiAdams_1994_Al__MO_324507536345_003) = 4.032082714140415
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_IMD_BrommerGaehler_2006A_AlNiCo__MO_122703700223_003]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.128871455788613"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_IMD_BrommerGaehler_2006A_AlNiCo__MO_122703700223_003) = 4.128871455788613
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_IMD_BrommerGaehler_2006B_AlNiCo__MO_128037485276_003]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.073718130588532"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_IMD_BrommerGaehler_2006B_AlNiCo__MO_128037485276_003) = 4.073718130588532
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_IMD_SchopfBrommerFrigan_2012_AlMnPd__MO_878712978062_003]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.210700303316115"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_IMD_SchopfBrommerFrigan_2012_AlMnPd__MO_878712978062_003) = 4.210700303316115
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_QuinticClampedSpline_ErcolessiAdams_1994_Al__MO_450093727396_002]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.032082897424699"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_QuinticClampedSpline_ErcolessiAdams_1994_Al__MO_450093727396_002) = 4.032082897424699
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EAM_QuinticHermiteSpline_ErcolessiAdams_1994_Al__MO_781138671863_002]
 #=== BEGIN kim-query =========================================
 variable latconst string "4.03208246231079"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EAM_QuinticHermiteSpline_ErcolessiAdams_1994_Al__MO_781138671863_002) = 4.03208246231079
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_AlAgAuCuNiPdPt__MO_115316750986_001]
 #=== BEGIN kim-query =========================================
 variable latconst string "3.994616635143757"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_AlAgAuCuNiPdPt__MO_115316750986_001) = 3.994616635143757
 next  model
 jump  SELF model_loop
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[${model}]
 kim   query latconst get_lattice_constant_cubic crystal=[fcc] species=[Al] units=[angstrom] model=[EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_Al__MO_623376124862_001]
 #=== BEGIN kim-query =========================================
 variable latconst string "3.994608342647553"
 #=== END kim-query ===========================================
 print "FCC lattice constant (${model}) = ${latconst}"
 FCC lattice constant (EMT_Asap_Standard_JacobsenStoltzeNorskov_1996_Al__MO_623376124862_001) = 3.994608342647553
 next  model
 jump  SELF model_loop
 clear
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_123629422045_005a} is archived in OpenKIM~\cite{MO_123629422045_005, MD_120291908751_005, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_123629422045_005.bib}
 \end{document}
 }
@Misc{MO_123629422045_005,
  author       = {Ryan S. Elliott},
  title        = {{EAM} potential ({LAMMPS} cubic hermite tabulation) for {A}l developed by {E}rcolessi and {A}dams (1994) v005},
  doi          = {10.25950/7cd2a6ab},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/7cd2a6ab}},
  keywords     = {OpenKIM, Model, MO_123629422045_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_120291908751_005,
  author       = {Ryan S. Elliott},
  title        = {{EAM} {M}odel {D}river for tabulated potentials with cubic {H}ermite spline interpolation as used in {LAMMPS} v005},
  doi          = {10.25950/68defa36},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/68defa36}},
  keywords     = {OpenKIM, Model Driver, MD_120291908751_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_123629422045_005a,
  author = {F. Ercolessi and J. B. Adams},
  doi = {10.1209/0295-5075/26/8/005},
  journal = {Europhysics Letters},
  number = {8},
  pages = {583},
  title = {Interatomic Potentials from First-Principles Calculations: {T}he Force-Matching Method},
  volume = {26},
  year = {1994},
 }
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Model originally published in \cite{MO_004835508849_000a} is archived in OpenKIM~\cite{MO_004835508849_000, MD_120291908751_005, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-MO_004835508849_000.bib}
 \end{document}
 }
@Misc{MO_004835508849_000,
  author       = {Ellad Tadmor},
  title        = {{F}innis-{S}inclair potential ({LAMMPS} cubic hermite tabulation) for {Z}r developed by {M}endelev and {A}ckland (2007); version 3 refitted for radiation studies v000},
  doi          = {10.25950/7b7b5ab5},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/7b7b5ab5}},
  keywords     = {OpenKIM, Model, MO_004835508849_000},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Misc{MD_120291908751_005,
  author       = {Ryan S. Elliott},
  title        = {{EAM} {M}odel {D}river for tabulated potentials with cubic {H}ermite spline interpolation as used in {LAMMPS} v005},
  doi          = {10.25950/68defa36},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/68defa36}},
  keywords     = {OpenKIM, Model Driver, MD_120291908751_005},
  publisher    = {OpenKIM},
  year         = 2018,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{MO_004835508849_000a,
  author = {Mendelev, M. I. and Ackland, G. J.},
  doi = {10.1080/09500830701191393},
  journal = {Philosophical Magazine Letters},
  number = {5},
  pages = {349-359},
  title = {Development of an interatomic potential for the simulation of phase transformations in zirconium},
  volume = {87},
  year = {2007},
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Total wall time: 0:01:58
--- a/examples/kim/log.10Feb21.in.kim-sm.melt.clang.1
+++ b/examples/kim/log.10Feb21.in.kim-sm.melt.clang.1
@ -0,0 +1,208 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Simulator Model (SM)
 # `Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000`
 # is installed. This can be done with the command
 #   kim-api-collections-management install user Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # If this command does not work, you may need to set up your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # See `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
 #=== BEGIN kim init ==========================================
 # Using KIM Simulator Model : Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # For Simulator             : LAMMPS 28 Feb 2019
 # Running on                : LAMMPS 10 Feb 2021
 #
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 atom_style charge
 neigh_modify one 4000
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.003 seconds
 kim          interactions O
 #=== BEGIN kim interactions ==================================
 variable kim_periodic equal 1
 pair_style reax/c /var/tmp/kim-shared-library-parameter-file-directory-pgBW45WFK0TI/lmp_control safezone 2.0 mincap 100
 pair_coeff * * /var/tmp/kim-shared-library-parameter-file-directory-pgBW45WFK0TI/ffield.reax.rdx O
 Reading potential file /var/tmp/kim-shared-library-parameter-file-directory-pgBW45WFK0TI/ffield.reax.rdx with DATE: 2010-02-19
 fix reaxqeq all qeq/reax 1 0.0 10.0 1.0e-6 /var/tmp/kim-shared-library-parameter-file-directory-pgBW45WFK0TI/param.qeq
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Simulator Model originally published in \cite{SM_107643900657_000a} is archived in OpenKIM~\cite{SM_107643900657_000, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-SM_107643900657_000.bib}
 \end{document}
 }
@Misc{SM_107643900657_000,
  author       = {Ellad Tadmor},
  title        = {{LAMMPS} {R}eax{FF} potential for {RDX} ({C}-{H}-{N}-{O}) systems developed by {S}trachan et al. (2003) v000},
  doi          = {10.25950/acd3fc89},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/acd3fc89}},
  keywords     = {OpenKIM, Simulator Model, SM_107643900657_000},
  publisher    = {OpenKIM},
  year         = 2019,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{SM_107643900657_000a,
  author = {Strachan, Alejandro and van Duin, Adri C. T. and Chakraborty, Debashis and Dasgupta, Siddharth and Goddard, William A.},
  doi = {10.1103/PhysRevLett.91.098301},
  issue = {9},
  journal = {Physical Review Letters},
  month = {Aug},
  numpages = {4},
  pages = {098301},
  publisher = {American Physical Society},
  title = {Shock Waves in High-Energy Materials: {T}he Initial Chemical Events in Nitramine {RDX}},
  volume = {91},
  year = {2003},
 }
 - pair reax/c command:
@Article{Aktulga12,
 author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},
 title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},
 journal = {Parallel Computing},
 year =    2012,
 volume =  38,
 pages =   {245--259}
 }
 - fix qeq/reax command:
@Article{Aktulga12,
 author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},
 title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},
 journal = {Parallel Computing},
 year =    2012,
 volume =  38,
 pages =   {245--259}
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 4000, page size: 100000
  master list distance cutoff = 10.3
  ghost atom cutoff = 10.3
  binsize = 5.15, bins = 18 18 18
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 1803.0 | 1803.0 | 1803.0 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -39091.147            0   -20014.559    19501.107 
     100    63.198252   -26042.062            0   -20014.027    21497.661 
 Loop time of 40.2545 on 1 procs for 100 steps with 32000 atoms
 Performance: 0.215 ns/day, 111.818 hours/ns, 2.484 timesteps/s
 99.1% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 24.364     | 24.364     | 24.364     |   0.0 | 60.52
 Neigh   | 0.4185     | 0.4185     | 0.4185     |   0.0 |  1.04
 Comm    | 0.022045   | 0.022045   | 0.022045   |   0.0 |  0.05
 Output  | 6.6e-05    | 6.6e-05    | 6.6e-05    |   0.0 |  0.00
 Modify  | 15.438     | 15.438     | 15.438     |   0.0 | 38.35
 Other   |            | 0.01285    |            |       |  0.03
 Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        26825.0 ave       26825 max       26825 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    3.73924e+06 ave 3.73924e+06 max 3.73924e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 3739236
 Ave neighs/atom = 116.85112
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:41
--- a/examples/kim/log.10Feb21.in.kim-sm.melt.clang.4
+++ b/examples/kim/log.10Feb21.in.kim-sm.melt.clang.4
@ -0,0 +1,208 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Simulator Model (PM)
 # `Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000`
 # is installed. This can be done with the command
 #   kim-api-collections-management install user Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # See `https://openkim.org/doc/obtaining-models` for alternative options.
 #
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 kim          init Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
 #=== BEGIN kim init ==========================================
 # Using KIM Simulator Model : Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # For Simulator             : LAMMPS 28 Feb 2019
 # Running on                : LAMMPS 10 Feb 2021
 #
 units real
 neighbor 2.0 bin   # Angstroms
 timestep 1.0       # femtoseconds
 atom_style charge
 neigh_modify one 4000
 #=== END kim init ============================================
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.001 seconds
 kim          interactions O
 #=== BEGIN kim interactions ==================================
 variable kim_periodic equal 1
 pair_style reax/c /var/tmp/kim-shared-library-parameter-file-directory-zYQfH0ms5WSw/lmp_control safezone 2.0 mincap 100
 pair_coeff * * /var/tmp/kim-shared-library-parameter-file-directory-zYQfH0ms5WSw/ffield.reax.rdx O
 Reading potential file /var/tmp/kim-shared-library-parameter-file-directory-zYQfH0ms5WSw/ffield.reax.rdx with DATE: 2010-02-19
 fix reaxqeq all qeq/reax 1 0.0 10.0 1.0e-6 /var/tmp/kim-shared-library-parameter-file-directory-zYQfH0ms5WSw/param.qeq
 #=== END kim interactions ====================================
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Your simulation uses code contributions which should be cited:
 - @Comment
 {
 \documentclass{article}
 \usepackage{url}
 \begin{document}
 This Simulator Model originally published in \cite{SM_107643900657_000a} is archived in OpenKIM~\cite{SM_107643900657_000, tadmor:elliott:2011, elliott:tadmor:2011}.
 \bibliographystyle{vancouver}
 \bibliography{kimcite-SM_107643900657_000.bib}
 \end{document}
 }
@Misc{SM_107643900657_000,
  author       = {Ellad Tadmor},
  title        = {{LAMMPS} {R}eax{FF} potential for {RDX} ({C}-{H}-{N}-{O}) systems developed by {S}trachan et al. (2003) v000},
  doi          = {10.25950/acd3fc89},
  howpublished = {OpenKIM, \url{https://doi.org/10.25950/acd3fc89}},
  keywords     = {OpenKIM, Simulator Model, SM_107643900657_000},
  publisher    = {OpenKIM},
  year         = 2019,
 }
@Article{tadmor:elliott:2011,
  author    = {E. B. Tadmor and R. S. Elliott and J. P. Sethna and R. E. Miller and C. A. Becker},
  title     = {The potential of atomistic simulations and the {K}nowledgebase of {I}nteratomic {M}odels},
  journal   = {{JOM}},
  year      = {2011},
  volume    = {63},
  number    = {7},
  pages     = {17},
  doi       = {10.1007/s11837-011-0102-6},
 }
@Misc{elliott:tadmor:2011,
  author       = {Ryan S. Elliott and Ellad B. Tadmor},
  title        = {{K}nowledgebase of {I}nteratomic {M}odels ({KIM}) Application Programming Interface ({API})},
  howpublished = {\url{https://openkim.org/kim-api}},
  publisher    = {OpenKIM},
  year         = 2011,
  doi          = {10.25950/ff8f563a},
 }
@Article{SM_107643900657_000a,
  author = {Strachan, Alejandro and van Duin, Adri C. T. and Chakraborty, Debashis and Dasgupta, Siddharth and Goddard, William A.},
  doi = {10.1103/PhysRevLett.91.098301},
  issue = {9},
  journal = {Physical Review Letters},
  month = {Aug},
  numpages = {4},
  pages = {098301},
  publisher = {American Physical Society},
  title = {Shock Waves in High-Energy Materials: {T}he Initial Chemical Events in Nitramine {RDX}},
  volume = {91},
  year = {2003},
 }
 - pair reax/c command:
@Article{Aktulga12,
 author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},
 title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},
 journal = {Parallel Computing},
 year =    2012,
 volume =  38,
 pages =   {245--259}
 }
 - fix qeq/reax command:
@Article{Aktulga12,
 author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},
 title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},
 journal = {Parallel Computing},
 year =    2012,
 volume =  38,
 pages =   {245--259}
 }
 CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 4000, page size: 100000
  master list distance cutoff = 10.3
  ghost atom cutoff = 10.3
  binsize = 5.15, bins = 18 18 18
  2 neighbor lists, perpetual/occasional/extra = 2 0 0
  (1) pair reax/c, perpetual
      attributes: half, newton off, ghost
      pair build: half/bin/newtoff/ghost
      stencil: half/ghost/bin/3d/newtoff
      bin: standard
  (2) fix qeq/reax, perpetual, copy from (1)
      attributes: half, newton off, ghost
      pair build: copy
      stencil: none
      bin: none
 Per MPI rank memory allocation (min/avg/max) = 630.2 | 630.2 | 630.2 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -39091.147            0   -20014.559    19501.107 
     100    63.198252   -26042.062            0   -20014.027    21497.661 
 Loop time of 15.049 on 4 procs for 100 steps with 32000 atoms
 Performance: 0.574 ns/day, 41.803 hours/ns, 6.645 timesteps/s
 99.0% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 9.8158     | 9.8159     | 9.8161     |   0.0 | 65.23
 Neigh   | 0.17685    | 0.17759    | 0.17832    |   0.1 |  1.18
 Comm    | 0.028692   | 0.028847   | 0.028942   |   0.1 |  0.19
 Output  | 2.5e-05    | 3.575e-05  | 4.6e-05    |   0.0 |  0.00
 Modify  | 5.0171     | 5.0179     | 5.0186     |   0.0 | 33.34
 Other   |            | 0.008715   |            |       |  0.06
 Nlocal:        8000.00 ave        8010 max        7993 min
 Histogram: 2 0 0 0 0 1 0 0 0 1
 Nghost:        12605.0 ave       12612 max       12595 min
 Histogram: 1 0 0 0 1 0 0 0 0 2
 Neighs:    1.00097e+06 ave 1.00187e+06 max  1.0006e+06 min
 Histogram: 2 1 0 0 0 0 0 0 0 1
 Total # of neighbors = 4003876
 Ave neighs/atom = 125.12113
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:15
--- a/examples/kim/log.10Feb21.in.lammps.melt.clang.1
+++ b/examples/kim/log.10Feb21.in.lammps.melt.clang.1
@ -0,0 +1,88 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 units        real
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 1 by 1 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.002 seconds
 pair_style   lj/cut 8.1500
 pair_coeff   1 1 0.0104 3.4000
 #pair_style  kim LennardJones_Ar
 #pair_coeff  * * Ar
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut, perpetual
      attributes: half, newton on
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 19.23 | 19.23 | 19.23 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    6290.8194            0    25367.408    6750.7421 
     100    98.747096    15900.676            0    25319.465    10184.453 
 Loop time of 1.92822 on 1 procs for 100 steps with 32000 atoms
 Performance: 4.481 ns/day, 5.356 hours/ns, 51.861 timesteps/s
 99.8% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 1.7377     | 1.7377     | 1.7377     |   0.0 | 90.12
 Neigh   | 0.14234    | 0.14234    | 0.14234    |   0.0 |  7.38
 Comm    | 0.011694   | 0.011694   | 0.011694   |   0.0 |  0.61
 Output  | 6.7e-05    | 6.7e-05    | 6.7e-05    |   0.0 |  0.00
 Modify  | 0.02476    | 0.02476    | 0.02476    |   0.0 |  1.28
 Other   |            | 0.01163    |            |       |  0.60
 Nlocal:        32000.0 ave       32000 max       32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:        19911.0 ave       19911 max       19911 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.96027e+06 ave 1.96027e+06 max 1.96027e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1960266
 Ave neighs/atom = 61.258313
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:01
--- a/examples/kim/log.10Feb21.in.lammps.melt.clang.4
+++ b/examples/kim/log.10Feb21.in.lammps.melt.clang.4
@ -0,0 +1,88 @@
 LAMMPS (10 Feb 2021)
 # 3d Lennard-Jones melt
 variable     x index 1
 variable     y index 1
 variable     z index 1
 variable     xx equal 20*$x
 variable     xx equal 20*1
 variable     yy equal 20*$y
 variable     yy equal 20*1
 variable     zz equal 20*$z
 variable     zz equal 20*1
 units        real
 lattice      fcc 4.4300
 Lattice spacing in x,y,z = 4.4300000 4.4300000 4.4300000
 region       box block 0 ${xx} 0 ${yy} 0 ${zz}
 region       box block 0 20 0 ${yy} 0 ${zz}
 region       box block 0 20 0 20 0 ${zz}
 region       box block 0 20 0 20 0 20
 create_box   1 box
 Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (88.600000 88.600000 88.600000)
  1 by 2 by 2 MPI processor grid
 create_atoms 1 box
 Created 32000 atoms
  create_atoms CPU = 0.001 seconds
 pair_style   lj/cut 8.1500
 pair_coeff   1 1 0.0104 3.4000
 #pair_style  kim LennardJones_Ar
 #pair_coeff  * * Ar
 mass         1 39.95
 velocity     all create 200.0 232345 loop geom
 neighbor     0.3 bin
 neigh_modify delay 0 every 1 check yes
 fix          1 all nve
 #fix         1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run          100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut, perpetual
      attributes: half, newton on
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
 Per MPI rank memory allocation (min/avg/max) = 7.633 | 7.633 | 7.633 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    6290.8194            0    25367.408    6750.7421 
     100    98.747096    15900.676            0    25319.465    10184.453 
 Loop time of 0.561006 on 4 procs for 100 steps with 32000 atoms
 Performance: 15.401 ns/day, 1.558 hours/ns, 178.251 timesteps/s
 99.6% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.48486    | 0.48676    | 0.48817    |   0.2 | 86.77
 Neigh   | 0.040698   | 0.04091    | 0.041066   |   0.1 |  7.29
 Comm    | 0.016616   | 0.01811    | 0.0202     |   1.1 |  3.23
 Output  | 3e-05      | 3.575e-05  | 4.7e-05    |   0.0 |  0.01
 Modify  | 0.008934   | 0.009025   | 0.009142   |   0.1 |  1.61
 Other   |            | 0.006161   |            |       |  1.10
 Nlocal:        8000.00 ave        8012 max        7989 min
 Histogram: 1 0 0 0 2 0 0 0 0 1
 Nghost:        9131.00 ave        9142 max        9119 min
 Histogram: 1 0 0 0 0 2 0 0 0 1
 Neighs:        490066.0 ave      491443 max      489273 min
 Histogram: 2 0 0 0 1 0 0 0 0 1
 Total # of neighbors = 1960266
 Ave neighs/atom = 61.258313
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/examples/kim/log.7Aug19.in.kim-ex.melt.clang.1
+++ b/examples/kim/log.7Aug19.in.kim-ex.melt.clang.1
@ -1,107 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the example models provided with
 # the kim-api package are installed.  see the ./lib/kim/README or
 # ./lib/kim/Install.py files for details on how to install these
 # example models.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	LennardJones_Ar real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.004321 secs
 kim_interactions Ar
 #=== BEGIN kim_interactions ==================================
 pair_style kim LennardJones_Ar
 WARNING: KIM Model does not provide `partialParticleEnergy'; energy per atom will be zero (../pair_kim.cpp:974)
 WARNING: KIM Model does not provide `partialParticleVirial'; virial per atom will be zero (../pair_kim.cpp:979)
 pair_coeff * * Ar 
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 8.45
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 28.12 | 28.12 | 28.12 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    145069.63            0    164146.22    128015.94 
     100    95.179703    154939.42            0    164017.94    131602.75 
 Loop time of 3.48256 on 1 procs for 100 steps with 32000 atoms
 Performance: 2.481 ns/day, 9.674 hours/ns, 28.715 timesteps/s
 98.3% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 3.0502     | 3.0502     | 3.0502     |   0.0 | 87.59
 Neigh   | 0.3646     | 0.3646     | 0.3646     |   0.0 | 10.47
 Comm    | 0.01783    | 0.01783    | 0.01783    |   0.0 |  0.51
 Output  | 6.8e-05    | 6.8e-05    | 6.8e-05    |   0.0 |  0.00
 Modify  | 0.034349   | 0.034349   | 0.034349   |   0.0 |  0.99
 Other   |            | 0.01547    |            |       |  0.44
 Nlocal:    32000 ave 32000 max 32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    19911 ave 19911 max 19911 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    0 ave 0 max 0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  4.25375e+06 ave 4.25375e+06 max 4.25375e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 4253750
 Ave neighs/atom = 132.93
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:03
--- a/examples/kim/log.7Aug19.in.kim-ex.melt.clang.4
+++ b/examples/kim/log.7Aug19.in.kim-ex.melt.clang.4
@ -1,113 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the example models provided with
 # the kim-api package are installed.  see the ./lib/kim/README or
 # ./lib/kim/Install.py files for details on how to install these
 # example models.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	LennardJones_Ar real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.000989 secs
 kim_interactions Ar
 #=== BEGIN kim_interactions ==================================
 pair_style kim LennardJones_Ar
 WARNING: KIM Model does not provide `partialParticleEnergy'; energy per atom will be zero (../pair_kim.cpp:974)
 WARNING: KIM Model does not provide `partialParticleVirial'; virial per atom will be zero (../pair_kim.cpp:979)
 pair_coeff * * Ar 
 WARNING: KIM Model does not provide `partialParticleEnergy'; energy per atom will be zero (../pair_kim.cpp:974)
 WARNING: KIM Model does not provide `partialParticleVirial'; virial per atom will be zero (../pair_kim.cpp:979)
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 WARNING: KIM Model does not provide `partialParticleEnergy'; energy per atom will be zero (../pair_kim.cpp:974)
 WARNING: KIM Model does not provide `partialParticleVirial'; virial per atom will be zero (../pair_kim.cpp:979)
 WARNING: KIM Model does not provide `partialParticleEnergy'; energy per atom will be zero (../pair_kim.cpp:974)
 WARNING: KIM Model does not provide `partialParticleVirial'; virial per atom will be zero (../pair_kim.cpp:979)
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 8.45
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 9.791 | 9.791 | 9.791 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    145069.63            0    164146.22    128015.94 
     100    95.179703    154939.42            0    164017.94    131602.75 
 Loop time of 0.924494 on 4 procs for 100 steps with 32000 atoms
 Performance: 9.346 ns/day, 2.568 hours/ns, 108.167 timesteps/s
 99.6% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.76434    | 0.76847    | 0.77207    |   0.3 | 83.12
 Neigh   | 0.09089    | 0.094446   | 0.099911   |   1.1 | 10.22
 Comm    | 0.038599   | 0.044759   | 0.051381   |   2.1 |  4.84
 Output  | 3.5e-05    | 4e-05      | 4.9e-05    |   0.0 |  0.00
 Modify  | 0.009396   | 0.009685   | 0.009941   |   0.2 |  1.05
 Other   |            | 0.00709    |            |       |  0.77
 Nlocal:    8000 ave 8018 max 7967 min
 Histogram: 1 0 0 0 0 0 1 0 0 2
 Nghost:    9131 ave 9164 max 9113 min
 Histogram: 2 0 0 1 0 0 0 0 0 1
 Neighs:    0 ave 0 max 0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  1.06344e+06 ave 1.06594e+06 max 1.05881e+06 min
 Histogram: 1 0 0 0 0 0 1 0 0 2
 Total # of neighbors = 4253750
 Ave neighs/atom = 132.93
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/examples/kim/log.7Aug19.in.kim-pm-query.melt.clang.1
+++ b/examples/kim/log.7Aug19.in.kim-pm-query.melt.clang.1
@ -1,124 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # SW_StillingerWeber_1985_Si__MO_405512056662_005
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 kim_query       a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
 #=== BEGIN kim-query =========================================
 variable a0 string 4.146581932902336
 #=== END kim-query ===========================================
 lattice		fcc ${a0}
 lattice		fcc 4.146581932902336
 Lattice spacing in x,y,z = 4.14658 4.14658 4.14658
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (82.9316 82.9316 82.9316)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.005415 secs
 kim_interactions Si
 #=== BEGIN kim_interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si 
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 41 41 41
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 10.36 | 10.36 | 10.36 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -126084.25            0   -107007.66    1528.8768 
     100    94.450495   -116016.03            0   -107007.07    2282.2685 
 Loop time of 74.6055 on 1 procs for 100 steps with 32000 atoms
 Performance: 0.116 ns/day, 207.238 hours/ns, 1.340 timesteps/s
 98.6% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 74.446     | 74.446     | 74.446     |   0.0 | 99.79
 Neigh   | 0.096611   | 0.096611   | 0.096611   |   0.0 |  0.13
 Comm    | 0.014594   | 0.014594   | 0.014594   |   0.0 |  0.02
 Output  | 7.9e-05    | 7.9e-05    | 7.9e-05    |   0.0 |  0.00
 Modify  | 0.03454    | 0.03454    | 0.03454    |   0.0 |  0.05
 Other   |            | 0.01396    |            |       |  0.02
 Nlocal:    32000 ave 32000 max 32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    9667 ave 9667 max 9667 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    0 ave 0 max 0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  450192 ave 450192 max 450192 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 450192
 Ave neighs/atom = 14.0685
 Neighbor list builds = 3
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:01:16
--- a/examples/kim/log.7Aug19.in.kim-pm-query.melt.clang.4
+++ b/examples/kim/log.7Aug19.in.kim-pm-query.melt.clang.4
@ -1,124 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # SW_StillingerWeber_1985_Si__MO_405512056662_005
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to setup your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should relplace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 kim_query       a0 get_lattice_constant_cubic crystal=["fcc"] species=["Si"] units=["angstrom"]
 #=== BEGIN kim-query =========================================
 variable a0 string 4.146581932902336
 #=== END kim-query ===========================================
 lattice		fcc ${a0}
 lattice		fcc 4.146581932902336
 Lattice spacing in x,y,z = 4.14658 4.14658 4.14658
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (82.9316 82.9316 82.9316)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.000946 secs
 kim_interactions Si
 #=== BEGIN kim_interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si 
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 41 41 41
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 3.489 | 3.489 | 3.489 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -126084.25            0   -107007.66    1528.8768 
     100    94.450495   -116016.03            0   -107007.07    2282.2685 
 Loop time of 19.0792 on 4 procs for 100 steps with 32000 atoms
 Performance: 0.453 ns/day, 52.998 hours/ns, 5.241 timesteps/s
 99.4% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 18.78      | 18.855     | 18.937     |   1.5 | 98.83
 Neigh   | 0.026047   | 0.026274   | 0.0266     |   0.1 |  0.14
 Comm    | 0.09039    | 0.17196    | 0.24675    |  15.9 |  0.90
 Output  | 3.9e-05    | 4.975e-05  | 6.1e-05    |   0.0 |  0.00
 Modify  | 0.015667   | 0.015819   | 0.016008   |   0.1 |  0.08
 Other   |            | 0.01008    |            |       |  0.05
 Nlocal:    8000 ave 8029 max 7968 min
 Histogram: 1 1 0 0 0 0 0 0 0 2
 Nghost:    4259 ave 4303 max 4202 min
 Histogram: 1 0 0 0 0 0 2 0 0 1
 Neighs:    0 ave 0 max 0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  112548 ave 113091 max 111995 min
 Histogram: 1 0 0 1 0 0 0 1 0 1
 Total # of neighbors = 450192
 Ave neighs/atom = 14.0685
 Neighbor list builds = 3
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:00:20
--- a/examples/kim/log.7Aug19.in.kim-pm.melt.clang.1
+++ b/examples/kim/log.7Aug19.in.kim-pm.melt.clang.1
@ -1,118 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # SW_StillingerWeber_1985_Si__MO_405512056662_005
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to set up your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.003591 secs
 kim_interactions Si
 #=== BEGIN kim_interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si 
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 44 44 44
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 10.44 | 10.44 | 10.44 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -85249.847            0   -66173.259   -33302.387 
     100    253.43357    -90346.68            0   -66173.441   -14888.698 
 Loop time of 74.248 on 1 procs for 100 steps with 32000 atoms
 Performance: 0.116 ns/day, 206.244 hours/ns, 1.347 timesteps/s
 98.8% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 74.118     | 74.118     | 74.118     |   0.0 | 99.83
 Neigh   | 0.069623   | 0.069623   | 0.069623   |   0.0 |  0.09
 Comm    | 0.0137     | 0.0137     | 0.0137     |   0.0 |  0.02
 Output  | 7.6e-05    | 7.6e-05    | 7.6e-05    |   0.0 |  0.00
 Modify  | 0.031883   | 0.031883   | 0.031883   |   0.0 |  0.04
 Other   |            | 0.01433    |            |       |  0.02
 Nlocal:    32000 ave 32000 max 32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    7760 ave 7760 max 7760 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    0 ave 0 max 0 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 FullNghs:  402352 ave 402352 max 402352 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 402352
 Ave neighs/atom = 12.5735
 Neighbor list builds = 4
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:01:14
--- a/examples/kim/log.7Aug19.in.kim-pm.melt.clang.4
+++ b/examples/kim/log.7Aug19.in.kim-pm.melt.clang.4
@ -1,118 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Portable Model (PM)
 # SW_StillingerWeber_1985_Si__MO_405512056662_005
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user SW_StillingerWeber_1985_Si__MO_405512056662_005
 # If this command does not work, you may need to set up your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # Or, see https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	SW_StillingerWeber_1985_Si__MO_405512056662_005 real
 #=== BEGIN kim-init ==========================================
 units real
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.000997 secs
 kim_interactions Si
 #=== BEGIN kim_interactions ==================================
 pair_style kim SW_StillingerWeber_1985_Si__MO_405512056662_005
 pair_coeff * * Si 
 #=== END kim_interactions ====================================
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 4.07118
  ghost atom cutoff = 4.07118
  binsize = 2.03559, bins = 44 44 44
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair kim, perpetual
      attributes: full, newton off, cut 4.07118
      pair build: full/bin/atomonly
      stencil: full/bin/3d
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 3.517 | 3.517 | 3.517 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200   -85249.847            0   -66173.259   -33302.387 
     100    253.43357    -90346.68            0   -66173.441   -14888.698 
 Loop time of 19.0287 on 4 procs for 100 steps with 32000 atoms
 Performance: 0.454 ns/day, 52.857 hours/ns, 5.255 timesteps/s
 99.1% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 18.81      | 18.838     | 18.883     |   0.6 | 99.00
 Neigh   | 0.018598   | 0.01914    | 0.020732   |   0.7 |  0.10
 Comm    | 0.10341    | 0.1475     | 0.17393    |   7.1 |  0.78
 Output  | 6e-05      | 6.225e-05  | 6.7e-05    |   0.0 |  0.00
 Modify  | 0.014839   | 0.014925   | 0.015047   |   0.1 |  0.08
 Other   |            | 0.008997   |            |       |  0.05
 Nlocal:    8000 ave 8014 max 7988 min
 Histogram: 1 1 0 0 0 0 1 0 0 1
 Nghost:    3374.75 ave 3389 max 3361 min
 Histogram: 1 0 1 0 0 0 0 1 0 1
 Neighs:    0 ave 0 max 0 min
 Histogram: 4 0 0 0 0 0 0 0 0 0
 FullNghs:  100588 ave 100856 max 100392 min
 Histogram: 1 0 1 0 1 0 0 0 0 1
 Total # of neighbors = 402352
 Ave neighs/atom = 12.5735
 Neighbor list builds = 4
 Dangerous builds = 0
 Please see the log.cite file for references relevant to this simulation
 Total wall time: 0:00:19
--- a/examples/kim/log.7Aug19.in.kim-sm.melt.clang.1
+++ b/examples/kim/log.7Aug19.in.kim-sm.melt.clang.1
@ -1,71 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Simulator Model (SM)
 # Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # If this command does not work, you may need to set up your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # See https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
 #=== BEGIN kim-init ==========================================
 # Using KIM Simulator Model : Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # For Simulator             : LAMMPS 28 Feb 2019
 # Running on                : LAMMPS 7 Aug 2019
 #
 units real
 atom_style charge
 neigh_modify one 4000
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.003447 secs
 kim_interactions O
 #=== BEGIN kim_interactions ==================================
 pair_style reax/c /var/tmp/kim-simulator-model-parameter-file-directory-6Acs1QDbXgBx/lmp_control safezone 2.0 mincap 100
 ERROR: Unrecognized pair style 'reax/c' is part of the USER-REAXC package which is not enabled in this LAMMPS binary. (../force.cpp:262)
 Last command: pair_style reax/c /var/tmp/kim-simulator-model-parameter-file-directory-6Acs1QDbXgBx/lmp_control safezone 2.0 mincap 100
 --------------------------------------------------------------------------
 Primary job  terminated normally, but 1 process returned
 a non-zero exit code. Per user-direction, the job has been aborted.
 --------------------------------------------------------------------------
 --------------------------------------------------------------------------
 mpirun detected that one or more processes exited with non-zero status, thus causing
 the job to be terminated. The first process to do so was:
  Process name: [[33054,1],0]
  Exit code:    1
 --------------------------------------------------------------------------
--- a/examples/kim/log.7Aug19.in.kim-sm.melt.clang.4
+++ b/examples/kim/log.7Aug19.in.kim-sm.melt.clang.4
@ -1,60 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 #
 # This example requires that the KIM Simulator Model (SM)
 # Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # is installed.  This can be done with the command
 #   kim-api-collections-management install user Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # If this command does not work, you may need to set up your PATH to find the utility.
 # If you installed the kim-api using the LAMMPS CMake build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS build directory)
 #   source ./kim_build-prefix/bin/kim-api-activate
 # If you installed the kim-api using the LAMMPS Make build, you can do the following
 # (where the current working directory is assumed to be the LAMMPS src directory)
 #   source ../lib/kim/installed-kim-api-X.Y.Z/bin/kim-api-activate
 # (where you should replace X.Y.Z with the appropriate kim-api version number).
 #
 # See https://openkim.org/doc/obtaining-models for alternative options.
 #
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 kim_init	Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real
 #=== BEGIN kim-init ==========================================
 # Using KIM Simulator Model : Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000
 # For Simulator             : LAMMPS 28 Feb 2019
 # Running on                : LAMMPS 7 Aug 2019
 #
 units real
 atom_style charge
 neigh_modify one 4000
 #=== END kim-init ============================================
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.001307 secs
 kim_interactions O
 #=== BEGIN kim_interactions ==================================
 pair_style reax/c /var/tmp/kim-simulator-model-parameter-file-directory-6tmKtZEXzhgv/lmp_control safezone 2.0 mincap 100
 ERROR: Unrecognized pair style 'reax/c' is part of the USER-REAXC package which is not enabled in this LAMMPS binary. (../force.cpp:262)
 Last command: pair_style reax/c /var/tmp/kim-simulator-model-parameter-file-directory-6tmKtZEXzhgv/lmp_control safezone 2.0 mincap 100
--- a/examples/kim/log.7Aug19.in.lammps.melt.clang.1
+++ b/examples/kim/log.7Aug19.in.lammps.melt.clang.1
@ -1,92 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 units		real
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 1 by 1 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.003037 secs
 pair_style	lj/cut 8.1500
 pair_coeff	1 1 0.0104 3.4000
 #pair_style      kim LennardJones_Ar
 #pair_coeff      * * Ar
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut, perpetual
      attributes: half, newton on
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 19.23 | 19.23 | 19.23 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    6290.8194            0    25367.408    6750.7421 
     100    98.747096    15900.676            0    25319.465    10184.453 
 Loop time of 2.43768 on 1 procs for 100 steps with 32000 atoms
 Performance: 3.544 ns/day, 6.771 hours/ns, 41.023 timesteps/s
 97.8% CPU use with 1 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 2.1895     | 2.1895     | 2.1895     |   0.0 | 89.82
 Neigh   | 0.17546    | 0.17546    | 0.17546    |   0.0 |  7.20
 Comm    | 0.021001   | 0.021001   | 0.021001   |   0.0 |  0.86
 Output  | 7.9e-05    | 7.9e-05    | 7.9e-05    |   0.0 |  0.00
 Modify  | 0.034253   | 0.034253   | 0.034253   |   0.0 |  1.41
 Other   |            | 0.01735    |            |       |  0.71
 Nlocal:    32000 ave 32000 max 32000 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Nghost:    19911 ave 19911 max 19911 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    1.96027e+06 ave 1.96027e+06 max 1.96027e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 Total # of neighbors = 1960266
 Ave neighs/atom = 61.2583
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:02
--- a/examples/kim/log.7Aug19.in.lammps.melt.clang.4
+++ b/examples/kim/log.7Aug19.in.lammps.melt.clang.4
@ -1,92 +0,0 @@
 LAMMPS (7 Aug 2019)
 # 3d Lennard-Jones melt
 variable	x index 1
 variable	y index 1
 variable	z index 1
 variable	xx equal 20*$x
 variable	xx equal 20*1
 variable	yy equal 20*$y
 variable	yy equal 20*1
 variable	zz equal 20*$z
 variable	zz equal 20*1
 units		real
 lattice		fcc 4.4300
 Lattice spacing in x,y,z = 4.43 4.43 4.43
 region		box block 0 ${xx} 0 ${yy} 0 ${zz}
 region		box block 0 20 0 ${yy} 0 ${zz}
 region		box block 0 20 0 20 0 ${zz}
 region		box block 0 20 0 20 0 20
 create_box	1 box
 Created orthogonal box = (0 0 0) to (88.6 88.6 88.6)
  1 by 2 by 2 MPI processor grid
 create_atoms	1 box
 Created 32000 atoms
  create_atoms CPU = 0.001194 secs
 pair_style	lj/cut 8.1500
 pair_coeff	1 1 0.0104 3.4000
 #pair_style      kim LennardJones_Ar
 #pair_coeff      * * Ar
 mass		1 39.95
 velocity	all create 200.0 232345 loop geom
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 fix		1 all nve
 #fix		1 all npt temp 1.0 1.0 1.0 iso 1.0 1.0 3.0
 run 		100
 Neighbor list info ...
  update every 1 steps, delay 0 steps, check yes
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 8.45
  ghost atom cutoff = 8.45
  binsize = 4.225, bins = 21 21 21
  1 neighbor lists, perpetual/occasional/extra = 1 0 0
  (1) pair lj/cut, perpetual
      attributes: half, newton on
      pair build: half/bin/atomonly/newton
      stencil: half/bin/3d/newton
      bin: standard
 Setting up Verlet run ...
  Unit style    : real
  Current step  : 0
  Time step     : 1
 Per MPI rank memory allocation (min/avg/max) = 7.633 | 7.633 | 7.633 Mbytes
 Step Temp E_pair E_mol TotEng Press 
       0          200    6290.8194            0    25367.408    6750.7421 
     100    98.747096    15900.676            0    25319.465    10184.453 
 Loop time of 0.726239 on 4 procs for 100 steps with 32000 atoms
 Performance: 11.897 ns/day, 2.017 hours/ns, 137.696 timesteps/s
 98.7% CPU use with 4 MPI tasks x no OpenMP threads
 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
 Pair    | 0.57617    | 0.5835     | 0.59084    |   0.9 | 80.34
 Neigh   | 0.046682   | 0.047783   | 0.048641   |   0.3 |  6.58
 Comm    | 0.065469   | 0.071509   | 0.07899    |   2.3 |  9.85
 Output  | 3.9e-05    | 4.6e-05    | 6.1e-05    |   0.0 |  0.01
 Modify  | 0.013205   | 0.01363    | 0.014044   |   0.3 |  1.88
 Other   |            | 0.009775   |            |       |  1.35
 Nlocal:    8000 ave 8012 max 7989 min
 Histogram: 1 0 0 0 2 0 0 0 0 1
 Nghost:    9131 ave 9142 max 9119 min
 Histogram: 1 0 0 0 0 2 0 0 0 1
 Neighs:    490066 ave 491443 max 489273 min
 Histogram: 2 0 0 0 1 0 0 0 0 1
 Total # of neighbors = 1960266
 Ave neighs/atom = 61.2583
 Neighbor list builds = 3
 Dangerous builds = 0
 Total wall time: 0:00:00
--- a/lib/gpu/Makefile.cuda_mps
+++ b/lib/gpu/Makefile.cuda_mps
@ -51,7 +51,7 @@ BIN2C = $(CUDA_HOME)/bin/bin2c
 # host code compiler and settings
-CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
+CUDR_CPP = mpicxx -fopenmp -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
 CUDR_OPTS = -O2 $(LMP_INC)
 CUDR  = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PROXY) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
         $(CUDPP_OPT)
--- a/lib/gpu/Makefile.hip
+++ b/lib/gpu/Makefile.hip
@ -17,7 +17,7 @@ LMP_INC = -DLAMMPS_SMALLBIG
 HIP_PRECISION = -D_SINGLE_DOUBLE
 HIP_OPTS = -O3
-HIP_HOST_OPTS = -Wno-deprecated-declarations
+HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
 HIP_HOST_INCLUDE =
 # use device sort
--- a/lib/gpu/Makefile.lammps.mac_ocl
+++ b/lib/gpu/Makefile.lammps.mac_ocl
@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used
-gpu_SYSINC =
+gpu_SYSINC = -DFFT_SINGLE
 gpu_SYSLIB = -framework OpenCL
 gpu_SYSPATH = 
--- a/lib/gpu/Makefile.linux_opencl
+++ b/lib/gpu/Makefile.linux_opencl
@ -1,25 +1,21 @@
 # /* ----------------------------------------------------------------------   
-#  Generic Linux Makefile for OpenCL 
+#  Generic Linux Makefile for OpenCL - Mixed precision
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.opencl
 # OCL_TUNE = -DFERMI_OCL       # -- Uncomment for NVIDIA Fermi
 # OCL_TUNE = -DKEPLER_OCL    # -- Uncomment for NVIDIA Kepler
 # OCL_TUNE = -DCYPRESS_OCL   # -- Uncomment for AMD Cypress
 OCL_TUNE = -DGENERIC_OCL   # -- Uncomment for generic device
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
-OCL_INC = -I/usr/local/cuda/include  # Path to CL directory
+OCL_INC = 
-OCL_CPP = mpic++ $(DEFAULT_DEVICE) -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) -std=c++11
+OCL_CPP = mpic++ -std=c++11 -O3 -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
-OCL_LINK = -L/usr/local/cuda/lib64 -lOpenCL
+OCL_LINK = -lOpenCL
 OCL_PREC = -D_SINGLE_DOUBLE
 OCL_TUNE = -fopenmp -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
 BIN_DIR = ./
 OBJ_DIR = ./
@ -28,4 +24,3 @@ AR = ar
 BSH = /bin/sh
 include Opencl.makefile
--- a/lib/gpu/Makefile.mac_opencl
+++ b/lib/gpu/Makefile.mac_opencl
@ -1,19 +1,17 @@
 # /* ----------------------------------------------------------------------   
-#  Generic Mac Makefile for OpenCL 
+#  Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.mac_ocl
-OCL_TUNE = -DFERMI_OCL       # -- Uncomment for NVIDIA Fermi
+LMP_INC = -DLAMMPS_SMALLBIG
 # OCL_TUNE = -DKEPLER_OCL    # -- Uncomment for NVIDIA Kepler
 # OCL_TUNE = -DCYPRESS_OCL   # -- Uncomment for AMD Cypress
 # OCL_TUNE = -DGENERIC_OCL   # -- Uncomment for generic device
-OCL_CPP = mpic++ -O3 -DMPI_GERYON -DUCL_NO_EXIT
+OCL_CPP = clang++ -std=c++11 -O3 -I../../src/STUBS
 OCL_LINK = -framework OpenCL
 OCL_PREC = -D_SINGLE_SINGLE
 OCL_TUNE = -DUCL_NO_EXIT
 BIN_DIR = ./
 OBJ_DIR = ./
--- a/lib/gpu/Makefile.mac_opencl_mpi
+++ b/lib/gpu/Makefile.mac_opencl_mpi
@ -0,0 +1,23 @@
 # /* ----------------------------------------------------------------------   
 #  Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.mac_ocl
 LMP_INC = -DLAMMPS_SMALLBIG
 OCL_CPP = mpicxx -std=c++11 -O3 -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
 OCL_LINK = -framework OpenCL
 OCL_PREC = -D_SINGLE_SINGLE
 OCL_TUNE = -DUCL_NO_EXIT -DMPI_GERYON
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 include Opencl.makefile
--- a/lib/gpu/Makefile.oneapi
+++ b/lib/gpu/Makefile.oneapi
@ -0,0 +1,26 @@
 # /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for OpenCL - Intel oneAPI compilers, mixed precision
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.opencl
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
 OCL_INC =
 OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd  -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
 OCL_LINK = -lOpenCL
 OCL_PREC = -D_SINGLE_DOUBLE
 OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 include Opencl.makefile
--- a/lib/gpu/Makefile.opencl
+++ b/lib/gpu/Makefile.opencl
@ -1,92 +0,0 @@
 # /* ----------------------------------------------------------------------   
 #  Generic Linux Makefile for OpenCL 
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.opencl
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
 LMP_INC = -DLAMMPS_SMALLBIG
 # precision for GPU calculations
 # -D_SINGLE_SINGLE  # Single precision for all calculations
 # -D_DOUBLE_DOUBLE  # Double precision for all calculations
 # -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
 OCL_PREC = -D_SINGLE_DOUBLE
 BIN_DIR = ./
 OBJ_DIR = ./
 LIB_DIR = ./
 AR = ar
 BSH = /bin/sh
 # Compiler and linker settings
 # OCL_TUNE = -DFERMI_OCL     # -- Uncomment for NVIDIA Fermi
 # OCL_TUNE = -DKEPLER_OCL    # -- Uncomment for NVIDIA Kepler
 # OCL_TUNE = -DCYPRESS_OCL   # -- Uncomment for AMD Cypress
 OCL_TUNE = -DGENERIC_OCL   # -- Uncomment for generic device
 OCL_INC = -I/usr/local/cuda/include  # Path to CL directory
 OCL_CPP = mpic++ $(DEFAULT_DEVICE) -g -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
 OCL_LINK = -lOpenCL
 OCL  = $(OCL_CPP) $(OCL_PREC) $(OCL_TUNE) -DUSE_OPENCL
 # Headers for Geryon
 UCL_H  = $(wildcard ./geryon/ucl*.h)
 OCL_H  = $(wildcard ./geryon/ocl*.h) $(UCL_H) lal_preprocessor.h
 PRE1_H = lal_preprocessor.h lal_aux_fun1.h
 ALL_H  =  $(OCL_H) $(wildcard ./lal_*.h)
 # Source files
 SRCS := $(wildcard ./lal_*.cpp)
 OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
 CUS  := $(wildcard lal_*.cu)
 KERS := $(subst ./,$(OBJ_DIR)/,$(CUS:lal_%.cu=%_cl.h))
 KERS := $(addprefix $(OBJ_DIR)/, $(KERS))
 # targets
 GPU_LIB = $(LIB_DIR)/libgpu.a
 EXECS = $(BIN_DIR)/ocl_get_devices
 all: $(OBJ_DIR) $(KERS) $(GPU_LIB) $(EXECS)
 $(OBJ_DIR):
 	mkdir -p $@
 # device code compilation
 $(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@;
 # host code compilation
 $(OBJ_DIR)/lal_%.o: lal_%.cpp $(KERS)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 # build libgpu.a
 $(GPU_LIB): $(OBJS)
 	$(AR) -crusv $(GPU_LIB) $(OBJS)
 	@cp $(EXTRAMAKE) Makefile.lammps
 # test app for querying device info
 $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
 	$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
 clean:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(KERS) *.linkinfo
 veryclean: clean
 	-rm -rf *~ *.linkinfo
 cleanlib:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(KERS) *.linkinfo
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@ -1,6 +1,7 @@
 # Headers for Geryon
 UCL_H  = $(wildcard ./geryon/ucl*.h)
-NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h
+NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h \
         lal_pre_cuda_hip.h
 ALL_H  =  $(NVD_H) $(wildcard ./lal_*.h)
 # Source files
@ -39,17 +40,21 @@ BIN2C = $(CUDA_HOME)/bin/bin2c
 # device code compilation
-$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
+$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h \
                         lal_pre_cuda_hip.h
 	$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
 $(OBJ_DIR)/pppm_f_cubin.h: $(OBJ_DIR)/pppm_f.cubin
 	$(BIN2C) -c -n pppm_f $(OBJ_DIR)/pppm_f.cubin > $(OBJ_DIR)/pppm_f_cubin.h
 	rm $(OBJ_DIR)/pppm_f.cubin
-$(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
+$(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h \
                         lal_pre_cuda_hip.h
 	$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu
 $(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin
 	$(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h
 	rm $(OBJ_DIR)/pppm_d.cubin
 $(OBJ_DIR)/%_cubin.h: lal_%.cu  $(ALL_H)
 	$(CUDA) --fatbin -DNV_KERNEL -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/lal_$*.cu
@ -93,7 +98,7 @@ $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
 	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
 clean:
-	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
+	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo
 veryclean: clean
 	-rm -rf *~ *.linkinfo
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@ -1,8 +1,15 @@
 # Common headers for kernels
 PRE1_H = lal_preprocessor.h lal_aux_fun1.h
 # Headers for Geryon
 UCL_H  = $(wildcard ./geryon/ucl*.h)
-OCL_H  = $(wildcard ./geryon/ocl*.h) $(UCL_H) lal_preprocessor.h
+OCL_H  = $(wildcard ./geryon/ocl*.h) $(UCL_H) lal_precision.h
-PRE1_H = lal_preprocessor.h lal_aux_fun1.h
+
-ALL_H  =  $(OCL_H) $(wildcard ./lal_*.h)
+# Headers for Host files
 HOST_H = lal_answer.h lal_atom.h lal_balance.h lal_base_atomic.h \
         lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \
         lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \
         lal_neighbor_shared.h lal_pre_ocl_config.h $(OCL_H)
 # Source files
 SRCS := $(wildcard ./lal_*.cpp)
@ -28,12 +35,75 @@ OCL  = $(OCL_CPP) $(OCL_PREC) $(OCL_TUNE) -DUSE_OPENCL
 # device code compilation
 $(OBJ_DIR)/atom_cl.h: lal_atom.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh atom lal_preprocessor.h lal_atom.cu $(OBJ_DIR)/atom_cl.h
 $(OBJ_DIR)/neighbor_cpu_cl.h: lal_neighbor_cpu.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu lal_preprocessor.h lal_neighbor_cpu.cu $(OBJ_DIR)/neighbor_cpu_cl.h
 $(OBJ_DIR)/neighbor_gpu_cl.h: lal_neighbor_gpu.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh neighbor_gpu lal_preprocessor.h lal_neighbor_gpu.cu $(OBJ_DIR)/neighbor_gpu_cl.h
 $(OBJ_DIR)/device_cl.h: lal_device.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh device lal_preprocessor.h lal_device.cu $(OBJ_DIR)/device_cl.h
 $(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h lal_pppm.cu $(OBJ_DIR)/pppm_cl.h;
 $(OBJ_DIR)/ellipsoid_nbor_cl.h: lal_ellipsoid_nbor.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor lal_preprocessor.h lal_ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
 $(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu $(PRE1_H) lal_ellipsoid_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh gayberne $(PRE1_H) lal_ellipsoid_extra.h lal_gayberne.cu $(OBJ_DIR)/gayberne_cl.h;
 $(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu $(PRE1_H) lal_ellipsoid_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh gayberne_lj $(PRE1_H) lal_ellipsoid_extra.h lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_lj_cl.h;
 $(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu $(PRE1_H) lal_ellipsoid_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh re_squared $(PRE1_H) lal_ellipsoid_extra.h lal_re_squared.cu $(OBJ_DIR)/re_squared_cl.h;
 $(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu $(PRE1_H) lal_ellipsoid_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh re_squared_lj $(PRE1_H) lal_ellipsoid_extra.h lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_lj_cl.h;
 $(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu $(PRE1_H) lal_tersoff_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h;
 $(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu $(PRE1_H) lal_tersoff_mod_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh tersoff_mod $(PRE1_H) lal_tersoff_mod_extra.h lal_tersoff_mod.cu $(OBJ_DIR)/tersoff_mod_cl.h;
 $(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu $(PRE1_H) lal_tersoff_zbl_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
 # Generic kernel-header rule: for any stem X, build $(OBJ_DIR)/X_cl.h from
 # lal_X.cu and the common headers listed in $(PRE1_H). file_to_cstr.sh is
 # passed the stem ($*), the headers, the kernel source ($<) and the output
 # file ($@), in that order. Kernels that use a different header set have
 # explicit rules of their own earlier in this file.
 $(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@;
 # host code compilation
-$(OBJ_DIR)/lal_%.o: lal_%.cpp $(KERS)
+$(OBJ_DIR)/lal_answer.o: lal_answer.cpp $(HOST_H)
 	$(OCL) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)
 $(OBJ_DIR)/lal_dpd_tstat_ext.o: lal_dpd_tstat_ext.cpp lal_dpd.h $(HOST_H)
 	$(OCL) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR)
 $(OBJ_DIR)/lal_eam_alloy_ext.o: lal_eam_alloy_ext.cpp lal_eam.h $(HOST_H)
 	$(OCL) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR)
 $(OBJ_DIR)/lal_eam_fs_ext.o: lal_eam_fs_ext.cpp lal_eam.h $(HOST_H)
 	$(OCL) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR)
 $(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp $(HOST_H)
 	$(OCL) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)
 $(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp $(HOST_H)
 	$(OCL) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)
 # Generic host-code rules. Each one compiles its first prerequisite ($<) with
 # $(OCL) and adds $(OBJ_DIR) to the include path.
 # lal_X_ext.o: built from lal_X_ext.cpp; also depends on lal_X.h.
 $(OBJ_DIR)/lal_%_ext.o: lal_%_ext.cpp lal_%.h $(HOST_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 # lal_base_X.o: depends only on its source file and the host headers.
 $(OBJ_DIR)/lal_base_%.o: lal_base_%.cpp $(HOST_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 # lal_X.o: additionally depends on the generated kernel header X_cl.h.
 # NOTE(review): this prerequisite is written without the $(OBJ_DIR)/ prefix,
 # whereas the rule that generates the header targets $(OBJ_DIR)/%_cl.h.
 # Confirm both resolve to the same file for the OBJ_DIR values in use.
 $(OBJ_DIR)/lal_%.o: lal_%.cpp %_cl.h $(HOST_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
--- a/lib/gpu/README
+++ b/lib/gpu/README
@ -4,18 +4,109 @@
                       W. Michael Brown (ORNL)
                        Trung Dac Nguyen (ORNL/Northwestern)
-                          Peng Wang (NVIDIA)
+                        Nitin Dhamankar (Intel)
                       Axel Kohlmeyer (Temple)
                          Peng Wang (NVIDIA)
                        Anders Hafreager (UiO)
                          V. Nikolskiy (HSE)
                   Maurice de Koning (Unicamp/Brazil)
                  Rodolfo Paula Leite (Unicamp/Brazil)
                         Steve Plimpton (SNL)
                        Inderaj Bains (NVIDIA)
 -------------------------------------------------------------------
-This directory has source files to build a library that LAMMPS
+------------------------------------------------------------------------------
 links against when using the GPU package.
-This library must be built with a C++ compiler, before LAMMPS is
+This directory has source files to build a library that LAMMPS links against
-built, so LAMMPS can link against it.
+when using the GPU package.
 This library must be built with a C++ compiler along with CUDA, HIP, or OpenCL
 before LAMMPS is built, so LAMMPS can link against it.
 This library, libgpu.a, provides routines for acceleration of certain
 LAMMPS styles and neighbor list builds using CUDA, OpenCL, or ROCm HIP.
 Pair styles supported by this library are marked in the list of Pair style
 potentials with a "g". See the online version at:
 https://lammps.sandia.gov/doc/Commands_pair.html
 In addition the (plain) pppm kspace style is supported as well.
 ------------------------------------------------------------------------------
                              DEVICE QUERY
 ------------------------------------------------------------------------------
 The gpu library includes binaries to check for available GPUs and their
 properties. It is a good idea to run this on first use to make sure the
 system and build are set up properly. Additionally, the GPU numbering for
 specific selection of devices should be taken from this output. The GPU
 library may split some accelerators into separate virtual accelerators for
 efficient use with MPI.
 After building the GPU library, for OpenCL:
  ./ocl_get_devices
 for CUDA:
  ./nvc_get_devices
 and for ROCm HIP:
  ./hip_get_devices
 ------------------------------------------------------------------------------
                              QUICK START
 ------------------------------------------------------------------------------
 OpenCL: Mac without MPI:
  make -f Makefile.mac_opencl -j; cd ../../src/; make mpi-stubs
  make g++_serial -j
  ./lmp_g++_serial -in ../bench/in.lj -log none -sf gpu
 OpenCL: Mac with MPI:
  make -f Makefile.mac_opencl_mpi -j; cd ../../src/; make g++_openmpi -j
  mpirun -np $NUM_MPI ./lmp_g++_openmpi -in ../bench/in.lj -log none -sf gpu
 OpenCL: Linux with Intel oneAPI:
  make -f Makefile.oneapi -j; cd ../../src; make oneapi -j
  export OMP_NUM_THREADS=$NUM_THREADS
  mpirun -np $NUM_MPI ./lmp_oneapi -in ../bench/in.lj -log none -sf gpu
 OpenCL: Linux with MPI:
  make -f Makefile.linux_opencl -j; cd ../../src; make omp -j
  export OMP_NUM_THREADS=$NUM_THREADS
  mpirun -np $NUM_MPI ./lmp_omp -in ../bench/in.lj -log none -sf gpu
 NVIDIA CUDA:
  make -f Makefile.cuda_mps -j; cd ../../src; make omp -j
  export CUDA_MPS_LOG_DIRECTORY=/tmp; export CUDA_MPS_PIPE_DIRECTORY=/tmp
  nvidia-smi -i 0 -c EXCLUSIVE_PROCESS
  export OMP_NUM_THREADS=$NUM_THREADS
  mpirun -np $NUM_MPI ./lmp_omp -in ../bench/in.lj -log none -sf gpu
  echo quit | /usr/bin/nvidia-cuda-mps-control
 AMD HIP:
  make -f Makefile.hip -j; cd ../../src; make omp -j
  export OMP_NUM_THREADS=$NUM_THREADS
  mpirun -np $NUM_MPI ./lmp_omp -in ../bench/in.lj -log none -sf gpu
 ------------------------------------------------------------------------------
                 Installing oneAPI, OpenCL, CUDA, or ROCm
 ------------------------------------------------------------------------------
 The easiest approach is to use the Linux package manager to perform the
 installation from Intel, NVIDIA, etc. repositories. All are available for
 free. The oneAPI installation includes Intel optimized MPI and C++ compilers,
 along with many libraries. Alternatively, Intel OpenCL can also be installed
 separately from the Intel repository.
 NOTE: Installation of the CUDA SDK is not required, only the CUDA toolkit.
 See:
 https://software.intel.com/content/www/us/en/develop/tools/oneapi/hpc-toolkit.html
 https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
 https://github.com/RadeonOpenCompute/ROCm
 ------------------------------------------------------------------------------
                              Build Intro
 ------------------------------------------------------------------------------
 You can type "make lib-gpu" from the src directory to see help on how
 to build this library via make commands, or you can do the same thing
@ -25,7 +116,7 @@ do it manually by following the instructions below.
 Build the library using one of the provided Makefile.* files or create
 your own, specific to your compiler and system.  For example:
-make -f Makefile.linux
+make -f Makefile.linux_opencl
 When you are done building this library, two files should
 exist in this directory:
@ -45,33 +136,132 @@ IMPORTANT: If you re-build the library, e.g. for a different precision
 Makefile.linux clean, to ensure all previous derived files are removed
 before the new build is done.
-Makefile.lammps has settings for 3 variables:
+NOTE: The system-specific setting LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG,
      or LAMMPS_SMALLSMALL if specified when building LAMMPS (i.e. in
      src/MAKE/Makefile.foo) should be consistent with that specified
      when building libgpu.a (i.e. by LMP_INC in the lib/gpu/Makefile.bar).
 user-gpu_SYSINC = leave blank for this package
 user-gpu_SYSLIB = CUDA libraries needed by this package
 user-gpu_SYSPATH = path(s) to where those libraries are
-Because you have the CUDA compilers on your system, you should have
+------------------------------------------------------------------------------
-the needed libraries.  If the CUDA development tools were installed
+                             PRECISION MODES
-in the standard manner, the settings in the Makefile.lammps.standard
+------------------------------------------------------------------------------
-file should work.
+The GPU library supports 3 precision modes: single, double, and mixed, with
 the latter being the default for most Makefiles aside from Mac specific
 Makefiles due to the more restrictive nature of the Apple OpenCL for some
 devices.
-------------------------------------------------------------------
+To specify the precision mode (output to the screen before LAMMPS runs for
 verification), set either CUDA_PRECISION, OCL_PREC, or HIP_PRECISION to one
 of -D_SINGLE_SINGLE, -D_DOUBLE_DOUBLE, or -D_SINGLE_DOUBLE.
-                          GENERAL NOTES
+Some accelerators or OpenCL implementations only support single precision.
-                  --------------------------------
+This mode should be used with care and appropriate validation as the errors
 can scale with system size in this implementation. This can be useful for
 accelerating test runs when setting up a simulation for production runs on
 another machine. In the case where only single precision is supported, either
 LAMMPS must be compiled with -DFFT_SINGLE to use PPPM with GPU acceleration
 or GPU acceleration should be disabled for PPPM (e.g. suffix off or pair/only
 as described in the LAMMPS documentation).
 This library, libgpu.a, provides routines for GPU acceleration
 of certain LAMMPS styles and neighbor list builds. Compilation of this 
 library requires installing the CUDA GPU driver and CUDA toolkit for
 your operating system. Installation of the CUDA SDK is not necessary.
 In addition to the LAMMPS library, the binary nvc_get_devices will also
 be built. This can be used to query the names and properties of GPU 
 devices on your system. A Makefile for OpenCL and ROCm HIP compilation
 is provided, but support for it is not currently provided by the developers.
 Details of the implementation are provided in:
----
+------------------------------------------------------------------------------
                             CUDA BUILD NOTES
 ------------------------------------------------------------------------------
 NOTE: when compiling with CMake, all of the considerations listed below
 are considered within the CMake configuration process, so no separate
 compilation of the gpu library is required. Also this will build in support
 for all compute architectures that are supported by the CUDA toolkit version
 used to build the gpu library.
 If you do not want to use a fat binary, that supports multiple CUDA
 architectures, the CUDA_ARCH must be set to match the GPU architecture. This
 is reported by nvc_get_devices executable created by the build process and
 a detailed list of GPU architectures and CUDA compatible GPUs can be found
 e.g. here: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
 The CUDA_HOME variable should be set to the location of the CUDA toolkit.
 To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of
 the Makefiles. CUDA_ARCH should be set based on the compute capability of
 your GPU. This can be verified by running the nvc_get_devices executable after
 the build is complete. Additionally, the GPU package must be installed and
 compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
 LAMMPS makefile.
 Please note that the GPU library accesses the CUDA driver library directly,
 so it needs to be linked with the CUDA driver library (libcuda.so) that ships
 with the Nvidia driver. If you are compiling LAMMPS on the head node of a GPU
 cluster, this library may not be installed, so you may need to copy it over
 from one of the compute nodes (best into this directory). Recent CUDA toolkits
 starting from CUDA 9 provide a dummy libcuda.so library (typically under
 $(CUDA_HOME)/lib64/stubs), that can be used for linking.
 Best performance with the GPU library is typically with multiple MPI processes
 sharing the same GPU cards. For NVIDIA, this is most efficient with CUDA
 MPS enabled. To prevent runtime errors for GPUs configured in exclusive process
 mode with MPS, the GPU library should be built with either of the equivalent
 -DCUDA_MPS_SUPPORT or -DCUDA_PROXY flags.
 ------------------------------------------------------------------------------
                             HIP BUILD NOTES
 ------------------------------------------------------------------------------
 1. GPU sorting requires installing hipcub
 (https://github.com/ROCmSoftwarePlatform/hipCUB). The HIP CUDA-backend
 additionally requires cub (https://nvlabs.github.io/cub). Download and
 extract the cub directory to lammps/lib/gpu/ or specify an appropriate
 path in lammps/lib/gpu/Makefile.hip.
 2. In Makefile.hip it is possible to specify the target platform via
 export HIP_PLATFORM=hcc or HIP_PLATFORM=nvcc as well as the target
 architecture (gfx803, gfx900, gfx906 etc.)
 3. If your MPI implementation does not support `mpicxx --showme` command,
 it is required to specify the corresponding MPI compiler and linker flags
 in lammps/lib/gpu/Makefile.hip and in lammps/src/MAKE/OPTIONS/Makefile.hip.
 ------------------------------------------------------------------------------
                             OPENCL BUILD NOTES
 ------------------------------------------------------------------------------
 If GERYON_NUMA_FISSION is defined at build time, LAMMPS will consider separate
 NUMA nodes on GPUs or accelerators as separate devices. For example, a 2-socket
 CPU would appear as two separate devices for OpenCL (and LAMMPS would require
 two MPI processes to use both sockets with the GPU library - each with its
 own device ID as output by ocl_get_devices).
 For a debug build, use "-DUCL_DEBUG -DGERYON_KERNEL_DUMP" and remove
 "-DUCL_NO_EXIT" and "-DMPI_GERYON" from the build options.
 ------------------------------------------------------------------------------
                   ALL PREPROCESSOR OPTIONS (For Advanced Users)
 ------------------------------------------------------------------------------
 _SINGLE_SINGLE          Build library for single precision mode
 _SINGLE_DOUBLE          Build library for mixed precision mode
 _DOUBLE_DOUBLE          Build library for double precision mode
 CUDA_MPS_SUPPORT        Do not generate errors for exclusive mode for CUDA
 CUDA_PROXY              Same as above
 MPI_GERYON              Library should use MPI_Abort for unhandled errors
 GERYON_NUMA_FISSION     Accelerators with main memory NUMA are split into
                        multiple virtual accelerators for each NUMA node
 LAL_USE_OMP=0           Disable OpenMP in lib, regardless of compiler setting
 LAL_USE_OMP_SIMD=0      Disable OpenMP SIMD in lib, regardless of compiler set
 GERYON_OCL_FLUSH        For OpenCL, flush queue after every enqueue
 LAL_NO_OCL_EV_JIT       Turn off JIT specialization for kernels in OpenCL
 LAL_USE_OLD_NEIGHBOR    Use old neighbor list algorithm
 USE_CUDPP               Enable GPU binning in neighbor builds (not recommended)
 USE_HIP_DEVICE_SORT     Enable GPU binning for HIP builds
                        (only w/ LAL_USE_OLD_NEIGHBOR)
 LAL_NO_BLOCK_REDUCE     Use host for energy/virial accumulation
 LAL_OCL_EXTRA_ARGS      Supply extra args for OpenCL compiler delimited with :
 UCL_NO_EXIT             LAMMPS should handle errors instead of Geryon lib
 UCL_DEBUG               Debug build for Geryon
 GERYON_KERNEL_DUMP      Dump all compiled OpenCL programs with compiler
                        flags and build logs
 GPU_CAST                Casting performed on GPU, untested recently
 THREE_CONCURRENT        Concurrent 3-body calcs in separate queues, untested
 ------------------------------------------------------------------------------
                           References for Details
 ------------------------------------------------------------------------------
 Brown, W.M., Wang, P. Plimpton, S.J., Tharrington, A.N. Implementing
 Molecular Dynamics on Hybrid High Performance Computers - Short Range
@ -89,116 +279,3 @@ Brown, W.M., Masako, Y. Implementing Molecular Dynamics on Hybrid High
 Performance Computers - Three-Body Potentials. Computer Physics Communications.
 2013. 184: p. 2785–2793.
 ----
 NOTE: Installation of the CUDA SDK is not required, only the CUDA
 toolkit itself or an OpenCL 1.2 compatible header and library.
 Pair styles supporting GPU acceleration through this library
 are marked in the list of Pair style potentials with a "g".
 See the online version at: https://lammps.sandia.gov/doc/Commands_pair.html
 In addition the (plain) pppm kspace style is supported as well.
                     MULTIPLE LAMMPS PROCESSES
                  --------------------------------
 Multiple LAMMPS MPI processes can share GPUs on the system, but multiple
 GPUs cannot be utilized by a single MPI process. In many cases, the
 best performance will be obtained by running as many MPI processes as
 CPU cores available with the condition that the number of MPI processes
 is an integer multiple of the number of GPUs being used. See the 
 LAMMPS user manual for details on running with GPU acceleration.
                    BUILDING AND PRECISION MODES
                  --------------------------------
 To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of 
 the Makefiles. CUDA_ARCH should be set based on the compute capability of
 your GPU. This can be verified by running the nvc_get_devices executable after
 the build is complete. Additionally, the GPU package must be installed and
 compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
 LAMMPS makefile.
 Please note that the GPU library accesses the CUDA driver library directly,
 so it needs to be linked not only to the CUDA runtime library (libcudart.so)
 that ships with the CUDA toolkit, but also with the CUDA driver library
 (libcuda.so) that ships with the Nvidia driver. If you are compiling LAMMPS
 on the head node of a GPU cluster, this library may not be installed,
 so you may need to copy it over from one of the compute nodes (best into
 this directory). Recent CUDA toolkits starting from CUDA 9 provide a dummy
 libcuda.so library (typically under $(CUDA_HOME)/lib64/stubs), that can be used for
 linking.
 The gpu library supports 3 precision modes as determined by 
 the CUDA_PRECISION variable:
  CUDA_PRECISION = -D_SINGLE_SINGLE  # Single precision for all calculations
  CUDA_PRECISION = -D_DOUBLE_DOUBLE  # Double precision for all calculations
  CUDA_PRECISION = -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
 As of CUDA 7.5 only GPUs with compute capability 2.0 (Fermi) or newer are
 supported and as of CUDA 9.0 only compute capability 3.0 (Kepler) or newer
 are supported. There are some limitations of this library for GPUs older
 than that, which require additional preprocessor flag, and limit features,
 but they are kept for historical reasons. There is no value in trying to
 use those GPUs for production calculations.
 You have to make sure that you set a CUDA_ARCH line suitable for your
 hardware and CUDA toolkit version: e.g. -arch=sm_35 for Tesla K20 or K40
 or -arch=sm_52 GeForce GTX Titan X. A detailed list of GPU architectures
 and CUDA compatible GPUs can be found e.g. here: 
 https://en.wikipedia.org/wiki/CUDA#GPUs_supported
 NOTE: when compiling with CMake, all of the considerations listed below
 are considered within the CMake configuration process, so no separate 
 compilation of the gpu library is required. Also this will build in support
 for all compute architectures that are supported by the CUDA toolkit version
 used to build the gpu library.
 Please note the CUDA_CODE settings in Makefile.linux_multi, which allows
 to compile this library with support for multiple GPUs. This list can be
 extended for newer GPUs with newer CUDA toolkits and should allow to build
 a single GPU library compatible with all GPUs that are worth using for
 GPU acceleration and supported by the current CUDA toolkits and drivers.
 NOTE: The system-specific setting LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG, 
      or LAMMPS_SMALLSMALL if specified when building LAMMPS (i.e. in 
      src/MAKE/Makefile.foo) should be consistent with that specified 
      when building libgpu.a (i.e. by LMP_INC in the lib/gpu/Makefile.bar).
                      BUILDING FOR HIP FRAMEWORK
                   --------------------------------
 1. Install the latest ROCm framework (https://github.com/RadeonOpenCompute/ROCm).
 2. GPU sorting requires installing hipcub 
 (https://github.com/ROCmSoftwarePlatform/hipCUB). The HIP CUDA-backend
 additionally requires cub (https://nvlabs.github.io/cub). Download and
 extract the cub directory to lammps/lib/gpu/ or specify an appropriate
 path in lammps/lib/gpu/Makefile.hip.
 3. In Makefile.hip it is possible to specify the target platform via 
 export HIP_PLATFORM=hcc or HIP_PLATFORM=nvcc as well as the target 
 architecture (gfx803, gfx900, gfx906 etc.)
 4. If your MPI implementation does not support `mpicxx --showme` command,
 it is required to specify the corresponding MPI compiler and linker flags
 in lammps/lib/gpu/Makefile.hip and in lammps/src/MAKE/OPTIONS/Makefile.hip.
 5. Building the GPU library (libgpu.a): 
    cd lammps/lib/gpu; make -f Makefile.hip -j
 6. Building the LAMMPS executable (lmp_hip):
    cd ../../src; make hip -j
                      EXAMPLE CONVENTIONAL BUILD PROCESS
                  --------------------------------
 cd ~/lammps/lib/gpu
 emacs Makefile.linux
 make -f Makefile.linux
 ./nvc_get_devices
 cd ../../src
 emacs ./MAKE/Makefile.linux
 make yes-asphere
 make yes-kspace
 make yes-gpu
 make linux
--- a/lib/gpu/geryon/hip_device.h
+++ b/lib/gpu/geryon/hip_device.h
@ -8,6 +8,9 @@
 #ifndef HIP_DEVICE
 #define HIP_DEVICE
 // workaround after GPU package Feb2021 update
 // todo: make new neighbor code work with HIP
 #define LAL_USE_OLD_NEIGHBOR
 #include <hip/hip_runtime.h>
 #include <unordered_map>
@ -24,6 +27,8 @@ namespace ucl_hip {
 // --------------------------------------------------------------------------
 typedef hipStream_t command_queue;
 /// No-op in this backend: the queue argument is accepted but nothing is done
 inline void ucl_flush(command_queue &cq) {}
 /// Synchronize with the given stream via hipStreamSynchronize; the returned
 /// HIP status is handed to CU_SAFE_CALL
 inline void ucl_sync(hipStream_t &stream) {
  CU_SAFE_CALL(hipStreamSynchronize(stream));
 }
@ -39,8 +44,8 @@ struct NVDProperties {
  int maxThreadsPerBlock;
  int maxThreadsDim[3];
  int maxGridSize[3];
-  int sharedMemPerBlock;
+  CUDA_INT_TYPE sharedMemPerBlock;
-  int totalConstantMemory;
+  CUDA_INT_TYPE totalConstantMemory;
  int SIMDWidth;
  int memPitch;
  int regsPerBlock;
@ -143,15 +148,26 @@ class UCL_Device {
  inline std::string device_type_name(const int i) { return "GPU"; }
  /// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type() { return device_type(_device); }
+  inline enum UCL_DEVICE_TYPE device_type() { return device_type(_device); }
  /// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type(const int i) { return UCL_GPU; }
+  inline enum UCL_DEVICE_TYPE device_type(const int i) { return UCL_GPU; }
  /// Returns true if host memory is efficiently addressable from the
  /// current device (forwards to the indexed overload using _device)
  inline bool shared_memory() { return shared_memory(_device); }
  /// Returns true if host memory is efficiently addressable from device i.
  /// Implemented as a device_type(i)==UCL_CPU test; device_type(i) in this
  /// class returns UCL_GPU for every i, so this is expected to be false here
  /// (assuming UCL_GPU and UCL_CPU are distinct enumerators - confirm)
  inline bool shared_memory(const int i) { return device_type(i)==UCL_CPU; }
  /// Returns preferred fp32 vector width for the current device
  /// (forwards to the indexed overload using _device)
  inline int preferred_fp32_width() { return preferred_fp32_width(_device); }
  /// Returns preferred fp32 vector width for device i; this backend reports
  /// the SIMDWidth value stored in the device properties
  inline int preferred_fp32_width(const int i)
    {return _properties[i].SIMDWidth;}
  /// Returns preferred fp64 vector width for the current device
  /// (forwards to the indexed overload using _device)
  inline int preferred_fp64_width() { return preferred_fp64_width(_device); }
  /// Returns preferred fp64 vector width for device i; same SIMDWidth value
  /// as the fp32 query, no separate fp64 width is stored
  inline int preferred_fp64_width(const int i)
    {return _properties[i].SIMDWidth;}
  /// Returns true if double precision is supported for the current device
  inline bool double_precision() { return double_precision(_device); }
  /// Returns true if double precision is supported for the device
@ -215,6 +231,18 @@ class UCL_Device {
  /// Get the maximum number of threads per block
  inline size_t group_size(const int i)
    { return _properties[i].maxThreadsPerBlock; }
  /// Get the maximum number of threads per block in dimension 'dim' for the
  /// current device (forwards to the indexed overload using _device)
  inline size_t group_size_dim(const int dim)
    { return group_size_dim(_device, dim); }
  /// Get the maximum number of threads per block in dimension 'dim' for
  /// device i. 'dim' indexes the 3-element maxThreadsDim array directly and
  /// is not range checked, so callers must pass 0, 1 or 2.
  inline size_t group_size_dim(const int i, const int dim)
    { return _properties[i].maxThreadsDim[dim];}
  /// Get the shared local memory size in bytes for the current device
  /// (forwards to the indexed overload using _device)
  inline size_t slm_size() { return slm_size(_device); }
  /// Get the shared local memory size in bytes for device i; reported from
  /// the stored sharedMemPerBlock property
  inline size_t slm_size(const int i)
    { return _properties[i].sharedMemPerBlock; }
  /// Return the maximum memory pitch in bytes for current device
  inline size_t max_pitch() { return max_pitch(_device); }
@ -255,11 +283,20 @@ class UCL_Device {
  inline int max_sub_devices(const int i)
    { return 0; }
  /// True if the current device supports shuffle intrinsics
  /// (forwards to the indexed overload using _device)
  inline bool has_shuffle_support()
    { return has_shuffle_support(_device); }
  /// True if device i supports shuffle intrinsics; decided solely by
  /// comparing arch(i) against 3.0.
  /// NOTE(review): arch() is defined elsewhere; presumably it returns a
  /// major.minor capability number - confirm the threshold suits HIP devices.
  inline bool has_shuffle_support(const int i)
    { return arch(i)>=3.0; }
  /// List all devices along with all properties
  inline void print_all(std::ostream &out);
-  /// Select the platform that has accelerators (for compatibility with OpenCL)
+  /// For compatibility with OCL API
-  inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
+  inline int auto_set_platform(const enum UCL_DEVICE_TYPE type=UCL_GPU,
 			       const std::string vendor="")
    { return set_platform(0); }
  inline int load_module(const void* program, hipModule_t& module, std::string *log=nullptr){
    auto it = _loaded_modules.emplace(program, hipModule_t());
@ -328,32 +365,35 @@ UCL_Device::UCL_Device() {
    CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev));
    prop.name=namecstr;
-    CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev));
+    hipDeviceProp_t hip_prop;
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev));
+    CU_SAFE_CALL_NS(hipGetDeviceProperties(&hip_prop,dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev));
+
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev));
+    prop.totalGlobalMem = hip_prop.totalGlobalMem;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev));
+    prop.multiProcessorCount = hip_prop.multiProcessorCount;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev));
+    prop.maxThreadsPerBlock = hip_prop.maxThreadsPerBlock;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev));
+    prop.maxThreadsDim[0] = hip_prop.maxThreadsDim[0];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev));
+    prop.maxThreadsDim[1] = hip_prop.maxThreadsDim[1];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev));
+    prop.maxThreadsDim[2] = hip_prop.maxThreadsDim[2];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev));
+    prop.maxGridSize[0] = hip_prop.maxGridSize[0];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev));
+    prop.maxGridSize[1] = hip_prop.maxGridSize[1];
    prop.maxGridSize[2] = hip_prop.maxGridSize[2];
    prop.sharedMemPerBlock = hip_prop.sharedMemPerBlock;
    prop.totalConstantMemory = hip_prop.totalConstMem;
    prop.SIMDWidth = hip_prop.warpSize;
    prop.regsPerBlock = hip_prop.regsPerBlock;
    prop.clockRate = hip_prop.clockRate;
    prop.computeMode = hip_prop.computeMode;
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev));
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev));
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
    //#if CUDA_VERSION >= 2020
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev));
    //#endif
    //#if CUDA_VERSION >= 3010
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev));
+    prop.concurrentKernels = hip_prop.concurrentKernels;
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
    //#endif
--- a/lib/gpu/geryon/hip_kernel.h
+++ b/lib/gpu/geryon/hip_kernel.h
@ -14,6 +14,7 @@
 #include <fstream>
 #include <string>
 #include <iostream>
 #include <cstdio>
 namespace ucl_hip {
@ -64,15 +65,19 @@ class UCL_Program {
  }
  /// Load a program from a string and compile with flags
  /** Forwards program, the module handle and log to
    * _device_ptr->load_module() and returns its result.
    * The flags and foutput arguments are not used in this body. **/
-  inline int load_string(const void *program, const char *flags="", std::string *log=nullptr) {
+  inline int load_string(const void *program, const char *flags="", std::string *log=nullptr, FILE* foutput=nullptr) {
    return _device_ptr->load_module(program, _module, log);
  }
  /// Return the default command queue/stream associated with this data
  inline hipStream_t & cq() { return _cq; }
  friend class UCL_Kernel;
 private:
  hipModule_t _module;
  hipStream_t _cq;
  friend class UCL_Texture;
  friend class UCL_Const;
 };
 /// Class for dealing with CUDA Driver kernels
--- a/lib/gpu/geryon/hip_texture.h
+++ b/lib/gpu/geryon/hip_texture.h
@ -107,6 +107,37 @@ class UCL_Texture {
  }
 };
 /// Class storing a const global memory reference
 class UCL_Const {
 public:
  UCL_Const() {}
  ~UCL_Const() {}
  /// Construct with a specified global reference
  inline UCL_Const(UCL_Program &prog, const char *global_name)
    { get_global(prog,global_name); }
  /// Set the global reference for this object
  inline void get_global(UCL_Program &prog, const char *global_name) {
    _cq=prog.cq();
    CU_SAFE_CALL(hipModuleGetGlobal(&_global, &_global_bytes, prog._module,
 				    global_name)); 
  }
  /// Copy from array on host to const memory
  template <class numtyp>
  inline void update_device(UCL_H_Vec<numtyp> &src, const int numel) {
    CU_SAFE_CALL(hipMemcpyHtoDAsync(_global, src.begin(), numel*sizeof(numtyp),
 				    _cq));
  }
  /// Get device ptr associated with object
  inline const void* begin() const { return &_global; }
  inline void clear() {}
 private:
  hipStream_t _cq;
  void* _global;
  size_t _global_bytes;
  friend class UCL_Kernel;
 };
 } // namespace
 #endif
--- a/lib/gpu/geryon/nvd_device.h
+++ b/lib/gpu/geryon/nvd_device.h
@ -37,6 +37,8 @@ namespace ucl_cudadr {
 // --------------------------------------------------------------------------
 typedef CUstream command_queue;
 inline void ucl_flush(command_queue &cq) {}
 inline void ucl_sync(CUstream &stream) {
  CU_SAFE_CALL(cuStreamSynchronize(stream));
 }
@ -156,15 +158,26 @@ class UCL_Device {
  inline std::string device_type_name(const int i) { return "GPU"; }
  /// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type() { return device_type(_device); }
+  inline enum UCL_DEVICE_TYPE device_type() { return device_type(_device); }
  /// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type(const int i) { return UCL_GPU; }
+  inline enum UCL_DEVICE_TYPE device_type(const int i) { return UCL_GPU; }
  /// Returns true if host memory is efficiently addressable from device
  inline bool shared_memory() { return shared_memory(_device); }
  /// Returns true if host memory is efficiently addressable from device
  inline bool shared_memory(const int i) { return device_type(i)==UCL_CPU; }
  /// Returns preferred vector width
  inline int preferred_fp32_width() { return preferred_fp32_width(_device); }
  /// Returns preferred vector width
  inline int preferred_fp32_width(const int i)
    {return _properties[i].SIMDWidth;}
  /// Returns preferred vector width
  inline int preferred_fp64_width() { return preferred_fp64_width(_device); }
  /// Returns preferred vector width
  inline int preferred_fp64_width(const int i)
    {return _properties[i].SIMDWidth;}
  /// Returns true if double precision is support for the current device
  inline bool double_precision() { return double_precision(_device); }
  /// Returns true if double precision is support for the device
@ -228,6 +241,18 @@ class UCL_Device {
  /// Get the maximum number of threads per block
  inline size_t group_size(const int i)
    { return _properties[i].maxThreadsPerBlock; }
  /// Get the maximum number of threads per block in dimension 'dim'
  inline size_t group_size_dim(const int dim)
    { return group_size_dim(_device, dim); }
  /// Get the maximum number of threads per block in dimension 'dim'
  inline size_t group_size_dim(const int i, const int dim)
    { return _properties[i].maxThreadsDim[dim]; }
  /// Get the shared local memory size in bytes
  inline size_t slm_size() { return slm_size(_device); }
  /// Get the shared local memory size in bytes
  inline size_t slm_size(const int i)
    { return _properties[i].sharedMemPerBlock; }
  /// Return the maximum memory pitch in bytes for current device
  inline size_t max_pitch() { return max_pitch(_device); }
@ -268,11 +293,22 @@ class UCL_Device {
  inline int max_sub_devices(const int i)
    { return 0; }
  /// True if the device supports shuffle intrinsics
  inline bool has_shuffle_support()
    { return has_shuffle_support(_device); }
  /// True if the device supports shuffle intrinsics
  inline bool has_shuffle_support(const int i)
    { return arch(i)>=3.0; }
  /// List all devices along with all properties
  inline void print_all(std::ostream &out);
-  /// Select the platform that has accelerators (for compatibility with OpenCL)
+  /// For compatibility with OCL API; every argument is ignored and set_platform(0) is returned
-  inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
+  inline int auto_set_platform(const enum UCL_DEVICE_TYPE type=UCL_GPU,
 			       const std::string vendor="",
 			       const int ndevices=-1,
 			       const int first_device=-1)
    { return set_platform(0); }
 private:
  int _device, _num_devices;
--- a/lib/gpu/geryon/nvd_kernel.h
+++ b/lib/gpu/geryon/nvd_kernel.h
@ -26,6 +26,7 @@
 #include "nvd_device.h"
 #include <fstream>
 #include <cstdio>
 namespace ucl_cudadr {
@ -77,7 +78,7 @@ class UCL_Program {
  /// Load a program from a string and compile with flags
  inline int load_string(const void *program, const char *flags="",
-                         std::string *log=nullptr) {
+                         std::string *log=nullptr, FILE* foutput=nullptr) {
    if (std::string(flags)=="BINARY")
      return load_binary((const char *)program);
    const unsigned int num_opts=2;
@ -100,12 +101,25 @@ class UCL_Program {
    if (err != CUDA_SUCCESS) {
      // Compilation failed: report on stderr (unless UCL_NO_EXIT is defined)
      // and, when foutput is non-null, write the same banner to that stream.
      // NOTE(review): if `log` here is the `std::string *log` parameter of
      // load_string, streaming it prints a pointer value rather than the
      // compiler output, and passing it to "%s" below is undefined behavior.
      // Confirm which buffer actually holds the JIT log text.
      #ifndef UCL_NO_EXIT
-      std::cerr << std::endl
+      std::cerr << std::endl << std::endl
                << "----------------------------------------------------------\n"
                << " UCL Error: Error compiling PTX Program...\n"
                << "----------------------------------------------------------\n";
-      std::cerr << log << std::endl;
+      std::cerr << log << std::endl
                << "----------------------------------------------------------\n\n";
      #endif
      if (foutput != NULL) {
 	fprintf(foutput,"\n\n");
 	fprintf(foutput,
 		"----------------------------------------------------------\n");
 	fprintf(foutput," UCL Error: Error compiling PTX Program...\n");
 	fprintf(foutput,
 		"----------------------------------------------------------\n");
 	fprintf(foutput,"%s\n",log);
 	fprintf(foutput,
 		"----------------------------------------------------------\n");
 	fprintf(foutput,"\n\n");
      }
      return UCL_COMPILE_ERROR;
    }
@ -139,11 +153,15 @@ class UCL_Program {
    return UCL_SUCCESS;
  }
  /// Return the default command queue/stream associated with this data
  inline command_queue & cq() { return _cq; }
  friend class UCL_Kernel;
 private:
  CUmodule _module;
  CUstream _cq;
  friend class UCL_Texture;
  friend class UCL_Const;
 };
 /// Class for dealing with CUDA Driver kernels
--- a/lib/gpu/geryon/nvd_texture.h
+++ b/lib/gpu/geryon/nvd_texture.h
@ -38,8 +38,11 @@ class UCL_Texture {
  inline UCL_Texture(UCL_Program &prog, const char *texture_name)
    { get_texture(prog,texture_name); }
  /// Set the texture reference for this object
-  inline void get_texture(UCL_Program &prog, const char *texture_name)
+  inline void get_texture(UCL_Program &prog, const char *texture_name) {
-    { CU_SAFE_CALL(cuModuleGetTexRef(&_tex, prog._module, texture_name)); }
+    #if (CUDA_VERSION < 11000)
    CU_SAFE_CALL(cuModuleGetTexRef(&_tex, prog._module, texture_name));
    #endif
  }
  /// Bind a float array where each fetch grabs a vector of length numel
  template<class numtyp>
@ -72,11 +75,14 @@ class UCL_Texture {
  }
 private:
  #if (CUDA_VERSION < 11000)
  CUtexref _tex;
  #endif
  friend class UCL_Kernel;
  template<class mat_typ>
  inline void _bind_float(mat_typ &vec, const unsigned numel) {
    #if (CUDA_VERSION < 11000)
    #ifdef UCL_DEBUG
    assert(numel!=0 && numel<5);
    #endif
@ -90,10 +96,42 @@ class UCL_Texture {
      else
        CU_SAFE_CALL(cuTexRefSetFormat(_tex,CU_AD_FORMAT_SIGNED_INT32,numel*2));
    }
    #endif
  }
 };
 /// Class storing a const global memory reference
 class UCL_Const {
 public:
  UCL_Const() {}
  ~UCL_Const() {}
  /// Construct with a specified global reference
  inline UCL_Const(UCL_Program &prog, const char *global_name)
    { get_global(prog,global_name); }
  /// Set the global reference for this object
  inline void get_global(UCL_Program &prog, const char *global_name) {
    _cq=prog.cq();
    CU_SAFE_CALL(cuModuleGetGlobal(&_global, &_global_bytes, prog._module,
 				   global_name)); 
  }
  /// Copy from array on host to const memory
  template <class numtyp>
  inline void update_device(UCL_H_Vec<numtyp> &src, const int numel) {
    CU_SAFE_CALL(cuMemcpyHtoDAsync(_global, src.begin(), numel*sizeof(numtyp),
 				   _cq));
  }
  /// Get device ptr associated with object
  inline const CUdeviceptr * begin() const { return &_global; }
  inline void clear() {}
 private:
  CUstream _cq;
  CUdeviceptr _global;
  size_t _global_bytes;
  friend class UCL_Kernel;
 };
 } // namespace
 #endif
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@ -28,12 +28,8 @@
 #include <vector>
 #include <iostream>
-/* We default to OpenCL 1.2 as target version for now as
+#ifndef CL_TARGET_OPENCL_VERSION
- * there are known issues with OpenCL 2.0 and later.
+#define CL_TARGET_OPENCL_VERSION 210
 * This is also to silence warnings from generic OpenCL headers */
 #if !defined(CL_TARGET_OPENCL_VERSION)
 #define CL_TARGET_OPENCL_VERSION 120
 #endif
 #ifdef __APPLE__
@ -55,17 +51,36 @@ namespace ucl_opencl {
 typedef cl_command_queue command_queue;
 typedef cl_context context_type;
 inline void ucl_flush(command_queue &cq) { CL_SAFE_CALL(clFlush(cq)); }
 inline void ucl_sync(cl_command_queue &cq) {
  CL_SAFE_CALL(clFinish(cq));
 }
-inline bool _shared_mem_device(cl_device_type &device_type) {
+#if defined(GERYON_FORCE_SHARED_MAIN_MEM_ON)
 inline bool _shared_mem_device(cl_device_id &device) { return true; }
 #elif defined(GERYON_FORCE_SHARED_MAIN_MEM_OFF)
 inline bool _shared_mem_device(cl_device_id &device) { return false; }
 #else
 /// Report whether the device shares main memory with the host
 /** When CL_VERSION_1_2 is defined the answer comes from the
   * CL_DEVICE_HOST_UNIFIED_MEMORY query; otherwise the device is
   * reported as shared-memory only when its type is CL_DEVICE_TYPE_CPU. **/
 inline bool _shared_mem_device(cl_device_id &device) {
  #ifdef CL_VERSION_1_2
  cl_bool unified;
  CL_SAFE_CALL(clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY,
                               sizeof(unified), &unified, NULL));
  if (unified == CL_TRUE)
    return true;
  return false;
  #else
  cl_device_type dev_type;
  CL_SAFE_CALL(clGetDeviceInfo(device, CL_DEVICE_TYPE,
                               sizeof(dev_type), &dev_type, NULL));
  const bool is_cpu = (dev_type == CL_DEVICE_TYPE_CPU);
  return is_cpu;
  #endif
 }
 #endif
 struct OCLProperties {
  std::string name;
  cl_device_type device_type;
  bool is_subdevice;
  cl_ulong global_mem;
  cl_ulong shared_mem;
  cl_ulong const_mem;
@ -74,12 +89,16 @@ struct OCLProperties {
  size_t work_group_size;
  size_t work_item_size[3];
  bool double_precision;
  int preferred_vector_width32, preferred_vector_width64;
  int alignment;
  size_t timer_resolution;
  bool ecc_support;
  std::string c_version;
  bool partition_equal, partition_counts, partition_affinity;
  cl_uint max_sub_devices;
  int cl_device_version;
  bool has_subgroup_support;
  bool has_shuffle_support;
 };
 /// Class for looking at data parallel device properties
@ -182,15 +201,26 @@ class UCL_Device {
  inline std::string device_type_name(const int i);
  /// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type() { return device_type(_device); }
+  inline enum UCL_DEVICE_TYPE device_type() { return device_type(_device); }
  /// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
-  inline int device_type(const int i);
+  inline enum UCL_DEVICE_TYPE device_type(const int i);
  /// Returns true if host memory is efficiently addressable from device
  inline bool shared_memory() { return shared_memory(_device); }
  /// Returns true if host memory is efficiently addressable from device
  inline bool shared_memory(const int i)
-    { return _shared_mem_device(_properties[i].device_type); }
+    { return _shared_mem_device(_cl_devices[i]); }
  /// Returns preferred vector width
  inline int preferred_fp32_width() { return preferred_fp32_width(_device); }
  /// Returns preferred vector width
  inline int preferred_fp32_width(const int i)
    {return _properties[i].preferred_vector_width32;}
  /// Returns preferred vector width
  inline int preferred_fp64_width() { return preferred_fp64_width(_device); }
  /// Returns preferred vector width
  inline int preferred_fp64_width(const int i)
    {return _properties[i].preferred_vector_width64;}
  /// Returns true if double precision is support for the current device
  inline bool double_precision() { return double_precision(_device); }
@ -242,6 +272,18 @@ class UCL_Device {
  /// Get the maximum number of threads per block
  inline size_t group_size(const int i)
    { return _properties[i].work_group_size; }
  /// Get the maximum number of threads per block in dimension 'dim'
  inline size_t group_size_dim(const int dim)
    { return group_size_dim(_device, dim); }
  /// Get the maximum number of threads per block in dimension 'dim'
  inline size_t group_size_dim(const int i, const int dim)
    { return _properties[i].work_item_size[dim]; }
  /// Get the shared local memory size in bytes
  inline size_t slm_size() { return slm_size(_device); }
  /// Get the shared local memory size in bytes
  inline size_t slm_size(const int i)
    { return _properties[i].shared_mem; }
  /// Return the maximum memory pitch in bytes for current device
  inline size_t max_pitch() { return max_pitch(_device); }
@ -256,6 +298,12 @@ class UCL_Device {
  inline bool sharing_supported(const int i)
    { return true; }
  /// True if the device is a sub-device
  inline bool is_subdevice()
    { return is_subdevice(_device); }
  /// True if the device is a sub-device
  inline bool is_subdevice(const int i)
    { return _properties[i].is_subdevice; }
  /// True if splitting device into equal subdevices supported
  inline bool fission_equal()
    { return fission_equal(_device); }
@ -274,6 +322,18 @@ class UCL_Device {
  /// True if splitting device into subdevices by affinity domains supported
  inline bool fission_by_affinity(const int i)
    { return _properties[i].partition_affinity; }
  /// True if the device has subgroup support
  inline bool has_subgroup_support()
    { return has_subgroup_support(_device); }
  /// True if the device has subgroup support
  inline bool has_subgroup_support(const int i)
    { return _properties[i].has_subgroup_support; }
  /// True if the device supports shuffle intrinsics
  inline bool has_shuffle_support()
    { return has_shuffle_support(_device); }
  /// True if the device supports shuffle intrinsics
  inline bool has_shuffle_support(const int i)
    { return _properties[i].has_shuffle_support; }
  /// Maximum number of subdevices allowed from device fission
  inline int max_sub_devices()
@ -281,6 +341,12 @@ class UCL_Device {
  /// Maximum number of subdevices allowed from device fission
  inline int max_sub_devices(const int i)
    { return _properties[i].max_sub_devices; }
  /// OpenCL version supported by the device
  inline int cl_device_version()
    { return cl_device_version(_device); }
  /// OpenCL version supported by the device
  inline int cl_device_version(const int i)
    { return _properties[i].cl_device_version; }
  /// List all devices along with all properties
  inline void print_all(std::ostream &out);
@ -288,8 +354,14 @@ class UCL_Device {
  /// Return the OpenCL type for the device
  inline cl_device_id & cl_device() { return _cl_device; }
-  /// Select the platform that has accelerators
+  /// Automatically set the platform by type, vendor, and/or CU count
-  inline int set_platform_accelerator(int pid=-1);
+  /** If first_device is non-negative, the search is restricted to platforms
    * that contain that device ID. If ndevices is positive, the search is
    * restricted to platforms with at least that many devices. **/
  inline int auto_set_platform(const enum UCL_DEVICE_TYPE type=UCL_GPU,
 			       const std::string vendor="",
 			       const int ndevices=-1,
 			       const int first_device=-1);
 private:
  int _num_platforms;          // Number of platforms
@ -322,8 +394,7 @@ UCL_Device::UCL_Device() {
    return;
  } else
    _num_platforms=static_cast<int>(nplatforms);
-  // note that platform 0 may not necessarily be associated with accelerators
+  set_platform(0);
  set_platform_accelerator();
 }
 UCL_Device::~UCL_Device() {
@ -332,6 +403,14 @@ UCL_Device::~UCL_Device() {
 void UCL_Device::clear() {
  _properties.clear();
  #ifdef GERYON_NUMA_FISSION
  #ifdef CL_VERSION_1_2
  for (int i=0; i<_cl_devices.size(); i++)
    CL_DESTRUCT_CALL(clReleaseDevice(_cl_devices[i]));
  #endif
  #endif
  _cl_devices.clear();
  if (_device>-1) {
    for (size_t i=0; i<_cq.size(); i++) {
@ -341,6 +420,7 @@ void UCL_Device::clear() {
    CL_DESTRUCT_CALL(clReleaseContext(_context));
  }
  _device=-1;
  _num_devices=0;
 }
 int UCL_Device::set_platform(int pid) {
@ -370,11 +450,51 @@ int UCL_Device::set_platform(int pid) {
  CL_SAFE_CALL(clGetDeviceIDs(_cl_platform,CL_DEVICE_TYPE_ALL,n,device_list,
                              &n));
  #ifndef GERYON_NUMA_FISSION
  // --- Store properties for each device
  for (int i=0; i<_num_devices; i++) {
    _cl_devices.push_back(device_list[i]);
    add_properties(device_list[i]);
  }
  #else
  // --- Create sub-devices for anything partitionable by NUMA and store props
  int num_unpart = _num_devices;
  _num_devices = 0;
  for (int i=0; i<num_unpart; i++) {
    cl_uint num_subdevices = 1;
    cl_device_id *subdevice_list = device_list + i;
    #ifdef CL_VERSION_1_2
    cl_device_affinity_domain adomain;
    CL_SAFE_CALL(clGetDeviceInfo(device_list[i],
 				 CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
 				 sizeof(cl_device_affinity_domain),
 				 &adomain,NULL));
    cl_device_partition_property props[3];
    props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
    props[2]=0;
    if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
 				      &num_subdevices));
    if (num_subdevices > 1) {
      subdevice_list = new cl_device_id[num_subdevices];
      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
 				      subdevice_list, &num_subdevices));
    }
    #endif
    for (int j=0; j<num_subdevices; j++) {
      _num_devices++;
      _cl_devices.push_back(subdevice_list[j]);
      add_properties(subdevice_list[j]);
    }
    if (num_subdevices > 1) delete[] subdevice_list;
  } // for i
  #endif
  delete[] device_list;
  return UCL_SUCCESS;
 }
@ -429,11 +549,18 @@ void UCL_Device::add_properties(cl_device_id device_list) {
                               sizeof(cl_uint),&op.alignment,nullptr));
  op.alignment/=8;
  cl_uint float_width;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
                               sizeof(float_width),&float_width,nullptr));
  op.preferred_vector_width32=float_width;
  // Determine if double precision is supported
  cl_uint double_width;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
                               sizeof(double_width),&double_width,nullptr));
  op.preferred_vector_width64=double_width;
  if (double_width==0)
    op.double_precision=false;
  else
@ -452,9 +579,14 @@ void UCL_Device::add_properties(cl_device_id device_list) {
    op.ecc_support=true;
  op.c_version="";
  op.is_subdevice=false;
  op.partition_equal=false;
  op.partition_counts=false;
  op.partition_affinity=false;
  op.max_sub_devices=1;
  op.cl_device_version=0;
  op.has_subgroup_support=false;
  op.has_shuffle_support=false;
  #ifdef CL_VERSION_1_2
  size_t return_bytes;
@ -463,6 +595,13 @@ void UCL_Device::add_properties(cl_device_id device_list) {
  op.c_version=buffer;
  cl_device_partition_property pinfo[4];
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_PARTITION_TYPE,
 			       4*sizeof(cl_device_partition_property),
 			       &pinfo, &return_bytes));
  if (return_bytes == 0) op.is_subdevice=false;
  else if (pinfo[0]) op.is_subdevice=true;
  else op.is_subdevice=false;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PARTITION_PROPERTIES,
                               4*sizeof(cl_device_partition_property),
@ -480,6 +619,46 @@ void UCL_Device::add_properties(cl_device_id device_list) {
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PARTITION_MAX_SUB_DEVICES,
                               sizeof(cl_uint),&op.max_sub_devices,nullptr));
  CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_VERSION,1024,buffer,nullptr));
  int cl_version_maj = buffer[7] - '0';
  int cl_version_min = buffer[9] - '0';
  op.cl_device_version = cl_version_maj * 100 + cl_version_min * 10;
  // Read the device extension list. The text is stored in a std::string
  // instead of a variable-length array, which is not standard C++ and is
  // undefined for a zero-length result.
  size_t ext_str_size_ret = 0;
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS, 0, nullptr,
                               &ext_str_size_ret));
  std::string extensions(ext_str_size_ret, '\0');
  if (ext_str_size_ret > 0) {
    CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS,
                                 ext_str_size_ret, &extensions[0], nullptr));
  }

  #if defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)
  // Subgroup and shuffle flags are only set for devices reporting 2.1 or later
  if (op.cl_device_version >= 210) {
    const bool intel_subgroups =
      extensions.find("cl_intel_subgroups") != std::string::npos;
    if (intel_subgroups ||
        extensions.find("cl_khr_subgroups") != std::string::npos)
      op.has_subgroup_support=true;
    if (intel_subgroups)
      op.has_shuffle_support=true;
  }
  #endif

  // When the NVIDIA attribute-query extension is listed, read the compute
  // capability and enable shuffle support for 3.0 and newer.
  if (extensions.find("cl_nv_device_attribute_query") != std::string::npos) {
    #ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
    #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
    #endif
    #ifndef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
    #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
    #endif
    cl_uint major, minor;
    CL_SAFE_CALL(clGetDeviceInfo(device_list,
                                 CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
                                 sizeof(cl_uint), &major, nullptr));
    CL_SAFE_CALL(clGetDeviceInfo(device_list,
                                 CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
                                 sizeof(cl_uint), &minor, nullptr));
    double arch = static_cast<double>(minor)/10+major;
    if (arch >= 3.0)
      op.has_shuffle_support=true;
  }
  #endif
  _properties.push_back(op);
@ -516,7 +695,7 @@ std::string UCL_Device::device_type_name(const int i) {
 }
 // Get a string telling the type of the device
-int UCL_Device::device_type(const int i) {
+enum UCL_DEVICE_TYPE UCL_Device::device_type(const int i) {
  if (_properties[i].device_type==CL_DEVICE_TYPE_CPU)
    return UCL_CPU;
  else if (_properties[i].device_type==CL_DEVICE_TYPE_GPU)
@ -529,14 +708,8 @@ int UCL_Device::device_type(const int i) {
 // Make device number `num` the current device: records the index, takes the
 // matching entry of _cl_devices as _cl_device, and returns the result of
 // create_context().
 // NOTE(review): as shown here, device_list is allocated, filled by
 // clGetDeviceIDs and freed, but no longer read once the `+` line replaces
 // the `-` line -- confirm the query is still wanted.
 int UCL_Device::set(int num) {
  cl_device_id *device_list = new cl_device_id[_num_devices];
  cl_uint n;
  CL_SAFE_CALL(clGetDeviceIDs(_cl_platform,CL_DEVICE_TYPE_ALL,_num_devices,
                               device_list,&n));
  _device=num;
-  _cl_device=device_list[_device];
+  _cl_device=_cl_devices[_device];
  delete[] device_list;
  return create_context();
 }
@ -555,6 +728,11 @@ void UCL_Device::print_all(std::ostream &out) {
      out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
      out << "  Type of device:                                "
          << device_type_name(i).c_str() << std::endl;
      out << "  Is a subdevice:                                ";
      if (is_subdevice(i))
 	out << "Yes\n";
      else
 	out << "No\n";
      out << "  Double precision support:                      ";
      if (double_precision(i))
        out << "Yes\n";
@ -613,31 +791,91 @@ void UCL_Device::print_all(std::ostream &out) {
        out << "No\n";
      out << "  Maximum subdevices from fission:               "
          << max_sub_devices(i) << std::endl;
      out << "  Shared memory system:                          ";
      if (shared_memory(i))
        out << "Yes\n";
      else
        out << "No\n";
    }
  }
 }
-// Select the platform that is associated with accelerators
+int UCL_Device::auto_set_platform(const enum UCL_DEVICE_TYPE type,
-// if pid < 0, select the first platform
+				  const std::string vendor,
-int UCL_Device::set_platform_accelerator(int pid) {
+				  const int ndevices,
-  if (pid < 0) {
+				  const int first_device) {
-    int found = 0;
+  if (_num_platforms < 2) return set_platform(0);
  // Inclusive index of the last device in the requested range; stays -1
  // when no first device was requested (first_device is negative).
  int last_device = -1;
  if (first_device > -1) {
    // Only a positive ndevices extends the range. Zero or a negative value
    // (the default is -1) selects just first_device; testing plain
    // `ndevices` treated -1 as true and produced first_device - 2.
    if (ndevices > 0)
      last_device = first_device + ndevices - 1;
    else
      last_device = first_device;
  }
  bool vendor_match=false;
  bool type_match=false;
  int max_cus=0;
  int best_platform=0;
  std::string vendor_upper=vendor;
  for (int i=0; i<vendor.length(); i++)
    if (vendor_upper[i]<='z' && vendor_upper[i]>='a')
      vendor_upper[i]=toupper(vendor_upper[i]);
  for (int n=0; n<_num_platforms; n++) {
    set_platform(n);
-      for (int i=0; i<num_devices(); i++) {
+    if (last_device > -1 && last_device >= num_devices()) continue;
-        if ((_properties[i].device_type & CL_DEVICE_TYPE_CPU) ||
+    if (ndevices > num_devices()) continue;
-            (_properties[i].device_type & CL_DEVICE_TYPE_GPU) ||
+
-            (_properties[i].device_type & CL_DEVICE_TYPE_ACCELERATOR)) {
+    int first_id=0;
-          found = 1;
+    int last_id=num_devices()-1;
-          break;
+    if (last_device > -1) {
      first_id=first_device;
      last_id=last_device;
    }
    if (vendor_upper!="") {
      std::string pname = platform_name();
      for (int i=0; i<pname.length(); i++)
 	if (pname[i]<='z' && pname[i]>='a')
 	  pname[i]=toupper(pname[i]);
      if (pname.find(vendor_upper)!=std::string::npos) {
 	if (vendor_match == false) {
 	  best_platform=n;
 	  max_cus=0;
 	  vendor_match=true;
 	}
      } else if (vendor_match)
 	continue;
    }
    if (type != UCL_DEFAULT) {
      bool ptype_matched=false;
      for (int d=first_id; d<=last_id; d++) {
 	if (type==device_type(d)) {
 	  if (type_match == false) {
 	    best_platform=n;
 	    max_cus=0;
 	    type_match=true;
 	    ptype_matched=true;
 	  }
 	}
      if (found) return UCL_SUCCESS;
      }
-    return UCL_ERROR;
+      if (type_match==true && ptype_matched==false)
-  } else {
+	continue;
    return set_platform(pid);
    }
    for (int d=first_id; d<=last_id; d++) {
      if (cus(d) > max_cus) {
 	best_platform=n;
 	max_cus=cus(d);
      }
    }
  }
  return set_platform(best_platform);
 }
 } // namespace ucl_opencl
--- a/lib/gpu/geryon/ocl_kernel.h
+++ b/lib/gpu/geryon/ocl_kernel.h
@ -2,6 +2,7 @@
                                ocl_kernel.h
                             -------------------
                               W. Michael Brown
                            Nitin Dhamankar (Intel)
  Utilities for dealing with OpenCL kernels
@ -26,6 +27,7 @@
 #include "ocl_device.h"
 #include <fstream>
 #include <cstdio>
 namespace ucl_opencl {
@ -93,7 +95,7 @@ class UCL_Program {
  /// Load a program from a string and compile with flags
  inline int load_string(const void *program, const char *flags="",
-                         std::string *log=nullptr) {
+                         std::string *log=nullptr, FILE* foutput=nullptr) {
    cl_int error_flag;
    const char *prog=(const char *)program;
    _program=clCreateProgramWithSource(_context,1,&prog,nullptr,&error_flag);
@ -107,26 +109,65 @@ class UCL_Program {
                                       sizeof(cl_build_status),&build_status,
                                       nullptr));
-    if (build_status != CL_SUCCESS || log!=nullptr) {
+    #ifdef GERYON_KERNEL_DUMP
    {
      // Debug aid: query the build log size, fetch the log, and print the
      // flags, program source and build log to stdout.
      size_t ms;
-      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,0,
+      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
-                                         nullptr, &ms));
+					 0,NULL,&ms));
      // NOTE(review): no delete[] for this build_log is visible before the
      // enclosing scope closes -- looks like a leak in dump builds; confirm.
      char *build_log = new char[ms];
-      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,ms,
+      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
-                                         build_log, nullptr));
+					 ms,build_log, NULL));
      std::cout << std::endl << std::endl
 		<< "--------------------------------------------------------\n"
 		<< "   UCL PROGRAM DUMP\n"
 		<< "--------------------------------------------------------\n"
 		<< flags << std::endl
 		<< "--------------------------------------------------------\n"
 		<< prog << std::endl
 		<< "--------------------------------------------------------\n"
 		<< build_log
 		<< "--------------------------------------------------------\n"
 		<< std::endl << std::endl;
    }
    #endif
    if (build_status != CL_SUCCESS || log!=NULL) {
      size_t ms;
      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
 					 0,NULL,&ms));
      char *build_log = new char[ms];
      CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,CL_PROGRAM_BUILD_LOG,
 					 ms,build_log, NULL));
      if (log!=nullptr)
        *log=std::string(build_log);
      if (build_status != CL_SUCCESS) {
        #ifndef UCL_NO_EXIT
-        std::cerr << std::endl
+        std::cerr << std::endl << std::endl
          << "----------------------------------------------------------\n"
          << " UCL Error: Error compiling OpenCL Program ("
          << build_status << ") ...\n"
          << "----------------------------------------------------------\n";
        std::cerr << build_log << std::endl;
 	std::cerr <<
 	  "----------------------------------------------------------\n"
 	  << std::endl << std::endl;
        #endif
 	if (foutput != NULL) {
 	  fprintf(foutput,"\n\n");
 	  fprintf(foutput,
 	    "----------------------------------------------------------\n");
 	  fprintf(foutput,
 		  " UCL Error: Error compiling OpenCL Program (%d) ...\n",
 		  build_status);
 	  fprintf(foutput,
 	    "----------------------------------------------------------\n");
 	  fprintf(foutput,"%s\n",build_log);
 	  fprintf(foutput,
 	    "----------------------------------------------------------\n");
 	  fprintf(foutput,"\n\n");
 	}
 	delete[] build_log;
        return UCL_COMPILE_ERROR;
      } else delete[] build_log;
@ -141,6 +182,7 @@ class UCL_Program {
  inline void cq(command_queue &cq_in) { _cq=cq_in; }
  friend class UCL_Kernel;
  friend class UCL_Const;
 private:
  bool _init_done;
  cl_program _program;
@ -322,9 +364,45 @@ class UCL_Kernel {
  inline void cq(command_queue &cq_in) { _cq=cq_in; }
  #include "ucl_arg_kludge.h"
  #if defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)
  /// Query the maximum sub-group size for a 1-D work-group of block_size_x
  /** Calls clGetKernelSubGroupInfo with
    * CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for this kernel and device,
    * stores the answer in _mx_subgroup_sz and returns it. **/
  inline size_t max_subgroup_size(const size_t block_size_x) {
    size_t local_range = block_size_x;
    CL_SAFE_CALL(clGetKernelSubGroupInfo(
                   _kernel, _device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
                   sizeof(local_range), (void *) &local_range,
                   sizeof(_mx_subgroup_sz), (void *) &_mx_subgroup_sz, NULL));
    return _mx_subgroup_sz;
  }
  /// Query the maximum sub-group size for a 2-D work-group
  /** The two block dimensions are packed into an array that is passed as the
    * input value of the CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE query; the
    * answer is stored in _mx_subgroup_sz and returned. **/
  inline size_t max_subgroup_size(const size_t block_size_x,
                                  const size_t block_size_y) {
    size_t local_range[2] = { block_size_x, block_size_y };
    CL_SAFE_CALL(clGetKernelSubGroupInfo(
                   _kernel, _device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
                   sizeof(local_range), (void *) local_range,
                   sizeof(_mx_subgroup_sz), (void *) &_mx_subgroup_sz, NULL));
    return _mx_subgroup_sz;
  }
  /// Query the maximum sub-group size for a 3-D work-group
  /** The three block dimensions are packed into an array that is passed as
    * the input value of the CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE query;
    * the answer is stored in _mx_subgroup_sz and returned. **/
  inline size_t max_subgroup_size(const size_t block_size_x,
                                  const size_t block_size_y,
                                  const size_t block_size_z) {
    size_t local_range[3] = { block_size_x, block_size_y, block_size_z };
    CL_SAFE_CALL(clGetKernelSubGroupInfo(
                   _kernel, _device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
                   sizeof(local_range), (void *) local_range,
                   sizeof(_mx_subgroup_sz), (void *) &_mx_subgroup_sz, NULL));
    return _mx_subgroup_sz;
  }
  #endif
 private:
  cl_kernel _kernel;
  cl_program _program;
  cl_device_id _device;
  cl_uint _dimensions;
  size_t _block_size[3];
  size_t _num_blocks[3];
@ -338,6 +416,11 @@ class UCL_Kernel {
  unsigned _kernel_info_nargs;
  //std::string _kernel_info_args[256];
  #endif
  #ifdef CL_VERSION_2_1
  size_t _mx_subgroup_sz;      // Maximum sub-group size for this kernel
  #endif
 };
 inline int UCL_Kernel::set_function(UCL_Program &program, const char *function) {
@ -347,6 +430,7 @@ inline int UCL_Kernel::set_function(UCL_Program &program, const char *function)
  CL_SAFE_CALL(clRetainCommandQueue(_cq));
  _program=program._program;
  CL_SAFE_CALL(clRetainProgram(_program));
  _device=program._device;
  cl_int error_flag;
  _kernel=clCreateKernel(program._program,function,&error_flag);
@ -380,8 +464,11 @@ inline int UCL_Kernel::set_function(UCL_Program &program, const char *function)
 }
 void UCL_Kernel::run() {
-  CL_SAFE_CALL(clEnqueueNDRangeKernel(_cq,_kernel,_dimensions,nullptr,
+  CL_SAFE_CALL(clEnqueueNDRangeKernel(_cq,_kernel,_dimensions,NULL,
-                                      _num_blocks,_block_size,0,nullptr,nullptr));
+                                      _num_blocks,_block_size,0,NULL,NULL));
  #ifdef GERYON_OCL_FLUSH
  ucl_flush(_cq);
  #endif
 }
 } // namespace
--- a/lib/gpu/geryon/ocl_macros.h
+++ b/lib/gpu/geryon/ocl_macros.h
@ -4,12 +4,8 @@
 #include <cstdio>
 #include <cassert>
-/* We default to OpenCL 1.2 as target version for now as
+#ifndef CL_TARGET_OPENCL_VERSION
- * there are known issues with OpenCL 2.0 and later.
+#define CL_TARGET_OPENCL_VERSION 210
 * This is also to silence warnings from generic OpenCL headers */
 #if !defined(CL_TARGET_OPENCL_VERSION)
 #define CL_TARGET_OPENCL_VERSION 120
 #endif
 #ifdef __APPLE__
--- a/lib/gpu/geryon/ocl_memory.h
+++ b/lib/gpu/geryon/ocl_memory.h
@ -108,7 +108,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
    return UCL_MEMORY_ERROR;
  *mat.host_ptr() = (typename mat_type::data_type*)
    clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
-                                         map_perm,0,n,0,nullptr,nullptr,nullptr);
+		       map_perm,0,n,0,NULL,NULL,NULL);
  mat.cq()=cm.cq();
  CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
@ -116,18 +116,15 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
 }
 template <class mat_type, class copy_type>
-inline int _host_view(mat_type &mat, copy_type &cm, const size_t n) {
+inline int _host_view(mat_type &mat, copy_type &cm, const size_t o,
                      const size_t n) {
  cl_int error_flag;
-  cl_context context;
+  cl_buffer_region subbuffer;
-  CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_CONTEXT,sizeof(context),
+  subbuffer.origin = o;
-                                  &context,nullptr));
+  subbuffer.size = n;
-  cl_mem_flags orig_flags;
+  mat.cbegin()=clCreateSubBuffer(cm.cbegin(), 0,
-  CL_SAFE_CALL(clGetMemObjectInfo(cm.cbegin(),CL_MEM_FLAGS,sizeof(orig_flags),
+                                 CL_BUFFER_CREATE_TYPE_REGION, &subbuffer,
-                                  &orig_flags,nullptr));
+                                 &error_flag);
  orig_flags=orig_flags & ~CL_MEM_ALLOC_HOST_PTR;
  mat.cbegin()=clCreateBuffer(context, CL_MEM_USE_HOST_PTR | orig_flags, n,
                              *mat.host_ptr(), &error_flag);
  CL_CHECK_ERR(error_flag);
  CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
@ -470,6 +467,9 @@ inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) {
  size_t kn=n/sizeof(typename mat_type::data_type);
  CL_SAFE_CALL(clEnqueueNDRangeKernel(cq,kzero,1,0,&kn,0,0,0,0));
  #endif
  #ifdef GERYON_OCL_FLUSH
  ucl_flush(cq);
  #endif
 }
 // --------------------------------------------------------------------------
@ -585,7 +585,10 @@ template <> struct _ucl_memcpy<1,0> {
    std::cerr << "UCL_COPY 1NS\n";
    #endif
    CL_SAFE_CALL(clEnqueueReadBuffer(cq,src.cbegin(),block,src_offset,n,
-                                     dst.begin(),0,nullptr,nullptr));
+                                     dst.begin(),0,NULL,NULL));
    #ifdef GERYON_OCL_FLUSH
    if (block==CL_FALSE) ucl_flush(cq);
    #endif
  }
  template <class p1, class p2>
  static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
@ -617,6 +620,9 @@ template <> struct _ucl_memcpy<1,0> {
        src_offset+=spitch;
        dst_offset+=dpitch;
      }
    #ifdef GERYON_OCL_FLUSH
    if (block==CL_FALSE) ucl_flush(cq);
    #endif
  }
 };
@ -637,7 +643,10 @@ template <> struct _ucl_memcpy<0,1> {
    std::cerr << "UCL_COPY 3NS\n";
    #endif
    CL_SAFE_CALL(clEnqueueWriteBuffer(cq,dst.cbegin(),block,dst_offset,n,
-                                      src.begin(),0,nullptr,nullptr));
+                                      src.begin(),0,NULL,NULL));
    #ifdef GERYON_OCL_FLUSH
    if (block==CL_FALSE) ucl_flush(cq);
    #endif
  }
  template <class p1, class p2>
  static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
@ -669,6 +678,9 @@ template <> struct _ucl_memcpy<0,1> {
        src_offset+=spitch;
        dst_offset+=dpitch;
      }
    #ifdef GERYON_OCL_FLUSH
    if (block==CL_FALSE) ucl_flush(cq);
    #endif
  }
 };
@ -690,6 +702,9 @@ template <int mem1, int mem2> struct _ucl_memcpy {
    #endif
    if (block==CL_TRUE) ucl_sync(cq);
    #ifdef GERYON_OCL_FLUSH
    else ucl_flush(cq);
    #endif
  }
  template <class p1, class p2>
  static inline void mc(p1 &dst, const size_t dpitch, const p2 &src,
@ -720,6 +735,9 @@ template <int mem1, int mem2> struct _ucl_memcpy {
    #endif
    if (block==CL_TRUE) ucl_sync(cq);
    #ifdef GERYON_OCL_FLUSH
    else ucl_flush(cq);
    #endif
  }
 };
--- a/lib/gpu/geryon/ocl_texture.h
+++ b/lib/gpu/geryon/ocl_texture.h
@ -53,6 +53,59 @@ class UCL_Texture {
  friend class UCL_Kernel;
 };
 /// Class storing a reference to a read-only global memory buffer
 /** Retains the context and command queue of the UCL_Program it is bound to
   * and owns a device buffer that is (re)allocated on demand by
   * update_device(). All handles are released in clear().
   * NOTE(review): no copy constructor/assignment is declared here, so copying
   * an instance would presumably release the same handles twice - confirm
   * that instances are never copied. **/
 class UCL_Const {
 public:
  /// Construct an unbound object that owns no OpenCL resources
  UCL_Const() : _global_bytes(0), _active(false) {}
  ~UCL_Const() { clear(); }
  /// Construct with a specified global reference
  /** The bookkeeping members are initialized before get_global() runs,
    * because get_global() inspects them to decide what must be released. **/
  inline UCL_Const(UCL_Program &prog, const char *global_name)
    : _global_bytes(0), _active(false) { get_global(prog,global_name); }
  /// Set the global reference for this object
  /** Any buffer, context and command queue held from a previous binding are
    * released first, so a buffer created in the old context is never used
    * with the new command queue.
    * \param prog program whose context and command queue are retained
    * \param global_name not used by this implementation **/
  inline void get_global(UCL_Program &prog, const char *global_name) {
    clear();
    _active = true;
    _context = prog._context;
    _cq = prog._cq;
    CL_SAFE_CALL(clRetainContext(_context));
    CL_SAFE_CALL(clRetainCommandQueue(_cq));
  }
  /// Copy from array on host to const memory
  /** Enqueues a non-blocking write of the first numel elements of src.
    * The device buffer is reallocated only when the request is larger than
    * the current allocation. The object must have been bound with
    * get_global() (or the binding constructor) first, since the context and
    * command queue set there are used here. Nothing is done when numel is
    * not positive. **/
  template <class numtyp>
  inline void update_device(UCL_H_Vec<numtyp> &src, const int numel) {
    if (numel <= 0) return;
    // Compute the size in size_t to avoid truncation and signed/unsigned
    // comparison against _global_bytes
    const size_t bytes=static_cast<size_t>(numel)*sizeof(numtyp);
    if (_global_bytes < bytes) {
      if (_global_bytes) {
        CL_SAFE_CALL(clReleaseMemObject(_global));
        _global_bytes = 0;
      }
      cl_int e;
      _global = clCreateBuffer(_context, CL_MEM_READ_ONLY, bytes, NULL, &e);
      CL_SAFE_CALL(e);
      // Record the allocation so later calls reuse it and clear() releases it
      _global_bytes = bytes;
    }
    CL_SAFE_CALL(clEnqueueWriteBuffer(_cq, _global, CL_FALSE, 0, bytes,
                                      (void *)src.begin(), 0, NULL, NULL));
  }
  /// Get device ptr associated with object
  inline const cl_mem * begin() const { return &_global; }
  /// Release the device buffer and the retained context and command queue
  inline void clear() {
    if (_global_bytes) CL_SAFE_CALL(clReleaseMemObject(_global));
    if (_active) {
      CL_DESTRUCT_CALL(clReleaseContext(_context));
      CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
    }
    _global_bytes=0;
    _active=false;
  }
 private:
  cl_mem _global;          // device buffer; valid only while _global_bytes > 0
  size_t _global_bytes;    // size in bytes of the buffer held in _global
  cl_context _context;     // retained while _active is true
  cl_command_queue _cq;    // retained while _active is true
  bool _active;            // true once get_global() has retained context/queue
 };
 } // namespace
 #endif
--- a/lib/gpu/geryon/ocl_timer.h
+++ b/lib/gpu/geryon/ocl_timer.h
@ -61,7 +61,6 @@ class UCL_Timer {
  /// Initialize command queue for timing
  inline void init(UCL_Device &dev, command_queue &cq) {
    clear();
    t_factor=dev.timer_resolution()/1000000000.0;
    _cq=cq;
    clRetainCommandQueue(_cq);
    _initialized=true;
@ -124,17 +123,17 @@ class UCL_Timer {
    clReleaseEvent(start_event);
    clReleaseEvent(stop_event);
    has_measured_time = false;
-    return (tend-tstart)*t_factor;
+    return (tend-tstart)*1e-6;
  }
  /// Return the time (s) of last start to stop - Forces synchronization
-  inline double seconds() { return time()/1000.0; }
+  inline double seconds() { return time()*1e-3; }
  /// Return the total time in ms
  inline double total_time() { return _total_time; }
  /// Return the total time in seconds
-  inline double total_seconds() { return _total_time/1000.0; }
+  inline double total_seconds() { return _total_time*1e-3; }
 private:
  cl_event start_event, stop_event;
--- a/lib/gpu/geryon/ucl_basemat.h
+++ b/lib/gpu/geryon/ucl_basemat.h
@ -69,17 +69,17 @@ class UCL_BaseMat {
  /// Return the type/permissions of memory allocation
  /** Returns UCL_READ_WRITE, UCL_WRITE_ONLY, UCL_READ_ONLY, UCL_NOT_PINNED
    * or UCL_VIEW **/
  /// Assert that any ops in the associated command queue have been issued to the device
  inline void flush() { ucl_flush(_cq); }
  inline enum UCL_MEMOPT kind() const { return _kind; }
  inline bool shared_mem_device() {
    #ifdef _OCL_MAT
    cl_device_id device;
    CL_SAFE_CALL(clGetCommandQueueInfo(_cq,CL_QUEUE_DEVICE,
-                                       sizeof(cl_device_id),&device,nullptr));
+                                       sizeof(cl_device_id),&device,NULL));
-    cl_device_type device_type;
+    return _shared_mem_device(device);
    CL_SAFE_CALL(clGetDeviceInfo(device,CL_DEVICE_TYPE,
                                 sizeof(device_type),&device_type,nullptr));
    return _shared_mem_device(device_type);
    #else
    return false;
    #endif
--- a/lib/gpu/geryon/ucl_d_vec.h
+++ b/lib/gpu/geryon/ucl_d_vec.h
@ -39,7 +39,7 @@ class UCL_D_Vec : public UCL_BaseMat {
  };
  typedef numtyp data_type;
-  UCL_D_Vec() : _cols(0) {}
+ UCL_D_Vec() : _cols(0), _row_bytes(0) {}
  ~UCL_D_Vec() { _device_free(*this); }
  /// Construct with n columns
--- a/lib/gpu/geryon/ucl_get_devices.cpp
+++ b/lib/gpu/geryon/ucl_get_devices.cpp
@ -44,10 +44,8 @@ using namespace ucl_hip;
 int main(int argc, char** argv) {
  UCL_Device cop;
  std::cout << "Found " << cop.num_platforms() << " platform(s).\n";
-  if (cop.num_platforms()>0) {
+  if (cop.num_platforms()>0)
    std::cout << "Using platform: " << cop.platform_name() << std::endl;
    cop.print_all(std::cout);
  }
  return 0;
 }
--- a/lib/gpu/geryon/ucl_h_mat.h
+++ b/lib/gpu/geryon/ucl_h_mat.h
@ -241,7 +241,7 @@ class UCL_H_Mat : public UCL_BaseMat {
    _array=input.begin()+offset;
    _end=_array+_cols;
    #ifdef _OCL_MAT
-    _host_view(*this,input,_row_bytes*_rows);
+    _host_view(*this,input,offset*sizeof(numtyp),_row_bytes*_rows);
    #endif
  }
--- a/lib/gpu/geryon/ucl_h_vec.h
+++ b/lib/gpu/geryon/ucl_h_vec.h
@ -39,7 +39,7 @@ class UCL_H_Vec : public UCL_BaseMat {
   };
   typedef numtyp data_type;
-  UCL_H_Vec() : _cols(0) {
+ UCL_H_Vec() : _cols(0), _row_bytes(0) {
    #ifdef _OCL_MAT
    _carray=(cl_mem)(0);
    #endif
@ -135,7 +135,7 @@ class UCL_H_Vec : public UCL_BaseMat {
    _cols=cols;
    _row_bytes=_cols*sizeof(numtyp);
    this->_cq=input.cq();
-    _array=input.begin();
+    _array=(numtyp *)input.begin();
    _end=_array+_cols;
    #ifdef _OCL_MAT
    _carray=input.cbegin();
@ -240,10 +240,10 @@ class UCL_H_Vec : public UCL_BaseMat {
    _cols=cols;
    _row_bytes=_cols*sizeof(numtyp);
    this->_cq=input.cq();
-    _array=input.begin()+offset;
+    _array=(numtyp *)input.begin()+offset;
    _end=_array+_cols;
    #ifdef _OCL_MAT
-    _host_view(*this,input,_row_bytes);
+    _host_view(*this,input,offset*sizeof(numtyp),_row_bytes);
    #endif
  }
--- a/lib/gpu/geryon/ucl_vector.h
+++ b/lib/gpu/geryon/ucl_vector.h
@ -162,6 +162,8 @@ class UCL_Vector {
  inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
  /// Block until command_queue associated with matrix is complete
  inline void sync() { host.sync(); }
  /// Assert that any ops in the associated command queue have been issued to the device
  inline void flush() { ucl_flush(host.cq()); }
  ///Get the size of a row on the host (including any padding) in elements
  inline size_t row_size() const { return host.row_size(); }
--- a/lib/gpu/lal_answer.cpp
+++ b/lib/gpu/lal_answer.cpp
@ -14,6 +14,9 @@
 ***************************************************************************/
 #include "lal_answer.h"
 #if (LAL_USE_OMP == 1)
 #include <omp.h>
 #endif
 namespace LAMMPS_AL {
 #define AnswerT Answer<numtyp,acctyp>
@ -81,6 +84,10 @@ bool AnswerT::init(const int inum, const bool charge, const bool rot,
  _time_cast=0.0;
  _time_cpu_idle=0.0;
  success=success && (error_flag.alloc(1,*dev,UCL_READ_WRITE,
                                        UCL_WRITE_ONLY)==UCL_SUCCESS);
  if (success) error_flag.zero();
  return success && alloc(ef_inum);
 }
@ -111,6 +118,7 @@ bool AnswerT::add_fields(const bool charge, const bool rot) {
 template <class numtyp, class acctyp>
 void AnswerT::clear() {
  _gpu_bytes=0;
  error_flag.clear();
  if (!_allocated)
    return;
  _allocated=false;
@ -138,12 +146,21 @@ double AnswerT::host_memory_usage() const {
 template <class numtyp, class acctyp>
 void AnswerT::copy_answers(const bool eflag, const bool vflag,
-                               const bool ef_atom, const bool vf_atom) {
+                           const bool ef_atom, const bool vf_atom,
                           const int red_blocks) {
  time_answer.start();
  _eflag=eflag;
  _vflag=vflag;
  _ef_atom=ef_atom;
  _vf_atom=vf_atom;
  #ifdef LAL_NO_BLOCK_REDUCE
  _ev_stride=_inum;
  #else
  if (ef_atom || vf_atom)
    _ev_stride=_inum;
  else
    _ev_stride=red_blocks;
  #endif
  int csize=_ev_fields;
  if (!eflag)
@ -152,20 +169,24 @@ void AnswerT::copy_answers(const bool eflag, const bool vflag,
    csize-=6;
  if (csize>0)
-    engv.update_host(_inum*csize,true);
+    engv.update_host(_ev_stride*csize,true);
  if (_rot)
    force.update_host(_inum*4*2,true);
  else
    force.update_host(_inum*4,true);
  time_answer.stop();
  #ifndef GERYON_OCL_FLUSH
  force.flush();
  #endif
 }
 template <class numtyp, class acctyp>
 void AnswerT::copy_answers(const bool eflag, const bool vflag,
                           const bool ef_atom, const bool vf_atom,
-                               int *ilist) {
+                           int *ilist, const int red_blocks) {
  _ilist=ilist;
-  copy_answers(eflag,vflag,ef_atom,vf_atom);
+  copy_answers(eflag,vflag,ef_atom,vf_atom,red_blocks);
 }
 template <class numtyp, class acctyp>
@ -177,21 +198,24 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
  double evdwl=0.0;
  int vstart=0;
  if (_eflag) {
-    for (int i=0; i<_inum; i++)
+    #if (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd reduction(+:evdwl)
    #endif
    for (int i=0; i<_ev_stride; i++)
      evdwl+=engv[i];
    if (_ef_atom) {
      if (_ilist==nullptr) {
-        for (int i=0; i<_inum; i++)
+        for (int i=0; i<_ev_stride; i++)
          eatom[i]+=engv[i];
      } else {
-        for (int i=0; i<_inum; i++)
+        for (int i=0; i<_ev_stride; i++)
          eatom[_ilist[i]]+=engv[i];
      }
    }
-    vstart=_inum;
+    vstart=_ev_stride;
  }
  if (_vflag) {
-    int iend=vstart+_inum;
+    int iend=vstart+_ev_stride;
    for (int j=0; j<6; j++) {
      for (int i=vstart; i<iend; i++)
        virial[j]+=engv[i];
@ -206,8 +230,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
            vatom[_ilist[ii++]][j]+=engv[i];
        }
      }
-      vstart+=_inum;
+      vstart+=_ev_stride;
-      iend+=_inum;
+      iend+=_ev_stride;
    }
  }
@ -224,28 +248,36 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
    return energy_virial(eatom,vatom,virial);
  double evdwl=0.0;
-  int ii, vstart=0, iend=_inum;
+  int ii, vstart=0, iend=_ev_stride;
  if (_eflag) {
-    iend=_inum*2;
+    iend=_ev_stride*2;
-    for (int i=0; i<_inum; i++)
+    #if (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd reduction(+:evdwl)
    #endif
    for (int i=0; i<_ev_stride; i++)
      evdwl+=engv[i];
-    for (int i=_inum; i<iend; i++)
+    double ecv=0.0;
-      ecoul+=engv[i];
+    #if (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd reduction(+:ecv)
    #endif
    for (int i=_ev_stride; i<iend; i++)
      ecv+=engv[i];
    ecoul+=ecv;
    if (_ef_atom) {
      if (_ilist==nullptr) {
-        for (int i=0; i<_inum; i++)
+        for (int i=0; i<_ev_stride; i++)
          eatom[i]+=engv[i];
-        for (int i=_inum; i<iend; i++)
+        for (int i=_ev_stride; i<iend; i++)
          eatom[i]+=engv[i];
      } else {
-        for (int i=0, ii=0; i<_inum; i++)
+        for (int i=0, ii=0; i<_ev_stride; i++)
          eatom[_ilist[ii++]]+=engv[i];
-        for (int i=_inum, ii=0; i<iend; i++)
+        for (int i=_ev_stride, ii=0; i<iend; i++)
          eatom[_ilist[ii++]]+=engv[i];
      }
    }
    vstart=iend;
-    iend+=_inum;
+    iend+=_ev_stride;
  }
  if (_vflag) {
    for (int j=0; j<6; j++) {
@ -260,8 +292,8 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
            vatom[_ilist[ii++]][j]+=engv[i];
        }
      }
-      vstart+=_inum;
+      vstart+=_ev_stride;
-      iend+=_inum;
+      iend+=_ev_stride;
    }
  }
@ -270,24 +302,63 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
 template <class numtyp, class acctyp>
 void AnswerT::get_answers(double **f, double **tor) {
  int fl=0;
  if (_ilist==nullptr) {
-    for (int i=0; i<_inum; i++) {
+    typedef struct { double x,y,z; } vec3d;
-      f[i][0]+=force[fl];
+    typedef struct { acctyp x,y,z,w; } vec4d_t;
-      f[i][1]+=force[fl+1];
+    vec3d *fp=reinterpret_cast<vec3d*>(&(f[0][0]));
-      f[i][2]+=force[fl+2];
+    vec4d_t *forcep=reinterpret_cast<vec4d_t*>(&(force[0]));
-      fl+=4;
+
    #if (LAL_USE_OMP == 1)
    #pragma omp parallel
    #endif
    {
      #if (LAL_USE_OMP == 1)
      const int nthreads = omp_get_num_threads();
      const int tid = omp_get_thread_num();
      const int idelta = _inum / nthreads + 1;
      const int ifrom = tid * idelta;
      const int ito = std::min(ifrom + idelta, _inum);
      #else
      const int tid = 0;
      const int ifrom = 0;
      const int ito = _inum;
      #endif
      for (int i=ifrom; i<ito; i++) {
        fp[i].x+=forcep[i].x;
        fp[i].y+=forcep[i].y;
        fp[i].z+=forcep[i].z;
      }
      if (_rot) {
-      for (int i=0; i<_inum; i++) {
+        vec3d *torp=reinterpret_cast<vec3d*>(&(tor[0][0]));
-        tor[i][0]+=force[fl];
+        vec4d_t *torquep=reinterpret_cast<vec4d_t*>(&(force[_inum*4]));
-        tor[i][1]+=force[fl+1];
+        for (int i=ifrom; i<ito; i++) {
-        tor[i][2]+=force[fl+2];
+          torp[i].x+=torquep[i].x;
-        fl+=4;
+          torp[i].y+=torquep[i].y;
          torp[i].z+=torquep[i].z;
        }
      }
    }
  } else {
-    for (int i=0; i<_inum; i++) {
+    #if (LAL_USE_OMP == 1)
    #pragma omp parallel
    #endif
    {
      #if (LAL_USE_OMP == 1)
      const int nthreads = omp_get_num_threads();
      const int tid = omp_get_thread_num();
      const int idelta = _inum / nthreads + 1;
      const int ifrom = tid * idelta;
      const int ito = std::min(ifrom + idelta, _inum);
      int fl=ifrom*4;
      #else
      const int tid = 0;
      const int ifrom = 0;
      const int ito = _inum;
      int fl=0;
      #endif
      for (int i=ifrom; i<ito; i++) {
        int ii=_ilist[i];
        f[ii][0]+=force[fl];
        f[ii][1]+=force[fl+1];
@ -295,7 +366,8 @@ void AnswerT::get_answers(double **f, double **tor) {
        fl+=4;
      }
      if (_rot) {
-      for (int i=0; i<_inum; i++) {
+        fl=_inum*4 + ifrom*4;
        for (int i=ifrom; i<ito; i++) {
          int ii=_ilist[i];
          tor[ii][0]+=force[fl];
          tor[ii][1]+=force[fl+1];
@ -304,6 +376,7 @@ void AnswerT::get_answers(double **f, double **tor) {
        }
      }
    }
  }
 }
 template <class numtyp, class acctyp>
--- a/lib/gpu/lal_answer.h
+++ b/lib/gpu/lal_answer.h
@ -110,12 +110,12 @@ class Answer {
  // -------------------------COPY FROM GPU -------------------------------
  /// Copy answers from device into read buffer asynchronously
-  void copy_answers(const bool eflag, const bool vflag,
+  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
-                    const bool ef_atom, const bool vf_atom);
+                    const bool vf_atom, const int red_blocks);
  /// Copy answers from device into read buffer asynchronously
-  void copy_answers(const bool eflag, const bool vflag,
+  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
-                    const bool ef_atom, const bool vf_atom, int *ilist);
+                    const bool vf_atom, int *ilist, const int red_blocks);
  /// Copy energy and virial data into LAMMPS memory
  double energy_virial(double *eatom, double **vatom, double *virial);
@ -128,11 +128,13 @@ class Answer {
  void get_answers(double **f, double **tor);
  inline double get_answers(double **f, double **tor, double *eatom,
-                            double **vatom, double *virial, double &ecoul) {
+                            double **vatom, double *virial, double &ecoul,
                            int &error_flag_in) {
    double ta=MPI_Wtime();
    time_answer.sync_stop();
    _time_cpu_idle+=MPI_Wtime()-ta;
    double ts=MPI_Wtime();
    if (error_flag[0]) error_flag_in=error_flag[0];
    double evdw=energy_virial(eatom,vatom,virial,ecoul);
    get_answers(f,tor);
    _time_cast+=MPI_Wtime()-ts;
@ -151,6 +153,8 @@ class Answer {
  UCL_Vector<acctyp,acctyp> force;
  /// Energy and virial per-atom storage
  UCL_Vector<acctyp,acctyp> engv;
  /// Error flag
  UCL_Vector<int,int> error_flag;
  /// Device timers
  UCL_Timer time_answer;
@ -162,7 +166,7 @@ class Answer {
  bool alloc(const int inum);
  bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other;
-  int _max_local, _inum, _e_fields, _ev_fields, _ans_fields;
+  int _max_local, _inum, _e_fields, _ev_fields, _ans_fields, _ev_stride;
  int *_ilist;
  double _time_cast, _time_cpu_idle;
--- a/lib/gpu/lal_atom.cpp
+++ b/lib/gpu/lal_atom.cpp
@ -414,9 +414,9 @@ const char *atom=0;
 template <class numtyp, class acctyp>
 void AtomT::compile_kernels(UCL_Device &dev) {
-  std::string flags = "-D"+std::string(OCL_VENDOR);
+  std::string flags = "";
  atom_program=new UCL_Program(dev);
-  atom_program->load_string(atom,flags);
+  atom_program->load_string(atom,flags,nullptr,screen);
  k_cast_x.set_function(*atom_program,"kernel_cast_x");
  _compiled=true;
 }
--- a/Show More
+++ b/Show More