Merge branch 'master' into master

2020-09-01 10:59:03 +02:00
parent d601acd0ca 24f5807623
commit 0541996919
2136 changed files with 37709 additions and 82215 deletions
--- a/cmake/Modules/Documentation.cmake
+++ b/cmake/Modules/Documentation.cmake
@ -15,75 +15,93 @@ if(BUILD_DOC)
    endif()
    set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE})
  endif()
  find_package(Doxygen 1.8.10 REQUIRED)
  file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
  add_custom_command(
    OUTPUT docenv
    COMMAND ${VIRTUALENV} docenv
  )
  set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
  set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
  set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
  set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
  set(SPHINX_STATIC_DIR  ${SPHINX_CONFIG_DIR}/_static)
  # configuration and static files are copied to binary dir to avoid collisions with parallel builds
  set(DOC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc)
  set(DOC_BUILD_CONFIG_FILE ${DOC_BUILD_DIR}/conf.py)
  set(DOC_BUILD_STATIC_DIR ${DOC_BUILD_DIR}/_static)
  set(DOXYGEN_BUILD_DIR ${DOC_BUILD_DIR}/doxygen)
  set(DOXYGEN_XML_DIR ${DOXYGEN_BUILD_DIR}/xml)
  # copy entire configuration folder to doc build directory
  # files in _static are automatically copied during sphinx-build, so no need to copy them individually
  file(COPY ${SPHINX_CONFIG_DIR}/ DESTINATION ${DOC_BUILD_DIR})
  # configure paths in conf.py, since relative paths change when file is copied
  configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
  add_custom_command(
-    OUTPUT requirements.txt
+    OUTPUT ${DOC_BUILD_DIR}/requirements.txt
-    DEPENDS docenv
+    DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
-    COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
+    COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r requirements.txt --upgrade
+    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
  )
  # download mathjax distribution and unpack to folder "mathjax"
-  if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5)
+  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
    file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
      "${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
      EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
-    execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${CMAKE_CURRENT_BINARY_DIR}/mathjax)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
  endif()
  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax)
  file(COPY ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/)
  # for increased browser compatibility
-  if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js)
+  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
    file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
-      "${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js")
+      "${DOC_BUILD_STATIC_DIR}/polyfill.js")
  endif()
-  # note, this may run in parallel with other tasks, so we must not use multiple processes here
+  # set up doxygen and add targets to run it
  file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
  file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR}/lammps-logo.png)
  configure_file(${LAMMPS_DOC_DIR}/doxygen/Doxyfile.in ${DOXYGEN_BUILD_DIR}/Doxyfile)
  get_target_property(LAMMPS_SOURCES lammps SOURCES)
  add_custom_command(
-    OUTPUT html
+    OUTPUT ${DOXYGEN_XML_DIR}/index.xml
-    DEPENDS ${DOC_SOURCES} docenv requirements.txt
+    DEPENDS ${DOC_SOURCES} ${LAMMPS_SOURCES}
-    COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${LAMMPS_DOC_DIR}/src html
+    COMMAND Doxygen::doxygen ${DOXYGEN_BUILD_DIR}/Doxyfile WORKING_DIRECTORY ${DOXYGEN_BUILD_DIR}
-    COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${CMAKE_CURRENT_BINARY_DIR}/html/index.html
+    COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_XML_DIR}/run.stamp
  )
-  # copy selected image files to html output tree
+  if(EXISTS ${DOXYGEN_XML_DIR}/run.stamp)
-  file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/html/JPG)
+    set(SPHINX_EXTRA_OPTS "-E")
-  set(HTML_EXTRA_IMAGES balance_nonuniform.jpg balance_rcb.jpg
+  else()
-    balance_uniform.jpg bow_tutorial_01.png bow_tutorial_02.png
+    set(SPHINX_EXTRA_OPTS "")
-    bow_tutorial_03.png bow_tutorial_04.png bow_tutorial_05.png
+  endif()
    dump1.jpg dump2.jpg examples_mdpd.gif gran_funnel.png gran_mixer.png
    hop1.jpg hop2.jpg saed_ewald_intersect.jpg saed_mesh.jpg
    screenshot_atomeye.jpg screenshot_gl.jpg screenshot_pymol.jpg
    screenshot_vmd.jpg sinusoid.jpg xrd_mesh.jpg)
  set(HTML_IMAGE_TARGETS "")
  foreach(_IMG ${HTML_EXTRA_IMAGES})
    string(PREPEND _IMG JPG/)
    list(APPEND HTML_IMAGE_TARGETS "${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}")
  # Build the HTML manual with sphinx-build from the virtual environment.
  # The rule depends on the .rst sources, the docenv/requirements setup, the
  # doxygen XML index, and the conf.py generated by configure_file() above
  # (DOC_BUILD_CONFIG_FILE), so a regenerated configuration triggers a rebuild.
  # SPHINX_EXTRA_OPTS carries "-E" when a doxygen run.stamp file was found at
  # configure time; the stamp file is removed again after the sphinx run.
  add_custom_command(
-      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}
+    OUTPUT html
-      DEPENDS ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_CURRENT_BINARY_DIR}/html/JPG
+    DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${DOC_BUILD_CONFIG_FILE}
-      COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_BINARY_DIR}/html/${_IMG}
+    COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
    COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
    COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
  )
  endforeach()
  add_custom_target(
    doc ALL
-    DEPENDS html ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/es5 ${HTML_IMAGE_TARGETS}
+    DEPENDS html ${DOC_BUILD_STATIC_DIR}/mathjax/es5
    SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
  )
-  install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
+  install(DIRECTORY ${DOC_BUILD_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
 endif()
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -75,7 +75,7 @@ if(GPU_API STREQUAL "CUDA")
  endif()
  # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
-    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35]")
+    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
  endif()
  # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS)
  list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
  include(ExternalProject)
  ExternalProject_Add(kokkos_build
-    URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
+    URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
-    URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37
+    URL_MD5 81569170fe232e5e64ab074f7cca5e50
    CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
    BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
  )
@ -50,7 +50,7 @@ if(DOWNLOAD_KOKKOS)
  target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
  add_dependencies(LAMMPS::KOKKOS kokkos_build)
 elseif(EXTERNAL_KOKKOS)
-  find_package(Kokkos 3.1.01 REQUIRED CONFIG)
+  find_package(Kokkos 3.2.00 REQUIRED CONFIG)
  target_link_libraries(lammps PRIVATE Kokkos::kokkos)
 else()
  set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
--- a/doc/.gitignore
+++ b/doc/.gitignore
@ -1,6 +1,7 @@
 /old
 /html
 /html-offline
 /epub
 /latex
 /mathjax
 /spelling
@ -10,3 +11,9 @@
 /Developer.pdf
 /doctrees
 /docenv
 /doxygen-warn.log
 /utils/sphinx-config/conf.py
 /doxygen/Doxyfile
 *.el
 /utils/sphinx-config/_static/mathjax
 /utils/sphinx-config/_static/polyfill.js
--- a/doc/Makefile
+++ b/doc/Makefile
@ -4,20 +4,28 @@ SHELL         = /bin/bash
 BUILDDIR       = ${CURDIR}
 RSTDIR         = $(BUILDDIR)/src
 VENV           = $(BUILDDIR)/docenv
 MATHJAX       = $(BUILDDIR)/mathjax
 TXT2RST        = $(VENV)/bin/txt2rst
 ANCHORCHECK    = $(VENV)/bin/rst_anchor_check
 SPHINXCONFIG   = $(BUILDDIR)/utils/sphinx-config
 MATHJAX        = $(SPHINXCONFIG)/_static/mathjax
 POLYFILL       = $(SPHINXCONFIG)/_static/polyfill.js
 PYTHON         = $(shell which python3)
 DOXYGEN        = $(shell which doxygen)
 VIRTUALENV     = virtualenv
 HAS_PYTHON3    = NO
 HAS_VIRTUALENV = NO
 HAS_DOXYGEN    = NO
 HAS_PDFLATEX   = NO
 ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
 HAS_PYTHON3    = YES
 endif
 ifeq ($(shell which doxygen >/dev/null 2>&1; echo $$?), 0)
 HAS_DOXYGEN    = YES
 endif
 ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
 VIRTUALENV     = virtualenv-3
 HAS_VIRTUALENV = YES
@ -33,16 +41,20 @@ HAS_PDFLATEX = YES
 endif
-SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())')
+SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")
-.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check
+# grab list of sources from doxygen config file.
 # we only want to use explicitly listed files.
 DOXYFILES      = $(shell sed -n -e 's/\#.*$$//' -e '/^ *INPUT \+=/,/^[A-Z_]\+ \+=/p' doxygen/Doxyfile.in | sed -e 's/@LAMMPS_SOURCE_DIR@/..\/src/g' -e 's/\\//g' -e 's/ \+/ /' -e 's/[A-Z_]\+ \+= *\(YES\|NO\|\)//') 
 .PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check xmlgen
 # ------------------------------------------
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html          create HTML doc pages in html dir"
-	@echo "  pdf           create Developer.pdf and Manual.pdf in this dir"
+	@echo "  pdf           create Manual.pdf in this dir"
 	@echo "  fetch         fetch HTML and PDF files from LAMMPS web site"
 	@echo "  epub          create ePUB format manual for e-book readers"
 	@echo "  mobi          convert ePUB to MOBI format manual for e-book readers (e.g. Kindle)"
@ -57,23 +69,32 @@ help:
 # ------------------------------------------
 clean-all: clean
-	rm -rf $(BUILDDIR)/docenv $(BUILDDIR)/doctrees $(BUILDDIR)/mathjax Manual.pdf Developer.pdf
+	rm -rf $(BUILDDIR)/docenv $(MATHJAX) $(BUILDDIR)/LAMMPS.mobi $(BUILDDIR)/LAMMPS.epub $(BUILDDIR)/Manual.pdf
 clean: clean-spelling
-	rm -rf html epub latex
+	rm -rf $(BUILDDIR)/html $(BUILDDIR)/epub $(BUILDDIR)/latex $(BUILDDIR)/doctrees $(BUILDDIR)/doxygen/xml $(BUILDDIR)/doxygen-warn.log $(BUILDDIR)/doxygen/Doxyfile $(SPHINXCONFIG)/conf.py
 clean-spelling:
-	rm -rf spelling
+	rm -rf $(BUILDDIR)/spelling
-html: $(ANCHORCHECK) $(MATHJAX)
+$(SPHINXCONFIG)/conf.py: $(SPHINXCONFIG)/conf.py.in
 	sed -e 's,@DOXYGEN_XML_DIR@,$(BUILDDIR)/doxygen/xml,g'   \
 	    -e 's,@LAMMPS_SOURCE_DIR@,$(BUILDDIR)/../src,g'    \
 	    -e 's,@LAMMPS_PYTHON_DIR@,$(BUILDDIR)/../python,g' \
 	    -e 's,@LAMMPS_DOC_DIR@,$(BUILDDIR),g' $< > $@
 html: xmlgen $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX) $(POLYFILL)
 	@$(MAKE) $(MFLAGS) -C graphviz all
 	@(\
-		. $(VENV)/bin/activate ;\
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
-		sphinx-build $(SPHINXEXTRA) -b html -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
+		sphinx-build $(SPHINXEXTRA) -b html -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
 		ln -sf Manual.html html/index.html;\
 		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		echo "############################################" ;\
 		rst_anchor_check src/*.rst ;\
-		python utils/check-packages.py -s ../src -d src ;\
+		python $(BUILDDIR)/utils/check-packages.py -s ../src -d src ;\
 		env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\
-		python utils/check-styles.py -s ../src -d src ;\
+		python $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
 		echo "############################################" ;\
 		deactivate ;\
 	)
@ -82,30 +103,28 @@ html: $(ANCHORCHECK) $(MATHJAX)
 	@rm -rf html/USER
 	@rm -rf html/JPG
 	@cp -r src/PDF html/PDF
 	@mkdir -p html/JPG
 	@cp `grep -A2 '\.\. .*\(image\|figure\)::' src/*.rst | grep ':target: JPG' | sed -e 's,.*:target: JPG/,src/JPG/,' | sort | uniq` html/JPG/
 	@rm -rf html/PDF/.[sg]*
 	@mkdir -p html/_static/mathjax
 	@cp -r $(MATHJAX)/es5 html/_static/mathjax/
 	@echo "Build finished. The HTML pages are in doc/html."
-spelling: $(VENV) utils/sphinx-config/false_positives.txt
+spelling: xmlgen $(VENV) $(SPHINXCONFIG)/false_positives.txt
 	@(\
-		. $(VENV)/bin/activate ;\
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
-		cp utils/sphinx-config/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
+		cp $(SPHINXCONFIG)/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
-		sphinx-build -b spelling -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
+		sphinx-build -b spelling -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
 		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		deactivate ;\
 	)
 	@echo "Spell check finished."
-epub: $(VENV)
+epub: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
 	@$(MAKE) $(MFLAGS) -C graphviz all
 	@mkdir -p epub/JPG
 	@rm -f LAMMPS.epub
 	@cp src/JPG/lammps-logo.png epub/
 	@cp src/JPG/*.* epub/JPG
 	@(\
 		. $(VENV)/bin/activate ;\
-		sphinx-build $(SPHINXEXTRA) -b epub -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
+		sphinx-build $(SPHINXEXTRA) -b epub -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
 		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		deactivate ;\
 	)
 	@mv  epub/LAMMPS.epub .
@ -117,18 +136,13 @@ mobi: epub
 	@ebook-convert LAMMPS.epub LAMMPS.mobi
 	@echo "Conversion finished. The MOBI manual file is created."
-pdf: $(ANCHORCHECK)
+pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
 	@$(MAKE) $(MFLAGS) -C graphviz all
 	@if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
 	@(\
-		cd src/Developer; \
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
-		pdflatex developer; \
+		sphinx-build $(SPHINXEXTRA) -b latex -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
-		pdflatex developer; \
+		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		mv developer.pdf ../../Developer.pdf; \
 		cd ../../; \
 	)
 	@(\
 		. $(VENV)/bin/activate ;\
 		sphinx-build $(SPHINXEXTRA) -b latex -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
 		echo "############################################" ;\
 		rst_anchor_check src/*.rst ;\
 		python utils/check-packages.py -s ../src -d src ;\
@ -154,12 +168,11 @@ pdf: $(ANCHORCHECK)
 	@rm -rf latex/USER
 	@cp -r src/PDF latex/PDF
 	@rm -rf latex/PDF/.[sg]*
-	@echo "Build finished. Manual.pdf and Developer.pdf are in this directory."
+	@echo "Build finished. Manual.pdf is in this directory."
 fetch:
-	@rm -rf html_www Manual_www.pdf Developer_www.pdf
+	@rm -rf html_www Manual_www.pdf
 	@curl -s -o Manual_www.pdf http://lammps.sandia.gov/doc/Manual.pdf
 	@curl -s -o Developer_www.pdf http://lammps.sandia.gov/doc/Developer.pdf
 	@curl -s -o lammps-doc.tar.gz http://lammps.sandia.gov/tars/lammps-doc.tar.gz
 	@tar xzf lammps-doc.tar.gz
 	@rm -f lammps-doc.tar.gz
@ -185,21 +198,32 @@ package_check : $(VENV)
 		deactivate ;\
 	)
 # xmlgen is a convenience alias (listed in .PHONY above) for the doxygen XML index file.
 xmlgen : doxygen/xml/index.xml
 # Generate the Doxyfile from its template by replacing @LAMMPS_SOURCE_DIR@
 # with ../../src (doxygen is run from inside the doxygen folder, see below).
 doxygen/Doxyfile: doxygen/Doxyfile.in
 	sed -e 's/@LAMMPS_SOURCE_DIR@/..\/..\/src/g' $< > $@
 # Run doxygen inside the doxygen folder. Only when it succeeds, touch
 # xml/run.stamp; SPHINXEXTRA tests for that file to add -E to sphinx-build,
 # and the sphinx-build recipes remove the stamp file again afterwards.
 doxygen/xml/index.xml : $(VENV) doxygen/Doxyfile $(DOXYFILES)
 	@(cd doxygen; $(DOXYGEN) Doxyfile && touch xml/run.stamp)
 # ------------------------------------------
 $(VENV):
-	@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
+	@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "python3 was not found! Please see README for further instructions" 1>&2; exit 1; fi
-	@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
+	@if [ "$(HAS_DOXYGEN)" == "NO" ] ; then echo "doxygen was not found! Please see README for further instructions" 1>&2; exit 1; fi
 	@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please see README for further instructions" 1>&2; exit 1; fi
 	@( \
 		$(VIRTUALENV) -p $(PYTHON) $(VENV); \
 		. $(VENV)/bin/activate; \
 		pip install --upgrade pip; \
-		pip install --use-feature=2020-resolver -r requirements.txt; \
+		pip install --use-feature=2020-resolver -r $(BUILDDIR)/utils/requirements.txt; \
 		deactivate;\
 	)
 $(MATHJAX):
-	@git clone --depth 1 https://github.com/mathjax/MathJax.git mathjax
+	@git clone --depth 1 https://github.com/mathjax/MathJax.git $@
 $(POLYFILL): $(MATHJAX)
 	@curl -s -o $@ "https://polyfill.io/v3/polyfill.min.js?features=es6"
 $(TXT2RST) $(ANCHORCHECK): $(VENV)
 	@( \
--- a/doc/README
+++ b/doc/README
@ -1,97 +1,60 @@
 LAMMPS Documentation
-Depending on how you obtained LAMMPS, this directory has 2 or 3
+Depending on how you obtained LAMMPS and whether you have built
-sub-directories and optionally 2 PDF files and an ePUB file:
+the manual yourself, this directory has a varying number of
 sub-directories and files. Here is a list with descriptions:
 README            this file
 src               content files for LAMMPS documentation
 html              HTML version of the LAMMPS manual (see html/Manual.html)
 utils             utilities and settings for building the documentation
-Manual.pdf      large PDF version of entire manual
+Manual.pdf        PDF version of entire manual
-Developer.pdf   small PDF with info about how LAMMPS is structured
+Developer.pdf     PDF with info about how LAMMPS is structured
 LAMMPS.epub       Manual in ePUB format
 LAMMPS.mobi       Manual in MOBI (Kindle) format
 lammps.1          man page for the lammps command
 msi2lmp.1         man page for the msi2lmp command
 mathjax           code and fonts for rendering math in html
 doctrees          temporary data
 docenv            python virtual environment for generating the manual
 doxygen           Doxygen configuration and output
 .gitignore        list of files and folders to be ignored by git
 doxygen-warn.log  logfile with warnings from running doxygen
-If you downloaded LAMMPS as a tarball from the web site, all these
+and:
 directories and files should be included.
-If you downloaded LAMMPS from the public SVN or Git repositories, then
+github-development-workflow.md   notes on the LAMMPS development workflow
-the HTML and PDF files are not included.  Instead you need to create
+include-file-conventions.md      notes on LAMMPS' include file conventions
-them, in one of three ways:
+documentation_conventions.md     notes on writing documentation for LAMMPS
 If you downloaded a LAMMPS tarball from lammps.sandia.gov, then the html
 folder and the PDF manual should be included. If you downloaded LAMMPS
 from GitHub then you either need to download them or build them.
 (a) You can "fetch" the current HTML and PDF files from the LAMMPS web
 site.  Just type "make fetch".  This should create a html_www dir and
-Manual_www.pdf/Developer_www.pdf files.  Note that if new LAMMPS
+Manual_www.pdf/Developer_www.pdf files.  These files will always
-features have been added more recently than the date of your version,
+represent the latest published patch/development version of LAMMPS.
 the fetched documentation will include those changes (but your source
 code will not, unless you update your local repository).
-(b) You can build the HTML and PDF files yourself, by typing "make
+(b) You can build the HTML and PDF files yourself, by typing "make html"
-html" or by "make pdf", respectively.  This requires various tools
+or by "make pdf", respectively.  This requires various tools and files.
-including the Python documentation processing tool Sphinx, which the
+Some of them have to be installed (more on that below).  For the rest the
-build process will attempt to download and install on your system into
+build process will attempt to download and install into a python virtual
-a python virtual environment, if not already available.  The PDF file
+environment and local folders.
 will require a working LaTeX installation with several add-on packages
 in addition to the Python/Sphinx setup.  See more details below.
 ----------------
-The generation of all documentation is managed by the Makefile in this
+Installing prerequisites for the documentation build
 dir.
-Options:
+To run the HTML documentation build toolchain, python 3.x, doxygen, git,
 and virtualenv have to be installed.  Also internet access is initially
 required to download external files and tools.
-make html         # generate HTML in html dir using Sphinx
+Building the PDF format manual requires in addition a compatible LaTeX
-make pdf          # generate 2 PDF files (Manual.pdf,Developer.pdf)
+installation with support for PDFLaTeX and several add-on LaTeX packages
-                  #   in this dir via Sphinx and PDFLaTeX
+installed.  This includes:
 make fetch        # fetch HTML doc pages and 2 PDF files from web site
                  #   as a tarball and unpack into html dir and 2 PDFs
 make epub         # generate LAMMPS.epub in ePUB format using Sphinx
 make clean        # remove intermediate RST files created by HTML build
 make clean-all    # remove entire build folder and any cached data
 ----------------
 Installing prerequisites for HTML build
 To run the HTML documentation build toolchain, Python 3 and virtualenv
 have to be installed.  Here are instructions for common setups:
 # Ubuntu
 sudo apt-get install python-virtualenv
 # Fedora (up to version 21)
 # Red Hat Enterprise Linux or CentOS (up to version 7.x)
 sudo yum install python3-virtualenv
 # Fedora (since version 22)
 sudo dnf install python3-virtualenv
 # MacOS X
 ## Python 3
 Download the latest Python 3 MacOS X package from
 https://www.python.org and install it.  This will install both Python
 3 and pip3.
 ## virtualenv
 Once Python 3 is installed, open a Terminal and type
 pip3 install virtualenv
 This will install virtualenv from the Python Package Index.
 ----------------
 Installing prerequisites for PDF build
 Same as for HTML plus a compatible LaTeX installation with
 support for PDFLaTeX. Also the following LaTeX packages need
 to be installed (e.g. from texlive):
 - amsmath
 - anysize
 - babel
 - capt-of
 - cmap
@ -105,24 +68,13 @@ to be installed (e.g. from texlive):
 - tabulary
 - upquote
 - wrapfig
 Building the EPUB format requires LaTeX installation with the same packages
 as for the PDF format plus the 'dvipng' command to convert the embedded math
 into images. The MOBI format is generated from the EPUB format file by using
 the tool 'ebook-convert' from the 'calibre' e-book management software
 (https://calibre-ebook.com).
 ----------------
-Installing prerequisites for epub build
+More details on this can be found in the manual itself. The online
-
+version is at: https://lammps.sandia.gov/doc/Manual_build.html
 ## ePUB
 Same as for HTML. This uses the same tools and configuration
 files as the HTML tree. The ePUB format conversion currently
 does not support processing mathematical expressions via MathJAX,
 so there will be limitations on some pages. For the time being
 until this is resolved, building and using the PDF format file
 is recommended instead.
 For converting the generated ePUB file to a mobi format file
 (for e-book readers like Kindle, that cannot read ePUB), you
 also need to have the 'ebook-convert' tool from the "calibre"
 software installed. http://calibre-ebook.com/
 You first create the ePUB file with 'make epub' and then do:
 ebook-convert LAMMPS.epub LAMMPS.mobi
--- a/doc/documentation_conventions.md
+++ b/doc/documentation_conventions.md
@ -0,0 +1,93 @@
 # Outline of LAMMPS documentation file conventions
 The purpose of this document is to provide a point of reference
 for LAMMPS developers and contributors as to what conventions
 should be used to structure and format files in the LAMMPS manual.
 Last change: 2020-04-23
 ## File format and tools
 In fall 2019, the LAMMPS documentation file format has changed from
 a home grown minimal markup designed to generate HTML format files
 from a mostly plain text format to using the reStructuredText file
 format.  For a transition period all files in the old .txt format
 were transparently converted to .rst and then processed.  The txt2rst
 tool is still included in the distribution to obtain an initial .rst
 file for integration into the manual.  Since the transition to
 reStructuredText as source format, many of the artifacts of the
 translation have been removed, though, and parts of the documentation
 refactored and expanded to take advantage of the capabilities of
 reStructuredText and associated tools.  The conversion from the
 source to the final formats (HTML, PDF, and optionally e-book
 reader formats ePUB and MOBI) is mostly automated and controlled
 by a Makefile in the `doc` folder. This makefile assumes that the
 processing is done on a Unix-like machine and Python 3.5 or later
 and a matching virtualenv module are available.  Additional Python
 packages (like the Sphinx tool and several extensions) are
 transparently installed into a virtual environment over the
 internet using the `pip` package manager.  Further requirements
 and details are discussed in the manual.
 ## Work in progress
 The refactoring and improving of the documentation is an ongoing
 process, so statements in this document may not always be fully
 up-to-date.  If in doubt, contact the LAMMPS developers.
 ## General structure
 The layout and formatting of added files should follow the example
 of the existing files.  Since those are directly derived from their
 former .txt format versions and the manual has been maintained in
 that format for many years, there is a large degree of consistency
 already, so comparison with similar files should give you a good
 idea what kind of information and sections are needed.
 ## Formatting conventions
 Filenames, folders, paths, (shell) commands, definitions, makefile
 settings and similar should be formatted as "literals" with
 double backward quotes bracketing the item: \`\`path/to/some/file\`\`
 Keywords and options are formatted in italics:  \*option\*
 Mathematical expressions, equations, symbols are typeset using
 either a `.. math::` block or the `:math:` role.
 Groups of shell commands or LAMMPS input script or C/C++ source
 code should be typeset into a `.. code-block::` section. A syntax
 highlighting extension for LAMMPS input scripts is provided, so
 `LAMMPS` can be used to indicate the language in the code block
 in addition to `bash`, `c`, or `python`.  When no syntax style
 is indicated, no syntax highlighting is performed.
 As an alternative, e.g. to typeset the syntax of file formats
 a `.. parsed-literal::` block can be used, which allows some
 formatting directives, which means that related characters need
 to be escaped with a preceding backslash: `\*`.
 Special remarks can be highlighted with a `.. note::` block and
 strong warnings can be put into a `.. warning::` block.
 ## Required steps when adding a custom style to LAMMPS
 When adding a new style (e.g. pair style or a compute or a fix)
 or a new command, it is **required** to include the corresponding
 documentation.  Those are often new files that need to be added.
 In order to be included in the documentation, those new files
 need to be referenced in a `.. toctree::` block.  Most of those
 use patterns with wildcards, so the addition will be automatic.
 However, those additions also need to be added to some lists of
 styles or commands.  The `make style\_check` command will perform
 a test and report any missing entries and list the affected files.
 Any references defined with `.. \_refname:` have to be unique
 across all documentation files and this can be checked for with
 `make anchor\_check`.  Finally, a spell-check should be done,
 which is triggered via `make spelling`.  Any offenses need to
 be corrected and false positives should be added to the file
 `utils/sphinx-config/false\_positives.txt`.
 ## Required additional steps when adding a new package to LAMMPS
 TODO
--- a/doc/doxygen/.gitignore
+++ b/doc/doxygen/.gitignore
@ -0,0 +1 @@
 /xml
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -0,0 +1,528 @@
 # Doxyfile 1.8.15 -*- makefile -*-
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = "LAMMPS Programmer's Guide"
 PROJECT_NUMBER         = "24 August 2020"
 PROJECT_BRIEF          = "Documentation of the LAMMPS library interface and Python wrapper"
 PROJECT_LOGO           = lammps-logo.png
 CREATE_SUBDIRS         = NO
 ALLOW_UNICODE_NAMES    = NO
 OUTPUT_LANGUAGE        = English
 OUTPUT_TEXT_DIRECTION  = LTR
 BRIEF_MEMBER_DESC      = YES
 REPEAT_BRIEF           = YES
 ALWAYS_DETAILED_SEC    = NO
 INLINE_INHERITED_MEMB  = NO
 FULL_PATH_NAMES        = NO
 INHERIT_DOCS           = YES
 TAB_SIZE               = 2
 # When enabled doxygen tries to link words that correspond to documented
 # classes, or namespaces to their corresponding documentation. Such a link can
 # be prevented in individual cases by putting a % sign in front of the word or
 # globally by setting AUTOLINK_SUPPORT to NO.
 # The default value is: YES.
 AUTOLINK_SUPPORT       = YES
 # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
 # to include (a tag file for) the STL sources as input, then you should set this
 # tag to YES in order to let doxygen match functions declarations and
 # definitions whose arguments contain STL classes (e.g. func(std::string);
 # versus func(std::string) {}). This also makes the inheritance and collaboration
 # diagrams that involve STL classes more complete and accurate.
 # The default value is: NO.
 BUILTIN_STL_SUPPORT    = YES
 IDL_PROPERTY_SUPPORT   = NO
 # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
 # cache is used to resolve symbols given their name and scope. Since this can be
 # an expensive process and often the same symbol appears multiple times in the
 # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
 # doxygen will become slower. If the cache is too large, memory is wasted. The
 # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
 # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
 # symbols. At the end of a run doxygen will report the cache usage and suggest
 # the optimal cache size from a speed point of view.
 # Minimum value: 0, maximum value: 9, default value: 0.
 LOOKUP_CACHE_SIZE      = 2
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
 # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
 # documentation are documented, even if no documentation was available. Private
 # class members and static file members will be hidden unless the
 # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
 # Note: This will also disable the warnings about undocumented members that are
 # normally produced when WARNINGS is set to YES.
 # The default value is: NO.
 EXTRACT_ALL            = NO
 # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
 # be included in the documentation.
 # The default value is: NO.
 EXTRACT_PRIVATE        = YES
 # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
 # scope will be included in the documentation.
 # The default value is: NO.
 EXTRACT_PACKAGE        = YES
 # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
 # included in the documentation.
 # The default value is: NO.
 EXTRACT_STATIC         = YES
 # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
 # locally in source files will be included in the documentation. If set to NO,
 # only classes defined in header files are included. Does not have any effect
 # for Java sources.
 # The default value is: YES.
 EXTRACT_LOCAL_CLASSES  = YES
 # If this flag is set to YES, the members of anonymous namespaces will be
 # extracted and appear in the documentation as a namespace called
 # 'anonymous_namespace{file}', where file will be replaced with the base name of
 # the file that contains the anonymous namespace. By default anonymous namespaces
 # are hidden.
 # The default value is: NO.
 EXTRACT_ANON_NSPACES   = YES
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
 # section is generated. This option has no effect if EXTRACT_ALL is enabled.
 # The default value is: NO.
 HIDE_UNDOC_MEMBERS     = YES
 # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
 # undocumented classes that are normally visible in the class hierarchy. If set
 # to NO, these classes will be included in the various overviews. This option
 # has no effect if EXTRACT_ALL is enabled.
 # The default value is: NO.
 HIDE_UNDOC_CLASSES     = YES
 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
 # (class|struct|union) declarations. If set to NO, these declarations will be
 # included in the documentation.
 # The default value is: NO.
 HIDE_FRIEND_COMPOUNDS  = NO
 # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
 # documentation blocks found inside the body of a function. If set to NO, these
 # blocks will be appended to the function's detailed documentation block.
 # The default value is: NO.
 HIDE_IN_BODY_DOCS      = NO
 # The INTERNAL_DOCS tag determines if documentation that is typed after a
 # \internal command is included. If the tag is set to NO then the documentation
 # will be excluded. Set it to YES to include the internal documentation.
 # The default value is: NO.
 INTERNAL_DOCS          = NO
 # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
 # names in lower-case letters. If set to YES, upper-case letters are also
 # allowed. This is useful if you have classes or files whose names only differ
 # in case and if your file system supports case sensitive file names. Windows
 # and Mac users are advised to set this option to NO.
 # The default value is: system dependent.
 CASE_SENSE_NAMES       = YES
 # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
 # their full class and namespace scopes in the documentation. If set to YES, the
 # scope will be hidden.
 # The default value is: NO.
 HIDE_SCOPE_NAMES       = YES
 # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
 # append additional text to a page's title, such as Class Reference. If set to
 # YES the compound reference will be hidden.
 # The default value is: NO.
 HIDE_COMPOUND_REFERENCE= NO
 # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
 # the files that are included by a file in the documentation of that file.
 # The default value is: YES.
 SHOW_INCLUDE_FILES     = NO
 # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
 # grouped member an include statement to the documentation, telling the reader
 # which file to include in order to use the member.
 # The default value is: NO.
 SHOW_GROUPED_MEMB_INC  = NO
 # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
 # files with double quotes in the documentation rather than with sharp brackets.
 # The default value is: NO.
 FORCE_LOCAL_INCLUDES   = NO
 # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
 # documentation for inline members.
 # The default value is: YES.
 INLINE_INFO            = YES
 # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
 # (detailed) documentation of file and class members alphabetically by member
 # name. If set to NO, the members will appear in declaration order.
 # The default value is: YES.
 SORT_MEMBER_DOCS       = NO
 # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
 # descriptions of file, namespace and class members alphabetically by member
 # name. If set to NO, the members will appear in declaration order. Note that
 # this will also influence the order of the classes in the class list.
 # The default value is: NO.
 SORT_BRIEF_DOCS        = NO
 # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
 # (brief and detailed) documentation of class members so that constructors and
 # destructors are listed first. If set to NO the constructors will appear in the
 # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
 # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
 # member documentation.
 # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
 # detailed member documentation.
 # The default value is: NO.
 SORT_MEMBERS_CTORS_1ST = NO
 # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
 # of group names into alphabetical order. If set to NO the group names will
 # appear in their defined order.
 # The default value is: NO.
 SORT_GROUP_NAMES       = NO
 # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
 # fully-qualified names, including namespaces. If set to NO, the class list will
 # be sorted only by class name, not including the namespace part.
 # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
 # Note: This option applies only to the class list, not to the alphabetical
 # list.
 # The default value is: NO.
 SORT_BY_SCOPE_NAME     = NO
 # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
 # type resolution of all parameters of a function it will reject a match between
 # the prototype and the implementation of a member function even if there is
 # only one candidate or it is obvious which candidate to choose by doing a
 # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
 # accept a match between prototype and implementation in such cases.
 # The default value is: NO.
 STRICT_PROTO_MATCHING  = NO
 # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
 # list. This list is created by putting \todo commands in the documentation.
 # The default value is: YES.
 GENERATE_TODOLIST      = YES
 # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
 # list. This list is created by putting \test commands in the documentation.
 # The default value is: YES.
 GENERATE_TESTLIST      = YES
 # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
 # list. This list is created by putting \bug commands in the documentation.
 # The default value is: YES.
 GENERATE_BUGLIST       = YES
 # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
 # the deprecated list. This list is created by putting \deprecated commands in
 # the documentation.
 # The default value is: YES.
 GENERATE_DEPRECATEDLIST= YES
 # The ENABLED_SECTIONS tag can be used to enable conditional documentation
 # sections, marked by \if <section_label> ... \endif and \cond <section_label>
 # ... \endcond blocks.
 ENABLED_SECTIONS       =
 # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
 # initial value of a variable or macro / define can have for it to appear in the
 # documentation. If the initializer consists of more lines than specified here
 # it will be hidden. Use a value of 0 to hide initializers completely. The
 # appearance of the value of individual variables and macros / defines can be
 # controlled using \showinitializer or \hideinitializer command in the
 # documentation regardless of this setting.
 # Minimum value: 0, maximum value: 10000, default value: 30.
 MAX_INITIALIZER_LINES  = 30
 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
 # the bottom of the documentation of classes and structs. If set to YES, the
 # list will mention the files that were used to generate the documentation.
 # The default value is: YES.
 SHOW_USED_FILES        = YES
 # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
 # will remove the Files entry from the Quick Index and from the Folder Tree View
 # (if specified).
 # The default value is: YES.
 SHOW_FILES             = NO
 # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
 # page. This will remove the Namespaces entry from the Quick Index and from the
 # Folder Tree View (if specified).
 # The default value is: YES.
 SHOW_NAMESPACES        = YES
 # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 # doxygen should invoke to get the current version for each file (typically from
 # the version control system). Doxygen will invoke the program by executing (via
 # popen()) the command command input-file, where command is the value of the
 # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
 # by doxygen. Whatever the program writes to standard output is used as the file
 # version. For an example see the documentation.
 FILE_VERSION_FILTER    =
 # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
 # by doxygen. The layout file controls the global structure of the generated
 # output files in an output format independent way. To create the layout file
 # that represents doxygen's defaults, run doxygen with the -l option. You can
 # optionally specify a file name after the option, if omitted DoxygenLayout.xml
 # will be used as the name of the layout file.
 #
 # Note that if you run doxygen from a directory containing a file called
 # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
 # tag is left empty.
 LAYOUT_FILE            =
 # The CITE_BIB_FILES tag can be used to specify one or more bib files containing
 # the reference definitions. This must be a list of .bib files. The .bib
 # extension is automatically appended if omitted. This requires the bibtex tool
 # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
 # For LaTeX the style of the bibliography can be controlled using
 # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
 # search path. See also \cite for info how to create references.
 CITE_BIB_FILES         =
 #---------------------------------------------------------------------------
 # Configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
 # The QUIET tag can be used to turn on/off the messages that are generated to
 # standard output by doxygen. If QUIET is set to YES this implies that the
 # messages are off.
 # The default value is: NO.
 QUIET                  = NO
 # The WARNINGS tag can be used to turn on/off the warning messages that are
 # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
 # this implies that the warnings are on.
 #
 # Tip: Turn warnings on while writing the documentation.
 # The default value is: YES.
 WARNINGS               = YES
 # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
 # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
 # will automatically be disabled.
 # The default value is: YES.
 WARN_IF_UNDOCUMENTED   = YES
 # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
 # potential errors in the documentation, such as not documenting some parameters
 # in a documented function, or documenting parameters that don't exist or using
 # markup commands wrongly.
 # The default value is: YES.
 WARN_IF_DOC_ERROR      = YES
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
 # value. If set to NO, doxygen will only warn about wrong or incomplete
 # parameter documentation, but not about the absence of documentation. If
 # EXTRACT_ALL is set to YES then this flag will automatically be disabled.
 # The default value is: NO.
 WARN_NO_PARAMDOC       = YES
 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
 # a warning is encountered.
 # The default value is: NO.
 WARN_AS_ERROR          = NO
 # The WARN_FORMAT tag determines the format of the warning messages that doxygen
 # can produce. The string should contain the $file, $line, and $text tags, which
 # will be replaced by the file and line number from which the warning originated
 # and the warning text. Optionally the format may contain $version, which will
 # be replaced by the version of the file (if it could be obtained via
 # FILE_VERSION_FILTER)
 # The default value is: $file:$line: $text.
 WARN_FORMAT            = "$file:$line: $text"
 # The WARN_LOGFILE tag can be used to specify a file to which warning and error
 # messages should be written. If left blank the output is written to standard
 # error (stderr).
 WARN_LOGFILE           = "../doxygen-warn.log"
 #---------------------------------------------------------------------------
 # Configuration options related to the input files
 #---------------------------------------------------------------------------
 # The INPUT tag is used to specify the files and/or directories that contain
 # documented source files. You may enter file names like myfile.cpp or
 # directories like /usr/src/myproject. Separate the files or directories with
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 #
 # Only the files listed here are parsed. @LAMMPS_SOURCE_DIR@ is substituted
 # when this template is configured. Every entry except the last ends in a
 # backslash (line continuation); the last entry must NOT, so that the value
 # ends here regardless of what follows.
 INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp      \
                          @LAMMPS_SOURCE_DIR@/utils.h        \
                          @LAMMPS_SOURCE_DIR@/library.cpp    \
                          @LAMMPS_SOURCE_DIR@/library.h      \
                          @LAMMPS_SOURCE_DIR@/lammps.cpp     \
                          @LAMMPS_SOURCE_DIR@/lammps.h       \
                          @LAMMPS_SOURCE_DIR@/lmptype.h      \
                          @LAMMPS_SOURCE_DIR@/pointers.h     \
                          @LAMMPS_SOURCE_DIR@/atom.cpp       \
                          @LAMMPS_SOURCE_DIR@/atom.h         \
                          @LAMMPS_SOURCE_DIR@/input.cpp      \
                          @LAMMPS_SOURCE_DIR@/input.h        \
                          @LAMMPS_SOURCE_DIR@/tokenizer.cpp  \
                          @LAMMPS_SOURCE_DIR@/tokenizer.h    \
                          @LAMMPS_SOURCE_DIR@/text_file_reader.cpp  \
                          @LAMMPS_SOURCE_DIR@/text_file_reader.h    \
                          @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp  \
                          @LAMMPS_SOURCE_DIR@/potential_file_reader.h
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
 # from the input.
 # The default value is: NO.
 EXCLUDE_SYMLINKS       = YES
 #---------------------------------------------------------------------------
 # Configuration options related to output
 #---------------------------------------------------------------------------
 GENERATE_HTML          = NO
 GENERATE_LATEX         = NO
 GENERATE_XML           = YES
 XML_OUTPUT             = xml
 XML_PROGRAMLISTING     = YES
 XML_NS_MEMB_FILE_SCOPE = NO
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 # If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
 # C-preprocessor directives found in the sources and include files.
 # The default value is: YES.
 #ENABLE_PREPROCESSING   = YES
 ENABLE_PREPROCESSING   = NO
 # If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
 # in the source code. If set to NO, only conditional compilation will be
 # performed. Macro expansion can be done in a controlled way by setting
 # EXPAND_ONLY_PREDEF to YES.
 # The default value is: NO.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 MACRO_EXPANSION        = NO
 # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
 # the macro expansion is limited to the macros specified with the PREDEFINED and
 # EXPAND_AS_DEFINED tags.
 # The default value is: NO.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 EXPAND_ONLY_PREDEF     = NO
 # If the SEARCH_INCLUDES tag is set to YES, the include files in the
 # INCLUDE_PATH will be searched if a #include is found.
 # The default value is: YES.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 SEARCH_INCLUDES        = YES
 # The INCLUDE_PATH tag can be used to specify one or more directories that
 # contain include files that are not input files but should be processed by the
 # preprocessor.
 # This tag requires that the tag SEARCH_INCLUDES is set to YES.
 INCLUDE_PATH           =
 # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
 # patterns (like *.h and *.hpp) to filter out the header-files in the
 # directories. If left blank, the patterns specified with FILE_PATTERNS will be
 # used.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 INCLUDE_FILE_PATTERNS  =
 # The PREDEFINED tag can be used to specify one or more macro names that are
 # defined before the preprocessor is started (similar to the -D option of e.g.
 # gcc). The argument of the tag is a list of macros of the form: name or
 # name=definition (no spaces). If the definition and the "=" are omitted, "=1"
 # is assumed. To prevent a macro definition from being undefined via #undef or
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 PREDEFINED             =
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
 # macro definition that is found in the sources will be used. Use the PREDEFINED
 # tag if you want to use a different macro definition that overrules the
 # definition found in the source code.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 EXPAND_AS_DEFINED      =
 # If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
 # remove all references to function-like macros that are alone on a line, have
 # an all uppercase name, and do not end with a semicolon. Such function macros
 # are typically used for boiler-plate code, and will confuse the parser if not
 # removed.
 # The default value is: YES.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 SKIP_FUNCTION_MACROS   = YES
--- a/doc/doxygen/lammps-logo.png
+++ b/doc/doxygen/lammps-logo.png
--- a/doc/graphviz/.gitignore
+++ b/doc/graphviz/.gitignore
@ -0,0 +1,3 @@
 /*.png
 /*.svg
 /*.pdf
--- a/doc/graphviz/Makefile
+++ b/doc/graphviz/Makefile
@ -0,0 +1,30 @@
 # Makefile for generating images with graphviz
 #
 # Renders every *.dot file in this directory to a PNG of the same base
 # name in $(IMGDIR).  When the graphviz "dot" executable is not found,
 # the pattern rule only prints a hint instead of rendering anything.
 SHELL      = /bin/bash
 BUILDDIR   = ${CURDIR}/..
 IMGDIR     = $(BUILDDIR)/src/JPG
 IMGSRC     = $(wildcard *.dot)
 IMGPNG     = $(IMGSRC:%.dot=$(IMGDIR)/%.png)
 # Probe for "dot" with the shell builtin "command -v" (SHELL is bash), so
 # the check does not depend on an external "which" program being installed.
 HAS_DOT        = NO
 ifeq ($(shell command -v dot >/dev/null 2>&1; echo $$?), 0)
 HAS_DOT        = YES
 endif
 # "all" and "clean" name actions, not files; declare them phony so that a
 # stray file called "all" or "clean" cannot silently disable them.
 .PHONY: all clean
 all:    $(IMGPNG)
 # Only PNG files are produced by this Makefile, so only those (and editor
 # backup files) are removed.
 clean:
 	rm -f $(IMGPNG) *~
 ifeq ($(HAS_DOT),YES)
 $(IMGDIR)/%.png: %.dot
 	dot -Tpng -o $@ $<
 endif
 ifeq ($(HAS_DOT),NO)
 $(IMGDIR)/%.png: %.dot
 	@echo '###################################################'
 	@echo '# Need to install "graphviz" to regenerate graphs #'
 	@echo '###################################################'
 endif
--- a/doc/graphviz/lammps-classes.dot
+++ b/doc/graphviz/lammps-classes.dot
@ -0,0 +1,90 @@
 // LAMMPS Class topology
 //
 // Directed graph with one node per class name.  Node declarations come
 // first, edges follow.  Statement order is kept as-is on purpose, since
 // the graphviz layout may depend on it.
 // NOTE(review): the visual conventions are not explained anywhere in this
 // file; the meaning of blue/red outlines and of solid vs. dashed edges
 // noted below is an assumption to be confirmed.
 digraph lammps {
    // ranks run left to right
    rankdir="LR"
    // root node, drawn as a circle
    La [shape=circle label="LAMMPS"]
    // box nodes (most outlined in blue); every node in this group is a
    // direct successor of the LAMMPS node (see first edge statement)
    At [shape=box label="Atom" color=blue]
    Ci [shape=box label="CiteMe"]
    Co [shape=box label="Comm" color=blue]
    Do [shape=box label="Domain" color=blue]
    Er [shape=box label="Error" color=blue]
    Fo [shape=box label="Force" color=blue]
    Gr [shape=box label="Group" color=blue]
    In [shape=box label="Input" color=blue]
    Ko [shape=box label="KokkosLMP"]
    Ak [shape=box label="AtomKK" color=blue]
    Mk [shape=box label="MemoryKK" color=blue]
    Me [shape=box label="Memory" color=blue]
    Mo [shape=box label="Modify" color=blue]
    Ne [shape=box label="Neighbor" color=blue]
    Ou [shape=box label="Output" color=blue]
    Py [shape=box label="Python" color=blue]
    Up [shape=box label="Update" color=blue]
    Un [shape=box label="Universe" color=blue]
    Ti [shape=box label="Timer" color=blue]
    // red-outlined nodes use the default node shape; presumably these are
    // the base classes of the various "styles" -- confirm
    Rg [label="Region" color=red]
    Rb [shape=box label="RegionBlock"]
    Rs [shape=box label="RegionSphere"]
    Av [label="AtomVec" color=red]
    It [label="Integrate" color=red]
    Mi [label="Min" color=red]
    Pa [label="Pair" color=red]
    Bo [label="Bond" color=red]
    An [label="Angle" color=red]
    Di [label="Dihedral" color=red]
    Im [label="Improper" color=red]
    Ks [label="Kspace" color=red]
    Du [label="Dump" color=red]
    Fi [label="Fix" color=red]
    Cp [label="Compute" color=red]
    // uncolored nodes with the default shape
    Th [label="Thermo"]
    Va [label="Variable"]
    Ew [shape=box label="Ewald"]
    Pp [shape=box label="PPPM"]
    Ff [label="FFT3d"]
    Re [label="Remap"]
    Gc [label="GridComm"]
    // uncolored box nodes; each is the target of a dashed edge below
    Cb [shape=box label="CommBrick"]
    Ct [shape=box label="CommTiled"]
    Aa [shape=box label="AtomVecAtomic"]
    Am [shape=box label="AtomVecMolecular"]
    Lj [shape=box label="PairLJCut"]
    Lo [shape=box label="PairLJCutOMP"]
    Lg [shape=box label="PairLJCutGPU"]
    Te [shape=box label="PairTersoff"]
    Bh [shape=box label="BondHarmonic"]
    Bf [shape=box label="BondFENE"]
    Fa [shape=box label="FixAveTime"]
    Fn [shape=box label="FixNVE"]
    Fh [shape=box label="FixNH"]
    Fp [shape=box label="FixNPT"]
    Ft [shape=box label="FixNVT"]
    Da [shape=box label="DumpAtom"]
    Dc [shape=box label="DumpCustom"]
    Dg [shape=box label="DumpCFG"]
    Ve [shape=box label="Verlet"]
    Rr [shape=box label="Respa"]
    Po [shape=box label="PPPMOmp"]
    // Edges.  Solid edges presumably mean "holds an instance of", dashed
    // edges presumably mean "is a base class of" -- confirm.
    La -> {At Ci Co Do Er Fo Gr In Ko Ak Mk Me Mo Ne Ou Py Ti Up Un} [penwidth=2]
    Do -> {Rg} [penwidth=2]
    Co -> {Cb Ct} [style=dashed penwidth=2]
    Rg -> {Rb Rs} [style=dashed penwidth=2]
    In -> Va [penwidth=2]
    Mo -> {Fi Cp} [penwidth=2]
    Fo -> {Pa Bo An Di Im Ks} [penwidth=2]
    Ks -> {Ew Pp} [style=dashed penwidth=2]
    Pp -> {Ff Re Gc} [penwidth=2]
    Pp -> {Po} [style=dashed penwidth=2]
    Up -> {It Mi} [penwidth=2]
    It -> {Ve Rr} [style=dashed penwidth=2]
    Ou -> {Du Th} [penwidth=2]
    Du -> {Da Dc} [style=dashed penwidth=2]
    Dc -> {Dg} [style=dashed penwidth=2]
    At -> Av [penwidth=2]
    Av -> {Aa Am} [style=dashed penwidth=2]
    Pa -> {Lj Te} [style=dashed penwidth=2]
    Lj -> {Lo Lg} [style=dashed penwidth=2]
    Bo -> {Bh Bf} [style=dashed penwidth=2]
    Fi -> {Fa Fn Fh} [style=dashed penwidth=2]
    Fh -> {Fp Ft} [style=dashed penwidth=2]
 }
--- a/doc/include-file-conventions.md
+++ b/doc/include-file-conventions.md
@ -3,7 +3,7 @@
 The purpose of this document is to provide a point of reference
 for LAMMPS developers and contributors as to what include files
 and definitions to put where into LAMMPS source.
-Last change 2019-07-05
+Last change 2020-08-31
 ## Table of Contents
@ -99,10 +99,13 @@ Include files should be included in this order:
 #### pointers.h
-The `pointer.h` header file also includes `cstdio` and `lmptype.h`
+The `pointers.h` header file also includes `cstdio`, `cstddef`,
-(and through it `stdint.h`, `intttypes.h`, cstdlib, and `climits`).
+`string`, `lmptype.h`, and `utils.h` (and through those indirectly
 `stdint.h`, `inttypes.h`, `cstdlib`, and `climits`).
 This means any header including `pointers.h` can assume that `FILE`,
-`NULL`, `INT_MAX` are defined.
+`NULL`, `INT_MAX` are defined, and may freely use `std::string`
 and functions from the utils namespace without including the
 corresponding header files.
 ## Tools
--- a/doc/lammps.1
+++ b/doc/lammps.1
@ -1,4 +1,4 @@
-.TH LAMMPS "21 August 2020" "2020-08-21"
+.TH LAMMPS "24 August 2020" "2020-08-24"
 .SH NAME
 .B LAMMPS
 \- Molecular Dynamics Simulator.
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -1,4 +0,0 @@
 Sphinx
 sphinxcontrib-spelling
 breathe
 Pygments
--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@ -471,7 +471,7 @@ LAMMPS source distribution.
 .. code-block:: bash
  make html          # create HTML doc pages in html directory
-  make pdf           # create Developer.pdf and Manual.pdf in this directory
+  make pdf           # create Manual.pdf in this directory
  make fetch         # fetch HTML and PDF files from LAMMPS web site
  make clean         # remove all intermediate files
  make clean-all     # reset the entire doc build environment
--- a/doc/src/Build_development.rst
+++ b/doc/src/Build_development.rst
@ -378,22 +378,22 @@ The images below illustrate how the data is presented.
 .. list-table::
      * - .. figure:: JPG/coverage-overview-top.png
-             :target: JPG/coverage-overview-top.png
+             :scale: 25%
          Top of the overview page
        - .. figure:: JPG/coverage-overview-manybody.png
-             :target: JPG/coverage-overview-manybody.png
+             :scale: 25%
          Styles with good coverage
        - .. figure:: JPG/coverage-file-top.png
-             :target: JPG/coverage-file-top.png
+             :scale: 25%
          Top of individual source page
        - .. figure:: JPG/coverage-file-branches.png
-             :target: JPG/coverage-file-branches.png
+             :scale: 25%
          Source page with branches
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -361,9 +361,12 @@ be specified in uppercase.
   *  - AMDAVX
      - HOST
      - AMD 64-bit x86 CPU (AVX 1)
-   *  - EPYC
+   *  - ZEN
      - HOST
-      - AMD EPYC Zen class CPU (AVX 2)
+      - AMD Zen class CPU (AVX 2)
   *  - ZEN2
      - HOST
      - AMD Zen2 class CPU (AVX 2)
   *  - ARMV80
      - HOST
      - ARMv8.0 Compatible CPU
@ -445,12 +448,18 @@ be specified in uppercase.
   *  - TURING75
      - GPU
      - NVIDIA Turing generation CC 7.5 GPU
   *  - AMPERE80
      - GPU
      - NVIDIA Ampere generation CC 8.0 GPU
   *  - VEGA900
      - GPU
      - AMD GPU MI25 GFX900
   *  - VEGA906
      - GPU
      - AMD GPU MI50/MI60 GFX906
   *  - INTEL_GEN
      - GPU
      - Intel GPUs Gen9+
 Basic CMake build settings:
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/doc/src/Developer/.gitignore
+++ b/doc/src/Developer/.gitignore
@ -1,3 +0,0 @@
 /developer.aux
 /developer.log
 /developer.toc
--- a/doc/src/Developer/classes.fig
+++ b/doc/src/Developer/classes.fig
@ -1,198 +0,0 @@
 #FIG 3.2  Produced by xfig version 3.2.5a
 Portrait
 Center
 Inches
 Letter  
 100.00
 Single
 -2
 1200 2
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2232 1170 3540 1170 3540 1505 2232 1505 2232 1170
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2220 1830 3015 1830 3015 2219 2220 2219 2220 1830
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2226 3285 3300 3285 3300 3665 2226 3665 2226 3285
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2223 5190 3225 5190 3225 5525 2223 5525 2223 5190
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2232 7125 3090 7125 3090 7478 2232 7478 2232 7125
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2226 10230 3300 10230 3300 10565 2226 10565 2226 10230
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4026 10305 4980 10305 4980 10592 4026 10592 4026 10305
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4029 9900 5205 9900 5205 10250 4029 10250 4029 9900
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4038 9315 5370 9315 5370 9659 4038 9659 4038 9315
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4023 8955 4530 8955 4530 9278 4023 9278 4023 8955
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4029 8475 5190 8475 5190 8762 4029 8762 4029 8475
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4008 8115 5430 8115 5430 8408 4008 8408 4008 8115
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4026 7425 4995 7425 4995 7712 4026 7712 4026 7425
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4035 6720 4650 6720 4650 7025 4035 7025 4035 6720
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4044 7080 4830 7080 4830 7358 4044 7358 4044 7080
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4032 6105 5205 6105 5205 6419 4032 6419 4032 6105
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4026 5715 5115 5715 5115 6062 4026 6062 4026 5715
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4023 3585 4605 3585 4605 3872 4023 3872 4023 3585
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 3954 1680 5175 1680 5175 1997 3954 1997 3954 1680
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 1620 5235 2100 615
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 1605 5445 2070 10695
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3120 1935 3855 1800
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3150 2115 3765 2250
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3135 7230 3945 6840
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3150 7335 3945 8610
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 5265 8610 6195 8400
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 5280 8655 6180 8820
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3345 10290 3930 10020
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3360 10395 3930 10425
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3360 10455 3930 10755
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2193 360 3435 360 3435 647 2193 647 2193 360
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3398 3472 3923 3307
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3413 3601 3923 3721
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3285 2806 3870 2802
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3315 5372 3900 5368
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 6354 2280 7470 2280 7470 2585 6354 2585 6354 2280
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 6348 1875 7320 1875 7320 2222 6348 2222 6348 1875
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 3954 2070 5505 2070 5505 2372 3954 2372 3954 2070
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 5634 2137 6230 2045
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 5670 2310 6265 2418
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 3900 2640 5400 2640 5400 2975 3900 2975 3900 2640
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4038 3165 5385 3165 5385 3497 4038 3497 4038 3165
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4245 4110 5730 4110 5730 4499 4245 4499 4245 4110
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4233 4545 6390 4545 6390 4862 4233 4862 4233 4545
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4026 5190 5385 5190 5385 5525 4026 5525 4026 5190
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4038 7755 5310 7755 5310 8075 4038 8075 4038 7755
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 6270 8250 7365 8250 7365 8610 6270 8610 6270 8250
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 6273 8655 7380 8655 7380 8978 6273 8978 6273 8655
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 4041 10620 5985 10620 5985 10943 4041 10943 4041 10620
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2217 10830 3135 10830 3135 11156 2217 11156 2217 10830
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2229 9780 3240 9780 3240 10118 2229 10118 2229 9780
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2214 9015 3285 9015 3285 9362 2214 9362 2214 9015
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2208 5850 3420 5850 3420 6209 2208 6209 2208 5850
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2217 4275 3615 4275 3615 4634 2217 4634 2217 4275
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2235 2655 3150 2655 3150 3000 2235 3000 2235 2655
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 60 5115 1500 5115 1500 5610 60 5610 60 5115
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3486 6018 4011 5853
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3486 6129 3996 6249
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3361 9291 3991 9531
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3345 9129 4005 9099
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3691 4412 4216 4277
 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
 	1 1 2.00 120.00 240.00
 	 3695 4561 4175 4711
 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
 	 2220 735 3129 735 3129 1043 2220 1043 2220 735
 4 0 1 50 -1 18 18 0.0000 4 225 1275 2265 1455 Universe\001
 4 0 1 50 -1 18 18 0.0000 4 285 735 2265 2175 Input\001
 4 0 1 50 -1 18 18 0.0000 4 225 780 2265 2925 Atom\001
 4 0 1 50 -1 18 18 0.0000 4 285 1020 2265 3600 Update\001
 4 0 1 50 -1 18 18 0.0000 4 285 1320 2265 4575 Neighbor\001
 4 0 1 50 -1 18 18 0.0000 4 225 945 2265 5475 Comm\001
 4 0 1 50 -1 18 18 0.0000 4 225 1110 2265 6150 Domain\001
 4 0 1 50 -1 18 18 0.0000 4 225 810 2265 7425 Force\001
 4 0 1 50 -1 18 18 0.0000 4 285 975 2265 9300 Modify\001
 4 0 1 50 -1 18 18 0.0000 4 285 900 2265 10050 Group\001
 4 0 1 50 -1 18 18 0.0000 4 285 990 2265 10500 Output\001
 4 0 1 50 -1 18 18 0.0000 4 225 825 2265 11100 Timer\001
 4 0 0 50 -1 18 18 0.0000 4 225 1170 3990 1950 Variable\001
 4 0 4 50 -1 18 18 0.0000 4 225 1470 3990 2325 Command\001
 4 0 4 50 -1 18 18 0.0000 4 285 1275 4065 3450 Integrate\001
 4 0 4 50 -1 18 18 0.0000 4 225 525 4065 3825 Min\001
 4 0 0 50 -1 18 18 0.0000 4 285 1230 4065 5475 Irregular\001
 4 0 4 50 -1 18 18 0.0000 4 285 1020 4065 6000 Region\001
 4 0 0 50 -1 18 18 0.0000 4 225 975 4065 6375 Lattice\001
 4 0 4 50 -1 18 18 0.0000 4 225 435 4065 9225 Fix\001
 4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 9600 Compute\001
 4 0 4 50 -1 18 18 0.0000 4 225 570 4065 6975 Pair\001
 4 0 4 50 -1 18 18 0.0000 4 285 840 4065 7665 Angle\001
 4 0 4 50 -1 18 18 0.0000 4 225 1215 4065 8010 Dihedral\001
 4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 8355 Improper\001
 4 0 4 50 -1 18 18 0.0000 4 285 1095 4065 8700 KSpace\001
 4 0 4 50 -1 18 18 0.0000 4 285 855 4065 10545 Dump\001
 4 0 0 50 -1 18 18 0.0000 4 225 1815 4065 10890 WriteRestart\001
 4 0 0 50 -1 18 18 0.0000 4 225 930 6315 8550 FFT3D\001
 4 0 0 50 -1 18 18 0.0000 4 285 1005 6315 8925 Remap\001
 4 0 0 50 -1 18 18 0.0000 4 225 885 6390 2175 Finish\001
 4 0 0 50 -1 18 18 0.0000 4 285 1050 6390 2550 Special\001
 4 0 4 50 -1 18 18 0.0000 4 225 1305 3990 2925 AtomVec\001
 4 0 4 50 -1 18 18 0.0000 4 225 765 4065 7320 Bond\001
 4 0 0 50 -1 18 18 0.0000 4 225 1095 4065 10200 Thermo\001
 4 0 0 50 -1 18 18 0.0000 4 285 1380 4305 4425 NeighList\001
 4 0 0 50 -1 18 18 0.0000 4 285 2025 4305 4800 NeighRequest\001
 4 0 1 50 -1 18 18 0.0000 4 285 1155 2250 600 Memory\001
 4 0 0 50 -1 18 18 0.0000 4 225 1305 120 5475 LAMMPS\001
 4 0 1 50 -1 18 18 0.0000 4 225 735 2265 1005 Error\001
--- a/doc/src/Developer/classes.pdf
+++ b/doc/src/Developer/classes.pdf
--- a/doc/src/Developer/developer.tex
+++ b/doc/src/Developer/developer.tex
@ -1,699 +0,0 @@
 \documentclass{article}
 \usepackage{graphicx}
 \begin{document}
 \centerline{\Large \bf LAMMPS Developer Guide}
 \centerline{\bf 23 Aug 2011}
 \vspace{0.5in}
 This document is a developer guide to the LAMMPS molecular dynamics
 package, whose WWW site is at lammps.sandia.gov.  It describes the
 internal structure and algorithms of the code.  Sections will be added
 as we have time, and in response to requests from developers and
 users.
 \tableofcontents
 \pagebreak
 \section{LAMMPS source files}
 LAMMPS source files are in two directories of the distribution
 tarball.  The src directory has the majority of them, all of which are
 C++ files (*.cpp and *.h).  Many of these files are in the src
 directory itself.  There are also dozens of ``packages'', which can be
 included or excluded when LAMMPS is built.  See the
 doc/Section\_build.html section of the manual for more information
 about packages, or type ``make'' from within the src directory, which
 lists package-related commands, such as ``make package-status''.  The
 source files for each package are in an all-uppercase sub-directory of
 src, like src/MOLECULE or src/USER-CUDA.  If the package is currently
 installed, copies of the package source files will also exist in the
 src directory itself.  The src/STUBS sub-directory is not a package
 but contains a dummy version of the MPI library, used when building a
 serial version of the code.
 The lib directory also contains source code for external libraries,
 used by a few of the packages.  Each sub-directory, like meam or gpu,
 contains the source files, some of which are in different languages
 such as Fortran.  The files are compiled into libraries from within
 each sub-directory, e.g. performing a ``make'' in the lib/meam directory
 creates a libmeam.a file.  These libraries are linked to during a
 LAMMPS build, if the corresponding package is installed.
 LAMMPS C++ source files almost always come in pairs, such as run.cpp
 and run.h.  The pair of files defines a C++ class, the Run class in
 this case, which contains the code invoked by the ``run'' command in a
 LAMMPS input script.  As this example illustrates, source file and
 class names often have a one-to-one correspondence with a command used
 in a LAMMPS input script.  Some source files and classes do not have a
 corresponding input script command, e.g. ``force.cpp'' and the Force
 class.  They are discussed in the next section.
 \pagebreak
 \section{Class hierarchy of LAMMPS}
 Though LAMMPS has a lot of source files and classes, its class
 hierarchy is quite simple, as outlined in Fig \ref{fig:classes}.  Each
 boxed name refers to a class and has a pair of associated source files
 in lammps/src, e.g. ``memory.cpp'' and ``memory.h''.  More details on the
 class and its methods and data structures can be found by examining
 its *.h file.
 LAMMPS (lammps.cpp/h) is the top-level class for the entire code.  It
 holds an ``instance'' of LAMMPS and can be instantiated one or more
 times by a calling code.  For example, the file src/main.cpp simply
 instantiates one instance of LAMMPS and passes it the input script.
 The file src/library.cpp contains a C-style library interface to the
 LAMMPS class.  See the lammps/couple and lammps/python directories for
 examples of simple programs that use LAMMPS through its library
 interface.  A driver program can instantiate the LAMMPS class multiple
 times, e.g. to embed several atomistic simulation regions within a
 mesoscale or continuum simulation domain.
 There are a dozen or so top-level classes within the LAMMPS class that
 are visible everywhere in the code.  They are shaded blue in Fig
 \ref{fig:classes}.  Thus any class can refer to the y-coordinate of
 local atom $i$ as atom$\rightarrow$x[i][1].  This visibility is
 enabled by a bit of cleverness in the Pointers class (see
 src/pointers.h) which every class inherits from.
 There are a handful of virtual parent classes in LAMMPS that define
 what LAMMPS calls ``styles''.  They are shaded red in Fig
 \ref{fig:classes}.  Each of these is the parent of a number of child
 classes that implement the interface defined by the parent class.  For
 example, the fix style has around 100 child classes.  They are the
 possible fixes that can be specified by the fix command in an input
 script, e.g. fix nve, fix shake, fix ave/time, etc.  The corresponding
 classes are Fix (for the parent class), FixNVE, FixShake, FixAveTime,
 etc.  The source files for these classes are easy to identify in the
 src directory, since they begin with the word ``fix'', e.g.
 fix\_nve.cpp, fix\_shake.cpp, fix\_ave\_time.cpp, etc.
 The one exception is child class files for the ``command'' style.  These
 implement specific commands in the input script that can be invoked
 before/after/between runs or which launch a simulation.  Examples are
 the create\_box, minimize, run, and velocity commands which encode the
 CreateBox, Minimize, Run, and Velocity classes.  The corresponding
 files are create\_box.cpp, minimize.cpp, run.cpp, and velocity.cpp.
 The list of command style files can be found by typing ``grep
 COMMAND\_CLASS *.h'' from within the src directory, since that word in
 the header file identifies the class as an input script command.
 Similar words can be grepped to list files for the other LAMMPS
 styles.  E.g. ATOM\_CLASS, PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS,
 FIX\_CLASS, COMPUTE\_CLASS, DUMP\_CLASS, etc.
 \begin{figure}[htb]
 \begin{center}
 \includegraphics[height=4in]{classes.pdf}
 \end{center}
 \caption{Class hierarchy within LAMMPS source code.}
 \label{fig:classes}
 \end{figure}
 More details on individual classes in Fig \ref{fig:classes} are as
 follows:
 \begin{itemize}
 \item The Memory class handles allocation of all large vectors and
  arrays.
 \item The Error class prints all error and warning messages.
 \item The Universe class sets up partitions of processors so that
  multiple simulations can be run, each on a subset of the processors
  allocated for a run, e.g. by the mpirun command.
 \item The Input class reads an input script, stores variables, and
  invokes stand-alone commands that are child classes of the Command
  class.
 \item As discussed above, the Command class is a parent class for
  certain input script commands that perform a one-time operation
  before/after/between simulations or which invoke a simulation.  They
  are instantiated from within the Input class, invoked, then
  immediately destructed.
 \item The Finish class is instantiated to print statistics to the
  screen after a simulation is performed, by commands like run and
  minimize.
 \item The Special class walks the bond topology of a molecular system
  to find first, second, third neighbors of each atom.  It is invoked by
  several commands, like read\_data, read\_restart, and replicate.
 \item The Atom class stores all per-atom arrays.  More precisely, they
  are allocated and stored by the AtomVec class, and the Atom class
  simply stores a pointer to them.  The AtomVec class is a parent
  class for atom styles, defined by the atom\_style command.
 \item The Update class holds an integrator and a minimizer.  The
  Integrate class is a parent style for the Verlet and rRESPA time
  integrators, as defined by the run\_style input command.  The Min
  class is a parent style for various energy minimizers.
 \item The Neighbor class builds and stores neighbor lists.  The
  NeighList class stores a single list (for all atoms).  The
  NeighRequest class is called by pair, fix, or compute styles when
  they need a particular kind of neighbor list.
 \item The Comm class performs interprocessor communication, typically
  of ghost atom information.  This usually involves MPI message
  exchanges with 6 neighboring processors in the 3d logical grid of
  processors mapped to the simulation box.  Sometimes the Irregular
  class is used, when atoms may migrate to arbitrary processors.
 \item The Domain class stores the simulation box geometry, as well as
  geometric Regions and any user definition of a Lattice.  The latter
  are defined by region and lattice commands in an input script.
 \item The Force class computes various forces between atoms.  The Pair
  parent class is for non-bonded or pair-wise forces, which in LAMMPS
  lingo includes many-body forces such as the Tersoff 3-body
  potential.  The Bond, Angle, Dihedral, Improper parent classes are
  styles for bonded interactions within a static molecular topology.
  The KSpace parent class is for computing long-range Coulombic
  interactions.  One of its child classes, PPPM, uses the FFT3D and
  Remap classes to communicate grid-based information with neighboring
  processors.
 \item The Modify class stores lists of Fix and Compute classes, both
  of which are parent styles.
 \item The Group class manipulates groups that atoms are assigned to
  via the group command.  It also computes various attributes of
  groups of atoms.
 \item The Output class is used to generate 3 kinds of output from a
  LAMMPS simulation: thermodynamic information printed to the screen
  and log file, dump file snapshots, and restart files.  These
  correspond to the Thermo, Dump, and WriteRestart classes
  respectively.  The Dump class is a parent style.
 \item The Timer class logs MPI timing information, output at the end
  of a run.
 \end{itemize}
 %%\pagebreak
 %%\section{Spatial decomposition and parallel operations}
 %%distributed memory
 %%Ref to JCP paper
 %%diagram of 3d grid of procs and spatial decomp
 %%6-way comm
 %%ghost atoms, PBC added when comm (in atom class)
 %%\pagebreak
 %%\section{Fixes, computes, variables}
 %%fixes intercolate in timestep, store per-atom info
 %%computes based on current snapshot
 %%equal- and atom-style variables
 %%output they produce - see write-up in HowTo
 \pagebreak
 \section{How a timestep works}
 The first and most fundamental operation within LAMMPS to understand
 is how a timestep is structured.  Timestepping is performed by the
 Integrate class within the Update class.  Since Integrate is a parent
 class, corresponding to the run\_style input script command, it has
 child classes.  In this section, the timestep implemented by the
 Verlet child class is described.  A similar timestep is implemented by
 the Respa child class, for the rRESPA hierarchical timestepping
 method.  The Min parent class performs energy minimization, so does
 not perform a literal timestep.  But it has logic similar to what is
 described here, to compute forces and invoke fixes at each iteration
 of a minimization.  Differences between time integration and
 minimization are highlighted at the end of this section.
 The Verlet class is encoded in the src/verlet.cpp and verlet.h files.
 It implements the velocity-Verlet timestepping algorithm.  The
 workhorse method is Verlet::run(), but first we highlight several
 other methods in the class.
 \begin{itemize}
 \item The init() method is called at the beginning of each dynamics
  run.  It simply sets some internal flags, based on user settings in
  other parts of the code.
 \item The setup() or setup\_minimal() methods are also called before
  each run.  The velocity-Verlet method requires current forces be
  calculated before the first timestep, so these routines compute
  forces due to all atomic interactions, using the same logic that
  appears in the timestepping described next.  A few fixes are also
  invoked, using the mechanism described in the next section.  Various
  counters are also initialized before the run begins.  The
  setup\_minimal() method is a variant that has a flag for performing
  less setup.  This is used when runs are continued and information
  from the previous run is still valid.  For example, if repeated
  short LAMMPS runs are being invoked, interleaved by other commands,
  via the ``pre no'' and ``every'' options of the run command, the
  setup\_minimal() method is used.
 \item The force\_clear() method initializes force and other arrays to
  zero before each timestep, so that forces (torques, etc) can be
  accumulated.
 \end{itemize}
 Now for the Verlet::run() method.  Its structure in high-level
 pseudo-code is shown in Fig \ref{fig:verlet}.  In the actual code in
 src/verlet.cpp some of these operations are conditionally invoked.
 \begin{figure}[htb]
 \begin{center}
 \begin{verbatim}
 loop over N timesteps:
  ev_set()
  fix->initial_integrate()
  fix->post_integrate()
  nflag = neighbor->decide()
  if nflag:
    fix->pre_exchange()
    domain->pbc()
    domain->reset_box()
    comm->setup()
    neighbor->setup_bins()
    comm->exchange()
    comm->borders()
    fix->pre_neighbor()
    neighbor->build()
  else
    comm->forward_comm()
  force_clear()
  fix->pre_force()
  pair->compute()
  bond->compute()
  angle->compute()
  dihedral->compute()
  improper->compute()
  kspace->compute()
  comm->reverse_comm()
  fix->post_force()
  fix->final_integrate()
  fix->end_of_step()
  if any output on this step: output->write()
  \end{verbatim}
 \end{center}
 \caption{Pseudo-code for the Verlet::run() method.}
 \label{fig:verlet}
 \end{figure}
 The ev\_set() method (in the parent Integrate class), sets two flags
 ({\em eflag} and {\em vflag}) for energy and virial computation.  Each
 flag encodes whether global and/or per-atom energy and virial should
 be calculated on this timestep, because some fix or variable or output
 will need it.  These flags are passed to the various methods that
 compute particle interactions, so that they can skip the extra
 calculations if the energy and virial are not needed.  See the
 comments with the Integrate::ev\_set() method which document the flag
 values.
 At various points of the timestep, fixes are invoked,
 e.g. fix$\rightarrow$initial\_integrate().  In the code, this is
 actually done via the Modify class which stores all the Fix objects
 and lists of which should be invoked at what point in the timestep.
 Fixes are the LAMMPS mechanism for tailoring the operations of a
 timestep for a particular simulation.  As described elsewhere
 (unwritten section), each fix has one or more methods, each of which
 is invoked at a specific stage of the timestep, as in Fig
 \ref{fig:verlet}.  All the fixes defined in an input script with an
 initial\_integrate() method are invoked at the beginning of each
 timestep.  Fix nve, nvt, npt are examples, since they perform the
 start-of-timestep velocity-Verlet integration to update velocities by
 a half-step, and coordinates by a full step.  The post\_integrate()
 method is next.  Only a few fixes use this, e.g. to reflect particles
 off box boundaries in the FixWallReflect class.
 The decide() method in the Neighbor class determines whether neighbor
 lists need to be rebuilt on the current timestep.  If not, coordinates
 of ghost atoms are acquired by each processor via the forward\_comm()
 method of the Comm class.  If neighbor lists need to be built, several
 operations within the inner if clause of Fig \ref{fig:verlet} are
 first invoked.  The pre\_exchange() method of any defined fixes is
 invoked first.  Typically this inserts or deletes particles from the
 system.
 Periodic boundary conditions are then applied by the Domain class via
 its pbc() method to remap particles that have moved outside the
 simulation box back into the box.  Note that this is not done every
 timestep, but only when neighbor lists are rebuilt.  This is so that
 each processor's sub-domain will have consistent (nearby) atom
 coordinates for its owned and ghost atoms.  It is also why dumped atom
 coordinates can be slightly outside the simulation box.
 The box boundaries are then reset (if needed) via the reset\_box()
 method of the Domain class, e.g. if box boundaries are shrink-wrapped
 to current particle coordinates.  A change in the box size or shape
 requires internal information for communicating ghost atoms (Comm
 class) and neighbor list bins (Neighbor class) be updated.  The
 setup() method of the Comm class and setup\_bins() method of the
 Neighbor class perform the update.
 The code is now ready to migrate atoms that have left a processor's
 geometric sub-domain to new processors.  The exchange() method of the
 Comm class performs this operation.  The borders() method of the Comm
 class then identifies ghost atoms surrounding each processor's
 sub-domain and communicates ghost atom information to neighboring
 processors.  It does this by looping over all the atoms owned by a
 processor to make lists of those to send to each neighbor processor.
 On subsequent timesteps, the lists are used by the
 Comm::forward\_comm() method.
 Fixes with a pre\_neighbor() method are then called.  These typically
 re-build some data structure stored by the fix that depends on the
 current atoms owned by each processor.
 Now that each processor has a current list of its owned and ghost
 atoms, LAMMPS is ready to rebuild neighbor lists via the build()
 method of the Neighbor class.  This is typically done by binning all
 owned and ghost atoms, and scanning a stencil of bins around each
 owned atom's bin to make a Verlet list of neighboring atoms within the
 force cutoff plus neighbor skin distance.
 In the next portion of the timestep, all interaction forces between
 particles are computed, after zeroing the per-atom force vector via
 the force\_clear() method.  If the newton flag is set to ``on'' by the
 newton command, forces on both owned and ghost atoms are calculated.
 Pairwise forces are calculated first, which enables the global virial
 (if requested) to be calculated cheaply (at the end of the
 Pair::compute() method), by a dot product of atom coordinates and
 forces.  By including owned and ghost atoms in the dot product, the
 effect of periodic boundary conditions is correctly accounted for.
 Molecular topology interactions (bonds, angles, dihedrals, impropers)
 are calculated next.  The final contribution is from long-range
 Coulombic interactions, invoked by the KSpace class.
 If the newton flag is on, forces on ghost atoms are communicated and
 summed back to their corresponding owned atoms.  The reverse\_comm()
 method of the Comm class performs this operation, which is essentially
 the inverse operation of sending copies of owned atom coordinates to
 other processors' ghost atoms.
 At this point in the timestep, the total force on each atom is known.
 Additional force constraints (external forces, SHAKE, etc) are applied
 by Fixes that have a post\_force() method.  The second half of the
 velocity-Verlet integration is then performed (another half-step
 update of the velocities) via fixes like nve, nvt, npt.
 At the end of the timestep, fixes that define an end\_of\_step()
 method are invoked.  These typically perform a diagnostic calculation,
 e.g. the ave/time and ave/spatial fixes.  The final operation of the
 timestep is to perform any requested output, via the write() method of
 the Output class.  There are 3 kinds of LAMMPS output: thermodynamic
 output to the screen and log file, snapshots of atom data to a dump
 file, and restart files.  See the thermo\_style, dump, and restart
 commands for more details.
 The iteration performed by an energy minimization is similar to the
 dynamics timestep of Fig \ref{fig:verlet}.  Forces are computed,
 neighbor lists are built as needed, atoms migrate to new processors,
 and atom coordinates and forces are communicated to neighboring
 processors.  The only difference is what Fix class operations are
 invoked when.  Only a subset of LAMMPS fixes are useful during energy
 minimization, as explained in their individual doc pages.  The
 relevant Fix class methods are min\_pre\_exchange(),
 min\_pre\_force(), and min\_post\_force().  Each is invoked at the
 appropriate place within the minimization iteration.  For example, the
 min\_post\_force() method is analogous to the post\_force() method for
 dynamics; it is used to alter or constrain forces on each atom, which
 affects the minimization procedure.
 \pagebreak
 \section{Extending LAMMPS}
 The Section\_modify.html file in the doc directory of
 the LAMMPS distribution gives an overview of how LAMMPS can
 be extended by writing new classes that derive from existing
 parent classes in LAMMPS.  Here, some specific coding
 details are provided for writing a new fix.
 \subsection{New fixes}
 (this section provided by Kirill Lykov)
 \vspace{0.25cm}
 Writing fixes is a flexible way of extending LAMMPS.  Users can
 implement many things using fixes:
 \begin{itemize}
 \item changing particles attributes (positions, velocities, forces, etc.).
 Example: FixFreeze.
 \item reading/writing data. Example: FixRestart.
 \item implementing boundary conditions. Example: FixWall.
 \item saving information about particles for future use (previous positions,
 for instance). Example: FixStoreState.
 \end{itemize}
 All fixes are derived from class Fix and must have a constructor with the
 signature: FixMine(class LAMMPS *, int, char **).
 Every fix must be registered in LAMMPS by writing the following lines
 of code in the header before include guards:
 \begin{center}
 \begin{verbatim}
 #ifdef FIX_CLASS
 FixStyle(your/fix/name,FixMine)
 #else
  \end{verbatim}
 \end{center}
 Where ``your/fix/name'' is a name of your fix in the script and FixMine
 is the name of the class. This code allows LAMMPS to find your fix
 when it parses an input script. In addition, your fix header must be
 included in the file ``style\_fix.h''. If you use the LAMMPS make,
 this file is generated automatically - all files starting with the prefix
 fix\_ are included, so name your header the same way. Otherwise, don't
 forget to add your include to ``style\_fix.h''.
 Let's write a simple fix which will print average velocity at the end
 of each timestep. First of all, implement a constructor:
 \begin{center}
 \begin{verbatim}
 FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
 : Fix(lmp, narg, arg)
 {
  if (narg < 4)
      error->all(FLERR,"Illegal fix print command");
  nevery = atoi(arg[3]);
  if (nevery <= 0)
      error->all(FLERR,"Illegal fix print command");
 }
  \end{verbatim}
 \end{center}
 In the constructor you should parse your fix arguments which are
 specified in the script. All fixes have pretty much the same syntax: fix
 [fix\_identifier] [group\_name] [fix\_name] [fix\_arguments]. The
 first 3 parameters are parsed by Fix class constructor, while
 [fix\_arguments] should be parsed by you. In our case, we need to
 specify how often we want to print an average velocity. For instance,
 once in 50 timesteps: fix 1 all print/vel 50. There is a special variable
 in Fix class called nevery which specifies how often method
 end\_of\_step() is called. Thus all we need to do is just set it up.
 The next method we need to implement is setmask():
 \begin{center}
 \begin{verbatim}
 int FixPrintVel::setmask()
 {
  int mask = 0;
  mask |= FixConst::END_OF_STEP;
  return mask;
 }
 \end{verbatim}
 \end{center}
 Here you specify which methods of your fix should be called during
 the execution. For instance, END\_OF\_STEP corresponds to the
 end\_of\_step() method. Overall, there are 8 most important methods,
 which are called in a predefined order during the execution of the
 Verlet algorithm, as mentioned in Section 3:
 \begin{itemize}
 \item initial\_integrate()
 \item post\_integrate()
 \item pre\_exchange()
 \item pre\_neighbor()
 \item pre\_force()
 \item post\_force()
 \item final\_integrate()
 \item end\_of\_step()
 \end{itemize}
 The fix developer must decide at which of these points the code of the
 fix should be executed.  For FixPrintVel, we need only end\_of\_step():
 \begin{center}
 \begin{verbatim}
 void FixPrintVel::end_of_step()
 {
  // for add3, scale3
  using namespace MathExtra;
  double** v = atom->v;
  int nlocal = atom->nlocal;
  double localAvgVel[4]; // 4th element for particles count
  memset(localAvgVel, 0, 4 * sizeof(double));
  for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
    add3(localAvgVel, v[particleInd], localAvgVel);
  }
  localAvgVel[3] = nlocal;
  double globalAvgVel[4];
  memset(globalAvgVel, 0, 4 * sizeof(double));
  MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
  scale3(1.0 / globalAvgVel[3], globalAvgVel);
  if (comm->me == 0) {
    printf("%e, %e, %e\n",
      globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
  }
 }
 \end{verbatim}
 \end{center}
 In the code above, we use MathExtra routines defined in
 ``math\_extra.h''.  It provides a bunch of math functions for working
 with arrays of doubles as if they were mathematical vectors.
 In this code we use an instance of Atom class. This object is stored
 in the Pointers class (see ``pointers.h''). This object contains all
 global information about the simulation system. Data from the Pointers
 class is available to all classes inherited from it using protected
 inheritance. Hence when you write your own class, which is going to use
 LAMMPS data, don't forget to inherit from Pointers.  When writing
 fixes we inherit from class Fix which is inherited from Pointers so
 there is no need to inherit from it directly.
 The code above computes average velocity for all particles in the
 simulation.  Yet you have one unused parameter in fix call from the
 script - [group\_name].  This parameter specifies the group of atoms
 used in the fix. So we should compute average for all particles in the
 simulation if group\_name == all, but it can be any group. The group
 information is specified by groupbit which is defined in class Fix:
 \begin{center}
 \begin{verbatim}
 for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
  if (atom->mask[particleInd] & groupbit) {
  //Do all job here
  }
 }
 \end{verbatim}
 \end{center}
 Class Atom encapsulates atom positions, velocities, forces, etc. The user
 can access them using the particle index. Note that particle indexes
 usually change every timestep because of sorting.
 Let's consider another Fix example. We want to have a fix which stores
 the atom positions from the previous time step. The local atom
 indexes will not be valid on the next iteration. In order to handle
 this situation there are several methods which should be implemented:
 \begin{itemize}
 \item \verb|double memory_usage| - return how much memory fix uses
 \item \verb|void grow_arrays(int)| - do reallocation of the per particle arrays
  in your fix
 \item \verb|void copy_arrays(int i, int j, int delflag)| - copy i-th per-particle
  information to j-th. Used when atom sorting is performed. If delflag is set
  and atom j owns a body, move the body information to atom i.
 \item \verb|void set_arrays(int i)| - sets i-th particle related information to zero
 \end{itemize}
 Note that if your class implements these methods, it must add calls to
 add\_callback and delete\_callback to its constructor and destructor:
 \begin{center}
 \begin{verbatim}
 FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg)  {
  //...
  atom->add_callback(0);
 }
 FixSavePos::~FixSavePos() {
  atom->delete_callback(id, 0);
 }
 \end{verbatim}
 \end{center}
 Since we want to store positions of atoms from the previous timestep, we
 need to add double** x to the header file. Then add allocation code to
 the constructor:
 \verb|memory->create(this->x, atom->nmax, 3, "FixSavePos:x");|. Free the memory
 in the destructor: \verb|memory->destroy(x);|
 Finally, implement mentioned methods:
 \begin{center}
 \begin{verbatim}
 double FixSavePos::memory_usage()
 {
  int nmax = atom->nmax;
  double bytes = 0.0;
  bytes += nmax * 3 * sizeof(double);
  return bytes;
 }
 void FixSavePos::grow_arrays(int nmax)
 {
    memory->grow(this->x, nmax, 3, "FixSavePos:x");
 }
 void FixSavePos::copy_arrays(int i, int j, int delflag)
 {
    memcpy(this->x[j], this->x[i], sizeof(double) * 3);
 }
 void FixSavePos::set_arrays(int i)
 {
    memset(this->x[i], 0, sizeof(double) * 3);
 }
 int FixSavePos::pack_exchange(int i, double *buf)
 {
  int m = 0;
  buf[m++] = x[i][0];
  buf[m++] = x[i][1];
  buf[m++] = x[i][2];
  return m;
 }
 int FixSavePos::unpack_exchange(int nlocal, double *buf)
 {
  int m = 0;
  x[nlocal][0] = buf[m++];
  x[nlocal][1] = buf[m++];
  x[nlocal][2] = buf[m++];
  return m;
 }
 \end{verbatim}
 \end{center}
 Now, a little bit about memory allocation. We used the Memory class, which
 is just a bunch of template functions for allocating 1D and 2D
 arrays. So you need to include ``memory.h'' to have access to them.
 Finally, if you need to write/read some global information used in
 your fix to/from the restart file, you can do so by setting the flag
 restart\_global = 1 in the constructor and implementing the methods void
 write\_restart(FILE *fp) and void restart(char *buf).
 \end{document}
--- a/doc/src/Errors_messages.rst
+++ b/doc/src/Errors_messages.rst
@ -502,7 +502,7 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
 *Bond/react: Unknown section in map file*
   Please ensure reaction map files are properly formatted.
-*Bond/react: Atom affected by reaction too close to template edge*
+*Bond/react: Atom/Bond type affected by reaction too close to template edge*
   This means an atom which changes type or connectivity during the
   reaction is too close to an 'edge' atom defined in the map
   file.  This could cause incorrect assignment of bonds, angles, etc.
--- a/doc/src/Howto_cmake.rst
+++ b/doc/src/Howto_cmake.rst
@ -191,19 +191,19 @@ You start the command ``ccmake ../cmake`` in the ``build`` folder.
 .. list-table::
   * - .. figure:: JPG/ccmake-initial.png
-          :target: JPG/ccmake-initial.png
+          :scale: 33%
          :align: center
          Initial ``ccmake`` screen
     - .. figure:: JPG/ccmake-config.png
-          :target: JPG/ccmake-config.png
+          :scale: 33%
          :align: center
          Configure output of ``ccmake``
     - .. figure:: JPG/ccmake-options.png
-          :target: JPG/ccmake-options.png
+          :scale: 33%
          :align: center
          Options screen of ``ccmake``
@ -236,19 +236,19 @@ not required, it can also be entered from the GUI.
 .. list-table::
   * - .. figure:: JPG/cmake-gui-initial.png
-          :target: JPG/cmake-gui-initial.png
+          :scale: 40%
          :align: center
          Initial ``cmake-gui`` screen
     - .. figure:: JPG/cmake-gui-popup.png
-          :target: JPG/cmake-gui-popup.png
+          :scale: 60%
          :align: center
          Generator selection in ``cmake-gui``
     - .. figure:: JPG/cmake-gui-options.png
-          :target: JPG/cmake-gui-options.png
+          :scale: 40%
          :align: center
          Options screen of ``cmake-gui``
--- a/doc/src/Howto_couple.rst
+++ b/doc/src/Howto_couple.rst
@ -12,96 +12,52 @@ LAMMPS can be coupled to other codes in at least 4 ways.  Each has
 advantages and disadvantages, which you will have to think about in the
 context of your application.
----------
+1. Define a new :doc:`fix <fix>` command that calls the other code.  In
   this scenario, LAMMPS is the driver code.  During timestepping,
   the fix is invoked, and can make library calls to the other code,
   which has been linked to LAMMPS as a library.  This is how the
   :ref:`LATTE <PKG-LATTE>` package, which performs density-functional
   tight-binding calculations using the `LATTE software <https://github.com/lanl/LATTE>`_
   to compute forces, is hooked to LAMMPS.
   See the :doc:`fix latte <fix_latte>` command for more details.
   Also see the :doc:`Modify <Modify>` doc pages for info on how to
   add a new fix to LAMMPS.
-(1) Define a new :doc:`fix <fix>` command that calls the other code.  In
+.. spacer
 this scenario, LAMMPS is the driver code.  During its timestepping,
 the fix is invoked, and can make library calls to the other code,
 which has been linked to LAMMPS as a library.  This is the way the
 `POEMS <poems_>`_ package that performs constrained rigid-body motion on
 groups of atoms is hooked to LAMMPS.  See the :doc:`fix poems <fix_poems>` command for more details.  See the
 :doc:`Modify <Modify>` doc pages for info on how to add a new fix to
 LAMMPS.
-.. _poems: http://www.rpi.edu/~anderk5/lab
+2. Define a new LAMMPS command that calls the other code.  This is
   conceptually similar to method (1), but in this case LAMMPS and the
   other code are on a more equal footing.  Note that now the other code
   is not called during the timestepping of a LAMMPS run, but between
   runs.  The LAMMPS input script can be used to alternate LAMMPS runs
   with calls to the other code, invoked via the new command.  The
   :doc:`run <run>` command facilitates this with its *every* option,
   which makes it easy to run a few steps, invoke the command, run a few
   steps, invoke the command, etc.
----------
+   In this scenario, the other code can be called as a library, as in
   1., or it could be a stand-alone code, invoked by a system() call
   made by the command (assuming your parallel machine allows one or
   more processors to start up another program).  In the latter case the
   stand-alone code could communicate with LAMMPS through files that the
   command writes and reads.
-(2) Define a new LAMMPS command that calls the other code.  This is
+   See the :doc:`Modify command <Modify_command>` doc page for info on how
-conceptually similar to method (1), but in this case LAMMPS and the
+   to add a new command to LAMMPS.
 other code are on a more equal footing.  Note that now the other code
 is not called during the timestepping of a LAMMPS run, but between
 runs.  The LAMMPS input script can be used to alternate LAMMPS runs
 with calls to the other code, invoked via the new command.  The
 :doc:`run <run>` command facilitates this with its *every* option, which
 makes it easy to run a few steps, invoke the command, run a few steps,
 invoke the command, etc.
-In this scenario, the other code can be called as a library, as in
+.. spacer
 (1), or it could be a stand-alone code, invoked by a system() call
 made by the command (assuming your parallel machine allows one or more
 processors to start up another program).  In the latter case the
 stand-alone code could communicate with LAMMPS through files that the
 command writes and reads.
-See the :doc:`Modify command <Modify_command>` doc page for info on how
+3. Use LAMMPS as a library called by another code.  In this case the
-to add a new command to LAMMPS.
+   other code is the driver and calls LAMMPS as needed.  Or a wrapper
   code could link and call both LAMMPS and another code as libraries.
   Again, the :doc:`run <run>` command has options that allow it to be
   invoked with minimal overhead (no setup or clean-up) if you wish to
   do multiple short runs, driven by another program.  Details about
   using the library interface are given in the :doc:`library API
   <pg_library>` documentation.
----------
+.. spacer
-(3) Use LAMMPS as a library called by another code.  In this case the
+4. Couple LAMMPS with another code in a client/server mode.  This is
-other code is the driver and calls LAMMPS as needed.  Or a wrapper
+   described on the :doc:`Howto client/server <Howto_client_server>` doc
-code could link and call both LAMMPS and another code as libraries.
+   page.
 Again, the :doc:`run <run>` command has options that allow it to be
 invoked with minimal overhead (no setup or clean-up) if you wish to do
 multiple short runs, driven by another program.
 Examples of driver codes that call LAMMPS as a library are included in
 the examples/COUPLE directory of the LAMMPS distribution; see
 examples/COUPLE/README for more details:
 * simple: simple driver programs in C++ and C which invoke LAMMPS as a
  library
 * plugin: simple driver program in C which invokes LAMMPS as a plugin
  from a shared library.
 * lammps_quest: coupling of LAMMPS and `Quest <quest_>`_, to run classical
  MD with quantum forces calculated by a density functional code
 * lammps_spparks: coupling of LAMMPS and `SPPARKS <spparks_>`_, to couple
  a kinetic Monte Carlo model for grain growth using MD to calculate
  strain induced across grain boundaries
 .. _quest: http://dft.sandia.gov/Quest
 .. _spparks: http://www.sandia.gov/~sjplimp/spparks.html
 The :doc:`Build basics <Build_basics>` doc page describes how to build
 LAMMPS as a library.  Once this is done, you can interface with LAMMPS
 either via C++, C, Fortran, or Python (or any other language that
 supports a vanilla C-like interface).  For example, from C++ you could
 create one (or more) "instances" of LAMMPS, pass it an input script to
 process, or execute individual commands, all by invoking the correct
 class methods in LAMMPS.  From C or Fortran you can make function
 calls to do the same things.  See the :doc:`Python <Python_head>` doc
 pages for a description of the Python wrapper provided with LAMMPS
 that operates through the LAMMPS library interface.
 The files src/library.cpp and library.h contain the C-style interface
 to LAMMPS.  See the :doc:`Howto library <Howto_library>` doc page for a
 description of the interface and how to extend it for your needs.
 Note that the lammps_open() function that creates an instance of
 LAMMPS takes an MPI communicator as an argument.  This means that
 instance of LAMMPS will run on the set of processors in the
 communicator.  Thus the calling code can run LAMMPS on all or a subset
 of processors.  For example, a wrapper script might decide to
 alternate between LAMMPS and another code, allowing them both to run
 on all the processors.  Or it might allocate half the processors to
 LAMMPS and half to the other code and run both codes simultaneously
 before syncing them up periodically.  Or it might instantiate multiple
 instances of LAMMPS to perform different calculations.
 ----------
 (4) Couple LAMMPS with another code in a client/server mode.  This is
 described on the :doc:`Howto client/server <Howto_client_server>` doc
 page.
--- a/doc/src/Howto_library.rst
+++ b/doc/src/Howto_library.rst
@ -2,241 +2,36 @@ Library interface to LAMMPS
 ===========================
 As described on the :doc:`Build basics <Build_basics>` doc page, LAMMPS
-can be built as a library, so that it can be called by another code,
+can be built as a static or shared library, so that it can be called by
-used in a :doc:`coupled manner <Howto_couple>` with other codes, or
+another code, used in a :doc:`coupled manner <Howto_couple>` with other
-driven through a :doc:`Python interface <Python_head>`.
+codes, or driven through a :doc:`Python interface <Python_head>`.
-All of these methodologies use a C-style interface to LAMMPS that is
+At the core of LAMMPS is the ``LAMMPS`` class which encapsulates the
-provided in the files src/library.cpp and src/library.h.  The
+state of the simulation program through the state of the various class
-functions therein have a C-style argument list, but contain C++ code
+instances that it is composed of.  So a calculation using LAMMPS
-you could write yourself in a C++ application that was invoking LAMMPS
+requires to create an instance of the ``LAMMPS`` class and then send it
-directly.  The C++ code in the functions illustrates how to invoke
+(text) commands, either individually or from a file, or perform other
-internal LAMMPS operations.  Note that LAMMPS classes are defined
+operations that modify the state stored inside that instance or drive
-within a LAMMPS namespace (LAMMPS_NS) if you use them from another C++
+simulations.  This is essentially what the ``src/main.cpp`` file does
-application.
+as well for the standalone LAMMPS executable with reading commands
 either from an input file or stdin.
-The examples/COUPLE and python/examples directories have example C++
+Creating a LAMMPS instance can be done by using C++ code directly or
-and C and Python codes which show how a driver code can link to LAMMPS
+through a C-style interface library to LAMMPS that is provided in the
-as a library, run LAMMPS on a subset of processors, grab data from
+files ``src/library.cpp`` and ``library.h``.  This
-LAMMPS, change it, and put it back into LAMMPS.
+:ref:`C language API <lammps_c_api>` can be used from C and C++,
 and is also the basis for the :doc:`Python <pg_python>` and
 :doc:`Fortran <pg_fortran>` interfaces or wrappers included in the
 LAMMPS source code.
-Thread-safety
+The ``examples/COUPLE`` and ``python/examples`` directories contain some
-------------
+example programs written in C++, C, Fortran, and Python, which show how
 a driver code can link to LAMMPS as a library, run LAMMPS on a subset of
 processors (so the others are available to run some other code
 concurrently), grab data from LAMMPS, change it, and send it back into
 LAMMPS.
-LAMMPS has not initially been conceived as a thread-safe program, but
+Detailed documentation of the available APIs and examples of how to
-over the years changes have been applied to replace operations that
+use them can be found in the :doc:`Programmer Documentation
-collide with creating multiple LAMMPS instances from multiple-threads
+<pg_library>` section of this manual.
 of the same process with thread-safe alternatives.  This primarily
 applies to the core LAMMPS code and less so on add-on packages, especially
 when those packages require additional code in the *lib* folder,
 interface LAMMPS to Fortran libraries, or the code uses static variables
 (like the USER-COLVARS package).
 Another major issue to deal with is to correctly handle MPI.  Creating
 a LAMMPS instance requires passing an MPI communicator, or it assumes
 the MPI_COMM_WORLD communicator, which spans all MPI processor ranks.
 When creating multiple LAMMPS object instances from different threads,
 this communicator has to be different for each thread or else collisions
 can happen, or it has to be guaranteed, that only one thread at a time
 is active.  MPI communicators, however, are not a problem, if LAMMPS is
 compiled with the MPI STUBS library, which implies that there is no MPI
 communication and only 1 MPI rank.
 Provided APIs
 -------------
 The file src/library.cpp contains the following functions for creating
 and destroying an instance of LAMMPS and sending it commands to
 execute.  See the documentation in the src/library.cpp file for
 details.
 .. note::
   You can write code for additional functions as needed to define
   how your code talks to LAMMPS and add them to src/library.cpp and
   src/library.h, as well as to the :doc:`Python interface <Python_head>`.
   The added functions can access or change any internal LAMMPS data you
   wish.
 .. code-block:: c
   void lammps_open(int, char **, MPI_Comm, void **)
   void lammps_open_no_mpi(int, char **, void **)
   void lammps_close(void *)
   int lammps_version(void *)
   void lammps_file(void *, char *)
   char *lammps_command(void *, char *)
   void lammps_commands_list(void *, int, char **)
   void lammps_commands_string(void *, char *)
   void lammps_free(void *)
 The lammps_open() function is used to initialize LAMMPS, passing in a
 list of strings as if they were :doc:`command-line arguments <Run_options>` when LAMMPS is run in stand-alone mode
 from the command line, and an MPI communicator for LAMMPS to run under.
 It returns a ptr to the LAMMPS object that is created, and which is
 used in subsequent library calls.  The lammps_open() function can be
 called multiple times, to create multiple instances of LAMMPS.
 LAMMPS will run on the set of processors in the communicator.  This
 means the calling code can run LAMMPS on all or a subset of
 processors.  For example, a wrapper script might decide to alternate
 between LAMMPS and another code, allowing them both to run on all the
 processors.  Or it might allocate half the processors to LAMMPS and
 half to the other code and run both codes simultaneously before
 syncing them up periodically.  Or it might instantiate multiple
 instances of LAMMPS to perform different calculations.
 The lammps_open_no_mpi() function is similar except that no MPI
 communicator is passed from the caller.  Instead, MPI_COMM_WORLD is
 used to instantiate LAMMPS, and MPI is initialized if necessary.
 The lammps_close() function is used to shut down an instance of LAMMPS
 and free all its memory.
 The lammps_version() function can be used to determine the specific
 version of the underlying LAMMPS code. This is particularly useful
 when loading LAMMPS as a shared library via dlopen(). The code using
 the library interface can then use this information to adapt to
 changes to the LAMMPS command syntax between versions. The returned
 LAMMPS version code is an integer (e.g. 2 Sep 2015 results in
 20150902) that grows with every new LAMMPS version.
 The lammps_file(), lammps_command(), lammps_commands_list(), and
 lammps_commands_string() functions are used to pass one or more
 commands to LAMMPS to execute, the same as if they were coming from an
 input script.
 Via these functions, the calling code can read or generate a series of
 LAMMPS commands one or multiple at a time and pass it through the library
 interface to setup a problem and then run it in stages.  The caller
 can interleave the command function calls with operations it performs,
 calls to extract information from or set information within LAMMPS, or
 calls to another code's library.
 The lammps_file() function passes the filename of an input script.
 The lammps_command() function passes a single command as a string.
 The lammps_commands_list() function passes multiple commands in a
 char\*\* list.  In both lammps_command() and lammps_commands_list(),
 individual commands may or may not have a trailing newline.  The
 lammps_commands_string() function passes multiple commands
 concatenated into one long string, separated by newline characters.
 In both lammps_commands_list() and lammps_commands_string(), a single
 command can be spread across multiple lines, if the last printable
 character of all but the last line is "&", the same as if the lines
 appeared in an input script.
 The lammps_free() function is a clean-up function to free memory that
 the library allocated previously via other function calls.  See
 comments in src/library.cpp file for which other functions need this
 clean-up.
 The file src/library.cpp also contains these functions for extracting
 information from LAMMPS and setting value within LAMMPS.  Again, see
 the documentation in the src/library.cpp file for details, including
 which quantities can be queried by name:
 .. code-block:: c
   int lammps_extract_setting(void *, char *)
   void *lammps_extract_global(void *, char *)
   void lammps_extract_box(void *, double *, double *,
                           double *, double *, double *, int *, int *)
   void *lammps_extract_atom(void *, char *)
   void *lammps_extract_compute(void *, char *, int, int)
   void *lammps_extract_fix(void *, char *, int, int, int, int)
   void *lammps_extract_variable(void *, char *, char *)
 The extract_setting() function returns info on the size
 of data types (e.g. 32-bit or 64-bit atom IDs) used
 by the LAMMPS executable (a compile-time choice).
 The other extract functions return a pointer to various global or
 per-atom quantities stored in LAMMPS or to values calculated by a
 compute, fix, or variable.  The pointer returned by the
 extract_global() function can be used as a permanent reference to a
 value which may change.  For the extract_atom() method, see the
 extract() method in the src/atom.cpp file for a list of valid per-atom
 properties.  New names could easily be added if the property you want
 is not listed.  For the other extract functions, the underlying
 storage may be reallocated as LAMMPS runs, so you need to re-call the
 function to assure a current pointer or returned value(s).
 .. code-block:: c
   double lammps_get_thermo(void *, char *)
   int lammps_get_natoms(void *)
   int lammps_set_variable(void *, char *, char *)
   void lammps_reset_box(void *, double *, double *, double, double, double)
 The lammps_get_thermo() function returns the current value of a thermo
 keyword as a double precision value.
 The lammps_get_natoms() function returns the total number of atoms in
 the system and can be used by the caller to allocate memory for the
 lammps_gather_atoms() and lammps_scatter_atoms() functions.
 The lammps_set_variable() function can set an existing string-style
 variable to a new string value, so that subsequent LAMMPS commands can
 access the variable.
 The lammps_reset_box() function resets the size and shape of the
 simulation box, e.g. as part of restoring a previously extracted and
 saved state of a simulation.
 .. code-block:: c
   void lammps_gather_atoms(void *, char *, int, int, void *)
   void lammps_gather_atoms_concat(void *, char *, int, int, void *)
   void lammps_gather_atoms_subset(void *, char *, int, int, int, int *, void *)
   void lammps_scatter_atoms(void *, char *, int, int, void *)
   void lammps_scatter_atoms_subset(void *, char *, int, int, int, int *, void *)
 The gather functions collect peratom info of the requested type (atom
 coords, atom types, forces, etc) from all processors, and return the
 same vector of values to each calling processor.  The scatter
 functions do the inverse.  They distribute a vector of peratom values,
 passed by all calling processors, to individual atoms, which may be
 owned by different processors.
 .. warning::
   These functions are not compatible with the
   -DLAMMPS_BIGBIG setting when compiling LAMMPS.  Dummy functions
   that result in an error message and abort will be substituted
   instead of resulting in random crashes and memory corruption.
 The lammps_gather_atoms() function does this for all N atoms in the
 system, ordered by atom ID, from 1 to N.  The
 lammps_gather_atoms_concat() function does it for all N atoms, but
 simply concatenates the subset of atoms owned by each processor.  The
 resulting vector is not ordered by atom ID.  Atom IDs can be requested
 by the same function if the caller needs to know the ordering.  The
 lammps_gather_atoms_subset() function allows the caller to request values
 for only a subset of atoms (identified by ID).
 For all 3 gather functions, per-atom image flags can be retrieved in 2 ways.
 If the count is specified as 1, they are returned
 in a packed format with all three image flags stored in a single integer.
 If the count is specified as 3, the values are unpacked into xyz flags
 by the library before returning them.
 The lammps_scatter_atoms() function takes a list of values for all N
 atoms in the system, ordered by atom ID, from 1 to N, and assigns
 those values to each atom in the system.  The
 lammps_scatter_atoms_subset() function takes a subset of IDs as an
 argument and only scatters those values to the owning atoms.
 .. code-block:: c
   void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
                            imageint *, int)
 The lammps_create_atoms() function takes a list of N atoms as input
 with atom types and coords (required), and optionally atom IDs and
 velocities and image flags.  It uses the coords of each atom to assign
 it as a new atom to the processor that owns it.  This function is
 useful to add atoms to a simulation or (in tandem with
 lammps_reset_box()) to restore a previously extracted and saved state
 of a simulation.  Additional properties for the new atoms can then be
 assigned via the lammps_scatter_atoms() or lammps_extract_atom()
 functions.
--- a/doc/src/Install_linux.rst
+++ b/doc/src/Install_linux.rst
@ -79,13 +79,13 @@ To get a copy of the current potentials files:
 which will download the potentials files to
 ``/usr/share/lammps-stable/potentials``.  The ``lmp_stable`` binary is
 hard-coded to look for potential files in this directory (it does not
-use the `LAMMPS_POTENTIALS` environment variable, as described
+use the ``LAMMPS_POTENTIALS`` environment variable, as described
 in :doc:`pair_coeff <pair_coeff>` command).
 The ``lmp_stable`` binary is built with the :ref:`KIM package <kim>` which
-results in the above command also installing the `kim-api` binaries when LAMMPS
+results in the above command also installing the ``kim-api`` binaries when LAMMPS
 is installed.  In order to use potentials from `openkim.org <openkim_>`_, you
-can install the `openkim-models` package
+can install the ``openkim-models`` package
 .. code-block:: bash
--- a/doc/src/Intro_website.rst
+++ b/doc/src/Intro_website.rst
@ -23,7 +23,6 @@ this Intr are included in this list.
 * `Mail list <https://lammps.sandia.gov/mail.html>`_
 * `Workshops <https://lammps.sandia.gov/workshops.html>`_
 * `Tutorials <https://lammps.sandia.gov/tutorials.html>`_
 * `Developer guide <https://lammps.sandia.gov/Developer.pdf>`_
 * `Pre- and post-processing tools for LAMMPS <https://lammps.sandia.gov/prepost.html>`_
 * `Other software usable with LAMMPS <https://lammps.sandia.gov/offsite.html>`_
--- a/doc/src/JPG/lammps-classes.png
+++ b/doc/src/JPG/lammps-classes.png
--- a/doc/src/Manual.rst
+++ b/doc/src/Manual.rst
@ -27,8 +27,7 @@ all LAMMPS development is coordinated.
 The content for this manual is part of the LAMMPS distribution.  You
 can build a local copy of the Manual as HTML pages or a PDF file, by
 following the steps on the :doc:`Manual build <Manual_build>` doc page.
-There is also a `Developer.pdf <Developer.pdf>`_ document which gives
+The manual is split into two parts: 1) User documentation and 2) Programmer documentation.
 a brief description of the basic code structure of LAMMPS.
 ----------
@ -55,11 +54,24 @@ every LAMMPS command.
   Howto
   Examples
   Tools
   Modify
   Python_head
   Errors
   Manual_build
 .. _programmer_documentation:
 .. toctree::
   :maxdepth: 2
   :numbered: 3
   :caption: Programmer Documentation
   :name: progdoc
   :includehidden:
   pg_library
   Modify
   pg_developer
 ..   pg_modify
 ..   pg_base
 .. toctree::
   :caption: Index
   :name: index
--- a/doc/src/Manual_build.rst
+++ b/doc/src/Manual_build.rst
@ -14,7 +14,6 @@ files. Here is a list with descriptions:
   lammps.1         # man page for the lammps command
   msi2lmp.1        # man page for the msi2lmp command
   Manual.pdf       # large PDF version of entire manual
   Developer.pdf    # small PDF with info about how LAMMPS is structured
   LAMMPS.epub      # Manual in ePUB e-book format
   LAMMPS.mobi      # Manual in MOBI e-book format
   docenv           # virtualenv folder for processing the manual sources
@ -35,7 +34,7 @@ of two ways:
 a. You can "fetch" the current HTML and PDF files from the LAMMPS web
   site.  Just type ``make fetch``.  This should download a html_www
-   directory and Manual_www.pdf/Developer_www.pdf files.  Note that if
+   directory and a Manual_www.pdf file.  Note that if
   new LAMMPS features have been added more recently than the date of
   your LAMMPS version, the fetched documentation will include those
   changes (but your source code will not, unless you update your local
@ -49,6 +48,11 @@ b. You can build the HTML or PDF files yourself, by typing ``make html``
   only once, unless you type ``make clean-all``.  After that, viewing and
   processing of the documentation can be done without internet access.
 A current version of the manual (latest patch release, aka unstable branch)
 is available online at: `https://lammps.sandia.gov/doc/Manual.html <https://lammps.sandia.gov/doc/Manual.html>`_
 A version of the manual corresponding to the ongoing development
 (aka master branch) is available online at: `https://docs.lammps.org/ <https://docs.lammps.org/>`_
 ----------
 The generation of all documentation is managed by the Makefile in the
@ -58,10 +62,9 @@ available:
 .. code-block:: bash
   make html          # generate HTML in html dir using Sphinx
-   make pdf           # generate 2 PDF files (Manual.pdf,Developer.pdf)
+   make pdf           # generate PDF  as Manual.pdf using Sphinx and pdflatex
-                      #   in doc dir via htmldoc and pdflatex
+   make fetch         # fetch HTML doc pages and PDF file from web site
-   make fetch         # fetch HTML doc pages and 2 PDF files from web site
+                      #   as a tarball and unpack into html dir and PDF
                      #   as a tarball and unpack into html dir and 2 PDFs
   make epub          # generate LAMMPS.epub in ePUB format using Sphinx
   make mobi          # generate LAMMPS.mobi in MOBI format using ebook-convert
--- a/doc/src/Modify.rst
+++ b/doc/src/Modify.rst
@ -1,5 +1,5 @@
-Modify & extend LAMMPS
+Modifying & extending LAMMPS
-**********************
+****************************
 LAMMPS is designed in a modular fashion so as to be easy to modify and
 extend with new functionality.  In fact, about 95% of its source code
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@ -1692,7 +1692,7 @@ USER-MEAMC package
 **Contents:**
 A pair style for the modified embedded atom (MEAM) potential
-translated from the Fortran version in the (obsolete) "MEAM" package
+translated from the Fortran version in the (obsolete) MEAM package
 to plain C++. The USER-MEAMC fully replaces the MEAM package, which
 has been removed from LAMMPS after the 12 December 2018 version.
--- a/doc/src/Packages_user.rst
+++ b/doc/src/Packages_user.rst
@ -6,7 +6,7 @@ name gives more details.
 User packages have been contributed by users, and begin with the
 "user" prefix.  If a contribution is a single command (single file),
-it is typically in the user-misc package.  User packages don't
+it is typically in the USER-MISC package.  User packages don't
 necessarily meet the requirements of the :doc:`standard packages <Packages_standard>`. This means the developers will try
 to keep things working and usually can answer technical questions
 about compiling the package. If you have problems using a specific
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@ -89,7 +89,6 @@ Miscellaneous tools
   :columns: 6
   * :ref:`CMake <cmake>`
   * :ref:`doxygen <doxygen>`
   * :ref:`emacs <emacs>`
   * :ref:`i-pi <ipi>`
   * :ref:`kate <kate>`
@ -254,21 +253,6 @@ The tool is authored by Xiaowang Zhou (Sandia), xzhou at sandia.gov.
 ----------
 .. _doxygen:
 doxygen tool
 --------------------------
 The tools/doxygen directory contains a shell script called
 doxygen.sh which can generate a call graph and API lists using
 the `Doxygen software <http://doxygen.org>`_.
 See the included README file for details.
 The tool is authored by Nandor Tamaskovics, numericalfreedom at googlemail.com.
 ----------
 .. _drude:
 drude tool
--- a/doc/src/fix_bond_react.rst
+++ b/doc/src/fix_bond_react.rst
@ -14,19 +14,22 @@ Syntax
     react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ...
     ...
-* ID, group-ID are documented in :doc:`fix <fix>` command. Group-ID is ignored.
+* ID, group-ID are documented in :doc:`fix <fix>` command.
 * bond/react = style name of this fix command
 * the common keyword/values may be appended directly after 'bond/react'
 * this applies to all reaction specifications (below)
-* common_keyword = *stabilization*
+* common_keyword = *stabilization* or *reset_mol_ids*
  .. parsed-literal::
       *stabilization* values = *no* or *yes* *group-ID* *xmax*
-         *no* = no reaction site stabilization
+         *no* = no reaction site stabilization (default)
         *yes* = perform reaction site stabilization
           *group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction
           *xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator
       *reset_mol_ids* values = *yes* or *no*
         *yes* = update molecule IDs based on new global topology (default)
         *no* = do not update molecule IDs
 * react = mandatory argument indicating new reaction specification
 * react-ID = user-assigned name for the reaction
@ -50,9 +53,9 @@ Syntax
         *stabilize_steps* value = timesteps
           timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms
         *update_edges* value = *none* or *charges* or *custom*
-           none = do not update topology near the edges of reaction templates
+           *none* = do not update topology near the edges of reaction templates
-           charges = update atomic charges of all atoms in reaction templates
+           *charges* = update atomic charges of all atoms in reaction templates
-           custom = force the update of user-specified atomic charges
+           *custom* = force the update of user-specified atomic charges
 Examples
 """"""""
@ -154,6 +157,13 @@ due to the internal dynamic grouping performed by fix bond/react.
   If the group-ID is an existing static group, react-group-IDs
   should also be specified as this static group, or a subset.
 The *reset_mol_ids* keyword invokes the :doc:`reset_mol_ids <reset_mol_ids>`
 command after a reaction occurs, to ensure that molecule IDs are
 consistent with the new bond topology. The group-ID used for
 :doc:`reset_mol_ids <reset_mol_ids>` is the group-ID for this fix.
 Resetting molecule IDs is necessarily a global operation, and so can
 be slow for very large systems.
 The following comments pertain to each *react* argument (in other
 words, can be customized for each reaction, or reaction step):
@ -203,9 +213,10 @@ surrounding topology. As described below, the bonding atom pairs of
 the pre-reacted template are specified by atom ID in the map file. The
 pre-reacted molecule template should contain as few atoms as possible
 while still completely describing the topology of all atoms affected
-by the reaction. For example, if the force field contains dihedrals,
+by the reaction (which includes all atoms that change atom type or
-the pre-reacted template should contain any atom within three bonds of
+connectivity, and all bonds that change bond type). For example, if
-reacting atoms.
+the force field contains dihedrals, the pre-reacted template should
 contain any atom within three bonds of reacting atoms.
 Some atoms in the pre-reacted template that are not reacting may have
 missing topology with respect to the simulation. For example, the
@ -300,8 +311,8 @@ either 'none' or 'charges.' Further details are provided in the
 discussion of the 'update_edges' keyword. The fifth optional section
 begins with the keyword 'Constraints' and lists additional criteria
 that must be satisfied in order for the reaction to occur. Currently,
-there are four types of constraints available, as discussed below:
+there are five types of constraints available, as discussed below:
-'distance', 'angle', 'dihedral', and 'arrhenius'.
+'distance', 'angle', 'dihedral', 'arrhenius', and 'rmsd'.
 A sample map file is given below:
@ -421,6 +432,25 @@ temperature calculations. A uniform random number between 0 and 1 is
 generated using *seed*\ ; if this number is less than the result of the
 Arrhenius equation above, the reaction is permitted to occur.
 The constraint of type 'rmsd' has the following syntax:
 .. parsed-literal::
   rmsd *RMSDmax* *molfragment*
 where 'rmsd' is the required keyword, and *RMSDmax* is the maximum
 root-mean-square deviation between atom positions of the pre-reaction
 template and the local reaction site (distance units), after optimal
 translation and rotation of the pre-reaction template. Optionally, the
 name of a molecule fragment (of the pre-reaction template) can be
 specified by *molfragment*\ . If a molecule fragment is specified,
 only atoms that are part of this molecule fragment are used to
 determine the RMSD. A molecule fragment must have been defined in the
 :doc:`molecule <molecule>` command for the pre-reaction template. For
 example, the molecule fragment could consist of only the backbone
 atoms of a polymer chain. This constraint can be used to enforce a
 specific relative position and orientation between reacting molecules.
 Once a reaction site has been successfully identified, data structures
 within LAMMPS that store bond topology are updated to reflect the
 post-reacted molecule template. All force fields with fixed bonds,
@ -554,7 +584,7 @@ Default
 """""""
 The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60,
-update_edges = none
+reset_mol_ids = yes, update_edges = none
 ----------
--- a/doc/src/fix_restrain.rst
+++ b/doc/src/fix_restrain.rst
@ -13,7 +13,7 @@ Syntax
 * ID, group-ID are documented in :doc:`fix <fix>` command
 * restrain = style name of this fix command
 * one or more keyword/arg pairs may be appended
-* keyword = *bond* or *angle* or *dihedral*
+* keyword = *bond* or *lbound* or *angle* or *dihedral*
  .. parsed-literal::
@ -23,7 +23,7 @@ Syntax
         r0start = equilibrium bond distance at start of run (distance units)
         r0stop = equilibrium bond distance at end of run (optional) (distance units). If not
           specified it is assumed to be equal to r0start
-       *lbond* args = atom1 atom2 Kstart Kstop r0start (r0stop)
+       *lbound* args = atom1 atom2 Kstart Kstop r0start (r0stop)
         atom1,atom2 = IDs of 2 atoms in bond
         Kstart,Kstop = restraint coefficients at start/end of run (energy units)
         r0start = equilibrium bond distance at start of run (distance units)
@ -46,7 +46,7 @@ Examples
 .. code-block:: LAMMPS
   fix holdem all restrain bond 45 48 2000.0 2000.0 2.75
-   fix holdem all restrain lbond 45 48 2000.0 2000.0 2.75
+   fix holdem all restrain lbound 45 48 2000.0 2000.0 2.75
   fix holdem all restrain dihedral 1 2 3 4 2000.0 2000.0 120.0
   fix holdem all restrain bond 45 48 2000.0 2000.0 2.75 dihedral 1 2 3 4 2000.0 2000.0 120.0
   fix texas_holdem all restrain dihedral 1 2 3 4 0.0 2000.0 120.0 dihedral 1 2 3 5 0.0 2000.0 -120.0 dihedral 1 2 3 6 0.0 2000.0 0.0
@ -150,7 +150,7 @@ is included in :math:`K`.
 ----------
-The *lbond* keyword applies a lower bound bond restraint to the specified atoms
+The *lbound* keyword applies a lower bound bond restraint to the specified atoms
 using the same functional form used by the :doc:`bond_style harmonic <bond_harmonic>` command if the distance between
 the atoms is smaller than the equilibrium bond distance and 0 otherwise. The potential associated with
 the restraint is
--- a/doc/src/pair_coeff.rst
+++ b/doc/src/pair_coeff.rst
@ -110,8 +110,8 @@ location specified.  E.g. if the file is specified as "niu3.eam", it
 is looked for in the current working directory.  If it is specified as
 "../potentials/niu3.eam", then it is looked for in the potentials
 directory, assuming it is a sister directory of the current working
-directory.  If the file is not found, it is then looked for in the
+directory.  If the file is not found, it is then looked for in one of
-directory specified by the LAMMPS_POTENTIALS environment variable.
+the directories specified by the ``LAMMPS_POTENTIALS`` environment variable.
 Thus if this is set to the potentials directory in the LAMMPS distribution,
 then you can use those files from anywhere on your system, without
 copying them into your working directory.  Environment variables are
@ -136,6 +136,11 @@ Windows:
   % set LAMMPS_POTENTIALS="C:\\Path to LAMMPS\\Potentials"
 The ``LAMMPS_POTENTIALS`` environment variable may contain paths
 to multiple folders, if they are separated by ";" on Windows and
 ":" on all other operating systems, just like the ``PATH`` and
 similar environment variables.
 ----------
 The alphabetic list of pair styles defined in LAMMPS is given on the
--- a/doc/src/pair_comb.rst
+++ b/doc/src/pair_comb.rst
@ -129,10 +129,10 @@ For style *comb3*\ , in addition to ffield.comb3, a special parameter
 file, *lib.comb3*\ , that is exclusively used for C/O/H systems, will be
 automatically loaded if a carbon atom is detected in the LAMMPS input
 structure.  This file must be in your working directory or in the
-directory pointed to by the environment variable LAMMPS_POTENTIALS, as
+directories listed in the environment variable ``LAMMPS_POTENTIALS``, as
 described on the :doc:`pair_coeff <pair_coeff>` command doc page.
-Keyword *polar* indicates whether the force field includes
+The keyword *polar* indicates whether the force field includes
 the atomic polarization.  Since the equilibration of the polarization
 has not yet been implemented, it can only be set to polar_off at present.
--- a/doc/src/pair_cosine_squared.rst
+++ b/doc/src/pair_cosine_squared.rst
@ -107,7 +107,7 @@ These pair styles can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
-The *cosine/squared* style is part of the "USER-MISC" package. It is only
+The *cosine/squared* style is part of the USER-MISC package. It is only
 enabled if LAMMPS is built with that package.  See the :doc:`Build package <Build_package>` doc page for more info.
 Related commands
--- a/doc/src/pair_coul_diel.rst
+++ b/doc/src/pair_coul_diel.rst
@ -95,7 +95,7 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
-This style is part of the "USER-MISC" package.  It is only enabled if
+This style is part of the USER-MISC package.  It is only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` doc page for more info.
 Related commands
--- a/doc/src/pair_coul_slater.rst
+++ b/doc/src/pair_coul_slater.rst
@ -95,7 +95,7 @@ Restrictions
 The  *coul/slater/long* style requires the long-range solvers included in the KSPACE package.
-These styles are part of the "USER-MISC" package.  They are only enabled if
+These styles are part of the USER-MISC package.  They are only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` doc page for more info.
 Related commands
--- a/doc/src/pair_gauss.rst
+++ b/doc/src/pair_gauss.rst
@ -164,8 +164,18 @@ heading) the following commands could be included in an input script:
 Restrictions
 """"""""""""
-The *gauss/cut* style is part of the "user-misc" package. It is only
+The *gauss/cut* style is part of the USER-MISC package. It is only
-enabled if LAMMPS is build with that package.  See the :doc:`Build package <Build_package>` doc page for more info.
+enabled if LAMMPS is built with that package.  See the :doc:`Build
 package <Build_package>` doc page for more info.
 The *gauss* style does not apply :doc:`special_bonds <special_bonds>`
 factors. When using this pair style on a system that has bonds, the
 special_bonds factors, if using the default setting of 0.0, may need to
 be adjusted to some very small number (e.g. 1.0e-100), so that those
 special pairs are not completely excluded from the neighbor lists, but
 won't contribute forces or energies from styles (e.g. when used in
 combination with a :doc:`hybrid pair style <pair_hybrid>`) that do
 apply those factors.
 Related commands
 """"""""""""""""
--- a/doc/src/pair_granular.rst
+++ b/doc/src/pair_granular.rst
@ -93,7 +93,7 @@ on particle *i* due to contact with particle *j* is given by:
 .. math::
-   \mathbf{F}_{ne, Hooke} = k_N \delta_{ij} \mathbf{n}
+   \mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}
 Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle
 overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two
@ -106,7 +106,7 @@ For the *hertz* model, the normal component of force is given by:
 .. math::
-   \mathbf{F}_{ne, Hertz} = k_N R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
+   \mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
 Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective
 radius, denoted for simplicity as *R* from here on.  For *hertz*\ , the
@ -123,7 +123,7 @@ Here, :math:`E_{eff} = E = \left(\frac{1-\nu_i^2}{E_i} + \frac{1-\nu_j^2}{E_j}\r
 modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of
 types *i* and *j*\ . Note that if the elastic modulus and the shear
 modulus of the two particles are the same, the *hertz/material* model
-is equivalent to the *hertz* model with :math:`k_N = 4/3 E_{eff}`
+is equivalent to the *hertz* model with :math:`k_n = 4/3 E_{eff}`
 The *dmt* model corresponds to the
 :ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force
@ -140,7 +140,7 @@ where the force is computed as:
   \mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n}
-Here, *a* is the radius of the contact zone, related to the overlap
+Here, :math:`a` is the radius of the contact zone, related to the overlap
 :math:`\delta` according to:
 .. math::
@ -167,7 +167,7 @@ following general form:
   \mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}
-Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n} \mathbf{n}` is the component of relative velocity along
+Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n}\ \mathbf{n}` is the component of relative velocity along
 :math:`\mathbf{n}`.
 The optional *damping* keyword to the *pair_coeff* command followed by
@ -259,7 +259,9 @@ tangential model choices and their expected parameters are as follows:
 1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s`
 2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
 3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
-4. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
+4. *mindlin/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
 5. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
 6. *mindlin_rescale/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
 Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
 damping :math:`\eta_n` that determines the magnitude of the tangential
@ -268,11 +270,11 @@ coefficient, and :math:`k_t` is the tangential stiffness coefficient.
 For *tangential linear_nohistory*, a simple velocity-dependent Coulomb
 friction criterion is used, which mimics the behavior of the *pair
-gran/hooke* style. The tangential force (\mathbf{F}_t\) is given by:
+gran/hooke* style. The tangential force :math:`\mathbf{F}_t` is given by:
 .. math::
-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
 The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:
@ -294,8 +296,8 @@ keyword also affects the tangential damping.  The parameter
 literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
 :ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`).  The relative
 tangential velocity at the point of contact is given by
-:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\Omega_i + R_j\Omega_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}{n}`,
+:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\mathbf{\Omega}_i + R_j\mathbf{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
-:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i`.
+:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
 The direction of the applied force is :math:`\mathbf{t} = \mathbf{v}_{t,rel}/\|\mathbf{v}_{t,rel}\|` .
 The normal force value :math:`F_{n0}` used to compute the critical force
@ -314,21 +316,24 @@ form:
 .. math::
-   F_{n0} = \|\mathbf{F}_ne + 2 F_{pulloff}\|
+   F_{n0} = \|\mathbf{F}_{ne} + 2 F_{pulloff}\|
 Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and
 :math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ .
 The remaining tangential options all use accumulated tangential
-displacement (i.e. contact history). This is discussed below in the
+displacement (i.e. contact history), except for the options
-context of the *linear_history* option, but the same treatment of the
+*mindlin/force* and *mindlin_rescale/force*, which use accumulated
-accumulated displacement applies to the other options as well.
+tangential force instead, and are discussed further below.
 The accumulated tangential displacement is discussed in detail below
 in the context of the *linear_history* option. The same treatment of
 the accumulated displacement applies to the other options as well.
 For *tangential linear_history*, the tangential force is given by:
 .. math::
-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
 Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
 during the entire duration of the contact:
@ -356,7 +361,7 @@ work:
 .. math::
-   \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'}\| - \mathbf{n}\cdot\mathbf{\xi'}}
+   \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}
 Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
 current time step and :math:`\mathbf{\xi}` is the corrected
@ -372,7 +377,7 @@ discussion):
 .. math::
-   \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}\right)
+   \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)
 The tangential force is added to the total normal force (elastic plus
 damping) to produce the total force on the particle. The tangential
@ -387,27 +392,68 @@ overlap region) to induce a torque on each particle according to:
   \mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t
-For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution is used, which differs from the *linear_history*
+For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution
-option by an additional factor of *a*\ , the radius of the contact region. The tangential force is given by:
+is used, which differs from the *linear_history* option by an additional factor
 of :math:`a`, the radius of the contact region. The tangential force is given by:
 .. math::
-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
-Here, *a* is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
+
 Here, :math:`a` is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
 for all normal contact models, except for *jkr*\ , where it is given
 implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see
-discussion above. To match the Mindlin solution, one should set :math:`k_t = 4G/(2-\nu)`, where :math:`G` is the shear modulus, related to Young's modulus
+discussion above. To match the Mindlin solution, one should set
-:math:`E` by :math:`G = E/(2(1+\nu))`, where :math:`\nu` is Poisson's ratio. This
+:math:`k_t = 8G_{eff}`, where :math:`G_{eff}` is the effective shear modulus given by:
-can also be achieved by specifying *NULL* for :math:`k_t`, in which case a
+
 .. math::
   G_{eff} = \left(\frac{2-\nu_i}{G_i} + \frac{2-\nu_j}{G_j}\right)^{-1}
 where :math:`G` is the shear modulus, related to Young's modulus :math:`E`
 and Poisson's ratio :math:`\nu` by :math:`G = E/(2(1+\nu))`. This can also be
 achieved by specifying *NULL* for :math:`k_t`, in which case a
 normal contact model that specifies material parameters :math:`E` and
 :math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this
 case, mixing of the shear modulus for different particle types *i* and
-*j* is done according to:
+*j* is done according to the formula above.
 .. note::
   The radius of the contact region :math:`a` depends on the normal overlap.
   As a result, the tangential force for *mindlin* can change due to
   a variation in normal overlap, even with no change in tangential displacement.
 For *tangential mindlin/force*, the accumulated elastic tangential force
 characterizes the contact history, instead of the accumulated tangential
 displacement. This prevents the dependence of the tangential force on the
 normal overlap as noted above. The tangential force is given by:
 .. math::
-   1/G = 2(2-\nu_i)(1+\nu_i)/E_i + 2(2-\nu_j)(1+\nu_j)/E_j
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|\mathbf{F}_{te} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
 The increment of the elastic component of the tangential force
 :math:`\mathbf{F}_{te}` is given by:
 .. math::
   \mathrm{d}\mathbf{F}_{te} = -k_t a \mathbf{v}_{t,rel} \mathrm{d}\tau
 The changes in frame of reference of the contacting pair of particles during
 contact are accounted for by the same formula as above, replacing the
 accumulated tangential displacement :math:`\mathbf{\xi}` by the accumulated tangential
 elastic force :math:`\mathbf{F}_{te}`. When the tangential force exceeds the critical
 force, the tangential force is directly re-scaled to match the value for
 the critical force:
 .. math::
   \mathbf{F}_{te} = - \mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}
 The same rules as those described for *mindlin* apply regarding the tangential
 stiffness and mixing of the shear modulus for different particle types.
 The *mindlin_rescale* option uses the same form as *mindlin*\ , but the
 magnitude of the tangential displacement is re-scaled as the contact
@ -421,9 +467,32 @@ Here, :math:`t_{n-1}` indicates the value at the previous time
 step. This rescaling accounts for the fact that a decrease in the
 contact area upon unloading leads to the contact being unable to
 support the previous tangential loading, and spurious energy is
-created without the rescaling above (:ref:`Walton <WaltonPC>` ). See also
+created without the rescaling above (:ref:`Walton <WaltonPC>` ).
-discussion in :ref:`Thornton et al, 2013 <Thornton2013>` , particularly
+
-equation 18(b) of that work and associated discussion.
+.. note::
   For *mindlin*, a decrease in the tangential force already occurs as the
   contact unloads, due to the dependence of the tangential force on the normal
   force described above. By re-scaling :math:`\xi`, *mindlin_rescale*
   effectively re-scales the tangential force twice, i.e., proportionally to
   :math:`a^2`. This peculiar behavior results from use of the accumulated
   tangential displacement to characterize the contact history. Although
   *mindlin_rescale* remains available for historic reasons and backward
   compatibility purposes, it should be avoided in favor of *mindlin_rescale/force*.
 The *mindlin_rescale/force* option uses the same form as *mindlin/force*,
 but the magnitude of the tangential elastic force is re-scaled as the contact
 unloads, i.e. if :math:`a < a_{t_{n-1}}`:
 .. math::
   \mathbf{F}_{te} = \mathbf{F}_{te, t_{n-1}} \frac{a}{a_{t_{n-1}}}
 This approach provides a better approximation of the :ref:`Mindlin-Deresiewicz <Mindlin1953>`
 laws and is more consistent than *mindlin_rescale*. See discussions in
 :ref:`Thornton et al, 2013 <Thornton2013>`, particularly equation 18(b) of that
 work and associated discussion, and :ref:`Agnolin and Roux, 2007 <AgnolinRoux2007>`,
 particularly Appendix A.
 ----------
@ -460,7 +529,7 @@ exceeds a critical value:
 .. math::
-   \mathbf{F}_{roll} =  min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
+   \mathbf{F}_{roll} =  \min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
 Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of
 the pseudo-force.  As with tangential displacement, the rolling
@ -512,7 +581,7 @@ is then truncated according to:
 .. math::
-   \tau_{twist} = min(\mu_{twist} F_{n,0}, \tau_{twist,0})
+   \tau_{twist} = \min(\mu_{twist} F_{n,0}, \tau_{twist,0})
 Similar to the sliding and rolling displacement, the angular
 displacement is rescaled so that it corresponds to the critical value
@ -763,3 +832,15 @@ Technology, 233, 30-46.
 .. _WaltonPC:
 **(Otis R. Walton)** Walton, O.R., Personal Communication
 .. _Mindlin1953:
 **(Mindlin and Deresiewicz, 1953)** Mindlin, R.D., & Deresiewicz, H (1953).
 Elastic Spheres in Contact under Varying Oblique Force.
 J. Appl. Mech., ASME 20, 327-344.
 .. _AgnolinRoux2007:
 **(Agnolin and Roux 2007)** Agnolin, I. & Roux, J-N. (2007).
 Internal states of model isotropic granular packings.
 I. Assembling process, geometry, and contact networks. Phys. Rev. E, 76, 061302.
--- a/doc/src/pair_mesodpd.rst
+++ b/doc/src/pair_mesodpd.rst
@ -250,8 +250,12 @@ from :ref:`(Li2013_POF) <Li2013_POF>`.  The short mDPD run (about 2 minutes
 on a single core) generates a particle trajectory which can
 be visualized as follows.
 .. only:: html
   .. image:: JPG/examples_mdpd.gif
      :align: center
 .. image:: JPG/examples_mdpd_first.jpg
   :target: JPG/examples_mdpd.gif
   :align: center
 .. image:: JPG/examples_mdpd_last.jpg
--- a/doc/src/pair_peri.rst
+++ b/doc/src/pair_peri.rst
@ -128,7 +128,7 @@ viscoelastic relaxation parameter and time constant,
 respectively. m_lambdai varies within zero to one. For very small
 values of m_lambdai the viscoelastic model responds very similarly to a
 linear elastic model. For details please see the description in
-"(Mtchell2011)".
+"(Mitchell2011)".
 For the *peri/eps* style:
@ -142,7 +142,7 @@ For the *peri/eps* style:
 K is the bulk modulus and G is the shear modulus. The horizon is a
 cutoff distance and s00 and :math:`\alpha` are used as a bond breaking
 criteria.  m_yield_stress is the yield stress of the material. For
-details please see the description in "(Mtchell2011a)".
+details please see the description in "(Mitchell2011a)".
 ----------
--- a/doc/src/pair_python.rst
+++ b/doc/src/pair_python.rst
@ -38,12 +38,12 @@ corresponding compiled code. This penalty can be significantly reduced
 through generating tabulations from the python code through the
 :doc:`pair_write <pair_write>` command, which is supported by this style.
-Only a single pair_coeff command is used with the *python* pair style
+Only a single :doc:`pair_coeff <pair_coeff>` command is used with the
-which specifies a python class inside a python module or file that
+*python* pair style which specifies a python class inside a python module
-LAMMPS will look up in the current directory, the folder pointed to by
+or a file that LAMMPS will look up in the current directory, a folder
-the LAMMPS_POTENTIALS environment variable or somewhere in your python
+pointed to by the ``LAMMPS_POTENTIALS`` environment variable or somewhere
-path.  A single python module can hold multiple python pair class
+in your python path.  A single python module can hold multiple python pair
-definitions. The class definitions itself have to follow specific
+class definitions.  The class definitions themselves have to follow specific
 rules that are explained below.
 Atom types in the python class are specified through symbolic
--- a/doc/src/pg_cplusplus.rst
+++ b/doc/src/pg_cplusplus.rst
@ -0,0 +1,91 @@
 Using the C++ API directly
 **************************
 Using the C++ classes of the LAMMPS library is lacking some of the
 convenience of the C library API, but it allows a more direct access to
 simulation data and thus more low-level manipulations and tighter
 integration of LAMMPS into another code.  While the complete C
 library API is provided in the ``library.h`` header file, for using
 the C++ API it is required to include the individual header files
 defining the individual classes in use.  Typically the name of the
 class and the name of the header follow some simple rule.  Examples
 are given below.
 Creating or deleting a LAMMPS object
 *************************************
 When using the LAMMPS library interfaces, the core task is to create an
 instance of the :cpp:class:`LAMMPS_NS::LAMMPS` class.  In C++ this can
 be done directly through the ``new`` operator.  All further operations
 are then initiated through calling member functions of some of the
 components of the LAMMPS class or accessing their data members.  The
 destruction of the LAMMPS instance is correspondingly initiated by using
 the ``delete`` operator.  Here is a simple example:
 .. code-block:: c++
   #include "lammps.h"
   #include "universe.h"
   #include <mpi.h>
   #include <iostream>
   int main(int argc, char **argv)
   {
       LAMMPS_NS::LAMMPS *lmp;
       // custom argument vector for LAMMPS library
       const char *lmpargv[] {"liblammps", "-log", "none"};
       int lmpargc = sizeof(lmpargv)/sizeof(const char *);
       // explicitly initialize MPI
       MPI_Init(&argc, &argv);
       // create LAMMPS instance
       lmp = new LAMMPS_NS::LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
       // output numerical version string
       std::cout << "LAMMPS version: " << lmp->universe->num_ver << std::endl;
       // delete LAMMPS instance
       delete lmp;
       // stop MPI environment
       MPI_Finalize();
       return 0;
   }
 Please note that this requires including the ``lammps.h`` header for accessing
 the members of the LAMMPS class and then the ``universe.h`` header for accessing
 the ``num_ver`` member of the :cpp:class:`Universe` class.
 Executing LAMMPS commands
 *************************
 Once a LAMMPS instance is created by your C++ code, you need to set up a
 simulation and that is most conveniently done by "driving" it through
 issuing commands like you would do when running a LAMMPS simulation from
 an input script. Processing of input in LAMMPS is handled by the
 :cpp:class:`Input <LAMMPS_NS::Input>` class an instance of which is a
 member of the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class.  You have
 two options: reading commands from a file, or executing a single
 command from a string. See below for a small example:
 .. code-block:: c++
   #include "lammps.h"
   #include "input.h"
   #include <mpi.h>
   using namespace LAMMPS_NS;
   int main(int argc, char **argv)
   {
       const char *lmpargv[] {"liblammps", "-log", "none"};
       int lmpargc = sizeof(lmpargv)/sizeof(const char *);
       MPI_Init(&argc, &argv);
       LAMMPS *lmp = new LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
       lmp->input->file("in.melt");
       lmp->input->one("run 100 post no");
       delete lmp;
       return 0;
   }
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
--- a/doc/src/pg_fortran.rst
+++ b/doc/src/pg_fortran.rst
@ -0,0 +1,202 @@
 The ``LIBLAMMPS`` Fortran Module
 ********************************
 The ``LIBLAMMPS`` module provides an interface to call LAMMPS from a
 Fortran code.  It is based on the LAMMPS C-library interface and
 requires a Fortran 2003 compatible compiler to be compiled.
 While C libraries have a defined binary interface (ABI) and can thus be
 used from multiple compiler versions from different vendors for as long
 as they are compatible with the hosting operating system, the same is
 not true for Fortran codes.  Thus the LAMMPS Fortran module needs to be
 compiled alongside the code using it from the source code in
 ``fortran/lammps.f90``.  When linking, you also need to
 :doc:`link to the LAMMPS library <Build_link>`.  A typical command line
 for a simple program using the Fortran interface would be:
 .. code-block:: bash
   mpifort -o testlib.x  lammps.f90 testlib.f90 -L. -llammps
 Please note that the MPI compiler wrapper is only required when
 calling the library from an MPI parallel code.  Please also note the order
 of the source files: the lammps.f90 file needs to be compiled first,
 since it provides the ``LIBLAMMPS`` module that is imported by the
 Fortran code using the interface.
 .. versionadded:: 30Sep2020
 .. admonition:: Work in Progress
   This Fortran module is work in progress and only the documented
   functionality is currently available. The final implementation should
   cover the entire range of functionality available in the C and
   Python library interfaces.
 ----------
 Creating or deleting a LAMMPS object
 ************************************
 With the Fortran interface the creation of a :cpp:class:`LAMMPS
 <LAMMPS_NS::LAMMPS>` instance is included in the constructor for
 creating the :f:func:`lammps` derived type.  To import the definition of
 that type and its type bound procedures you need to add a ``USE
 LIBLAMMPS`` statement.  Internally it will call either
 :cpp:func:`lammps_open_fortran` or :cpp:func:`lammps_open_no_mpi` from
 the C library API to create the class instance.  All arguments are
 optional and :cpp:func:`lammps_mpi_init` will be called automatically,
 if it is needed.  Similarly, a possible call to :cpp:func:`lammps_mpi_finalize`
 is integrated into the :f:func:`close` function and triggered with
 the optional logical argument set to ``.true.``. Here is a simple example:
 .. code-block:: fortran
   PROGRAM testlib
     USE LIBLAMMPS                 ! include the LAMMPS library interface
     TYPE(lammps)     :: lmp       ! derived type to hold LAMMPS instance
     CHARACTER(len=*), DIMENSION(*), PARAMETER :: args = &
         [ CHARACTER(len=12) :: 'liblammps', '-log', 'none' ]
     ! create a LAMMPS instance (and initialize MPI)
     lmp = lammps(args)
     ! get and print numerical version code
     PRINT*, 'LAMMPS Version: ', lmp%version()
     ! delete LAMMPS instance (and shuts down MPI)
     CALL lmp%close(.true.)
   END PROGRAM testlib
 --------------------
 Executing LAMMPS commands
 *************************
 Once a LAMMPS instance is created, it is possible to "drive" the LAMMPS
 simulation by telling LAMMPS to read commands from a file, or pass
 individual or multiple commands from strings or lists of strings.  This
 is done similar to how it is implemented in the :doc:`C-library
 <pg_lib_execute>` interface. Before handing off the calls to the
 C-library interface, the corresponding Fortran versions of the calls
 (:f:func:`file`, :f:func:`command`, :f:func:`commands_list`, and
 :f:func:`commands_string`) have to make a copy of the strings passed as
 arguments so that they can be modified to be compatible with the
 requirements of strings in C without affecting the original strings.
 Those copies are automatically deleted after the functions return.
 Below is a small demonstration of the uses of the different functions:
 .. code-block:: fortran
   PROGRAM testcmd
     USE LIBLAMMPS
     TYPE(lammps)     :: lmp
     CHARACTER(len=512) :: cmds
     CHARACTER(len=40),ALLOCATABLE :: cmdlist(:)
     CHARACTER(len=10) :: trimmed
     INTEGER :: i
     lmp = lammps()
     CALL lmp%file('in.melt')
     CALL lmp%command('variable zpos index 1.0')
     ! define 10 groups of 10 atoms each
     ALLOCATE(cmdlist(10))
     DO i=1,10
         WRITE(trimmed,'(I10)') 10*i
         WRITE(cmdlist(i),'(A,I1,A,I10,A,A)')       &
             'group g',i-1,' id ',10*(i-1)+1,':',ADJUSTL(trimmed)
     END DO
     CALL lmp%commands_list(cmdlist)
     ! run multiple commands from multi-line string
     cmds = 'clear' // NEW_LINE('A') //                       &
         'region  box block 0 2 0 2 0 2' // NEW_LINE('A') //  &
         'create_box 1 box' // NEW_LINE('A') //               &
         'create_atoms 1 single 1.0 1.0 ${zpos}'
     CALL lmp%commands_string(cmds)
     CALL lmp%close()
   END PROGRAM testcmd
 ---------------
 The ``LIBLAMMPS`` module API
 ****************************
 Below are the detailed descriptions of definitions and interfaces
 of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS.
 .. f:type:: lammps
   Derived type that is the general class of the Fortran interface.
   It holds a reference to the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance
   that any of the included calls are forwarded to.
   :f c_ptr handle: reference to the LAMMPS class
   :f close: :f:func:`close`
   :f version: :f:func:`version`
   :f file: :f:func:`file`
   :f command: :f:func:`command`
   :f commands_list: :f:func:`commands_list`
   :f commands_string: :f:func:`commands_string`
 .. f:function:: lammps(args[,comm])
   This is the constructor for the Fortran class and will forward
   the arguments to a call to either :cpp:func:`lammps_open_fortran`
   or :cpp:func:`lammps_open_no_mpi`. If the LAMMPS library has been
   compiled with MPI support, it will also initialize MPI, if it has
   not already been initialized before.
   The *args* argument with the list of command line parameters is
   optional and so is the *comm* argument with the MPI communicator.
   If *comm* is not provided, ``MPI_COMM_WORLD`` is assumed. For
   more details please see the documentation of :cpp:func:`lammps_open`.
   :p character(len=*) args(*) [optional]: arguments as list of strings
   :o integer comm [optional]: MPI communicator
   :r lammps: an instance of the :f:type:`lammps` derived type
 .. f:subroutine:: close([finalize])
   This method will close down the LAMMPS instance through calling
   :cpp:func:`lammps_close`.  If the *finalize* argument is present and
   has a value of ``.true.``, then this subroutine also calls
   :cpp:func:`lammps_mpi_finalize`.
   :o logical finalize [optional]: shut down the MPI environment of the LAMMPS library if true.
 .. f:function:: version()
   This method returns the numeric LAMMPS version like :cpp:func:`lammps_version`.
   :r integer: LAMMPS version
 --------
 .. f:subroutine:: file(filename)
   This method will call :cpp:func:`lammps_file` to have LAMMPS read
   and process commands from a file.
   :p character(len=*) filename: name of file with LAMMPS commands
 .. f:subroutine:: command(cmd)
   This method will call :cpp:func:`lammps_command` to have LAMMPS
   execute a single command.
   :p character(len=*) cmd: single LAMMPS command
 .. f:subroutine:: commands_list(cmds)
   This method will call :cpp:func:`lammps_commands_list` to have LAMMPS
   execute a list of input lines.
   :p character(len=*) cmds(*): list of LAMMPS input lines
 .. f:subroutine:: commands_string(str)
   This method will call :cpp:func:`lammps_commands_string` to have LAMMPS
   execute a block of commands from a string.
   :p character(len=*) str: LAMMPS input in string
--- a/doc/src/pg_lib_add.rst
+++ b/doc/src/pg_lib_add.rst
@ -0,0 +1,33 @@
 Adding code to the Library interface
 ====================================
 The functionality of the LAMMPS library interface has historically
 always been motivated by the needs of its users and functions were
 added or expanded as they were needed and used.  Contributions to
 the interface are always welcome.  However with a refactoring of
 the library interface and its documentation that started in 2020,
 there are now a few requirements for inclusion of changes.
  - New functions should be orthogonal to existing ones and not
    implement functionality that can already be achieved with the
    existing APIs.
  - All changes and additions should be documented with
    `Doxygen <https://www.doxygen.nl>`_ style comments and references
    to those functions added to the corresponding files in the
    ``doc/src`` folder.
  - If possible, new unit tests to test those new features should
    be added.
  - The new feature should also be implemented and documented for
    the Python and Fortran modules.
  - All additions should work and be compatible with ``-DLAMMPS_BIGBIG``,
    ``-DLAMMPS_SMALLBIG``, ``-DLAMMPS_SMALLSMALL`` and compiling
    with and without MPI support.
  - The ``library.h`` file should be kept compatible to C code at
    a level similar to C89. Its interfaces may not reference any
    custom data types (e.g. ``bigint``, ``tagint``, and so on) only
    known inside of LAMMPS.
  - only C style comments, not C++ style
 Please note, that these are *not* *strict* requirements, but the
 LAMMPS developers appreciate if they are followed closely and will
 assist with implementing what is missing.
--- a/doc/src/pg_lib_config.rst
+++ b/doc/src/pg_lib_config.rst
@ -0,0 +1,67 @@
 Retrieving LAMMPS configuration information
 ===========================================
 The following library functions can be used to query the
 LAMMPS library about compile time settings and included
 packages and styles.
 -----------------------
 .. doxygenfunction:: lammps_config_has_mpi_support
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_gzip_support
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_png_support
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_jpeg_support
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_ffmpeg_support
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_exceptions
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_has_package
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_package_count
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_config_package_name
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_has_style
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_style_count
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_style_name
   :project: progguide
--- a/doc/src/pg_lib_create.rst
+++ b/doc/src/pg_lib_create.rst
@ -0,0 +1,104 @@
 Creating or deleting a LAMMPS object
 ====================================
 The :cpp:func:`lammps_open` and :cpp:func:`lammps_open_no_mpi`
 functions are used to create and initialize a
 :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` instance.  The calling program has to
 provide a handle where a reference to this instance can be stored and
 which has to be used in all subsequent function calls until that
 instance is destroyed by calling :cpp:func:`lammps_close`.
 Here is a simple example demonstrating its use:
 .. code-block:: C
   #include "library.h"
   #include <stdio.h>
   int main(int argc, char **argv)
   {
     void *handle;
     int version;
     const char *lmpargv[] = { "liblammps", "-log", "none"};
     int lmpargc = sizeof(lmpargv)/sizeof(const char *);
     /* create LAMMPS instance */
     handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
     if (handle == NULL) {
       printf("LAMMPS initialization failed");
       lammps_mpi_finalize();
       return 1;
     }
     /* get and print numerical version code */
     version = lammps_version(handle);
     printf("LAMMPS Version: %d\n",version);
     /* delete LAMMPS instance and shut down MPI */
     lammps_close(handle);
     lammps_mpi_finalize();
     return 0;
   }
 The LAMMPS library will be using the MPI library it was compiled with
 and will either run on all processors in the ``MPI_COMM_WORLD``
 communicator or on the set of processors in the communicator given in
 the ``comm`` argument of :cpp:func:`lammps_open`.  This means
 the calling code can run LAMMPS on all or a subset of processors.  For
 example, a wrapper code might decide to alternate between LAMMPS and
 another code, allowing them both to run on all the processors.  Or it
 might allocate part of the processors to LAMMPS and the rest to the
 other code by creating a custom communicator with ``MPI_Comm_split()``
 and running both codes concurrently before syncing them up periodically.
 Or it might instantiate multiple instances of LAMMPS to perform
 different calculations and either alternate between them, run them
 concurrently on split communicators, or run them one after the other.
 The :cpp:func:`lammps_open` function may be called multiple
 times for this latter purpose.
 The :cpp:func:`lammps_close` function is used to shut down
 the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class pointed to by the handle
 passed as an argument and free all its memory. This has to be called for
 every instance created with any of the :cpp:func:`lammps_open` functions.  It will, however, **not** call
 ``MPI_Finalize()``, since that may only be called once.  See
 :cpp:func:`lammps_mpi_finalize` for an alternative to calling
 ``MPI_Finalize()`` explicitly in the calling program.
 The :cpp:func:`lammps_free` function is a clean-up
 function to free memory that the library allocated previously
 via other function calls.  See below for notes in the descriptions
 of the individual commands where such memory buffers were allocated.
 -----------------------
 .. doxygenfunction:: lammps_open
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_open_no_mpi
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_open_fortran
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_close
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_mpi_init
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_mpi_finalize
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_free
   :project: progguide
--- a/doc/src/pg_lib_execute.rst
+++ b/doc/src/pg_lib_execute.rst
@ -0,0 +1,69 @@
 Executing LAMMPS commands
 =========================
 Once a LAMMPS instance is created, there are multiple ways to "drive" a
 simulation.  In most cases it is easiest to process single or multiple
 LAMMPS commands like in an input file.  This can be done through reading
 a file or passing single commands or lists of commands or blocks of
 commands with the following functions.
 Via these functions, the calling code can have the LAMMPS instance act
 on a series of :doc:`input file commands <Commands_all>` that are either
 read from a file or passed as strings.  This, for example, allows
 setting up a problem from a template file and then running it in stages while
 performing other operations in between or concurrently.  The caller can
 interleave the LAMMPS function calls with operations it performs, calls
 to extract information from or set information within LAMMPS, or calls
 to another code's library.
 Also equivalent to regular :doc:`input script parsing <Commands_parse>`
 is the handling of comments and expansion of variables with ``${name}``
 or ``$(expression)`` syntax before the commands are parsed and
 executed. Below is a short example using some of these functions.
 .. code-block:: C
   #include "library.h"
   #include <mpi.h>
   #include <stdio.h>
   int main(int argc, char **argv)
   {
     void *handle;
     int i;
     MPI_Init(&argc, &argv);
     handle = lammps_open(0, NULL, MPI_COMM_WORLD, NULL);
     lammps_file(handle,"in.sysinit");
     lammps_command(handle,"run 1000 post no");
     for (i=0; i < 100; ++i) {
       lammps_commands_string(handle,"run 100 pre no post no\n"
                                     "print 'PE = $(pe)'\n"
                                     "print 'KE = $(ke)'\n");
     }
     lammps_close(handle);
     MPI_Finalize();
     return 0;
   }
 -----------------------
 .. doxygenfunction:: lammps_file
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_command
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_commands_list
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_commands_string
   :project: progguide
--- a/doc/src/pg_lib_neighbor.rst
+++ b/doc/src/pg_lib_neighbor.rst
@ -0,0 +1,30 @@
 Accessing LAMMPS Neighbor lists
 ===============================
 The following functions allow to access neighbor lists
 generated by LAMMPS or query their properties.
 -----------------------
 .. doxygenfunction:: lammps_find_compute_neighlist
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_find_fix_neighlist
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_find_pair_neighlist
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_neighlist_num_elements
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_neighlist_element_neighbors
   :project: progguide
--- a/doc/src/pg_lib_objects.rst
+++ b/doc/src/pg_lib_objects.rst
@ -0,0 +1,31 @@
 Retrieving or setting properties of LAMMPS objects
 ==================================================
 This section documents accessing or modifying data from objects like
 computes, fixes, or variables in LAMMPS.
 -----------------------
 .. doxygenfunction:: lammps_extract_compute
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_fix
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_variable
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_set_variable
   :project: progguide
 -----------------------
 .. doxygenenum:: _LMP_STYLE_CONST
 .. doxygenenum:: _LMP_TYPE_CONST
--- a/doc/src/pg_lib_properties.rst
+++ b/doc/src/pg_lib_properties.rst
@ -0,0 +1,62 @@
 Retrieving or setting LAMMPS system properties
 ==============================================
 The library interface allows to extract different kinds of information
 about the active simulation instance and also to modify some of them.
 This allows to combine MD simulation steps with other processing and
 simulation methods computed in the calling code or another code that is
 coupled to LAMMPS via the library interface.  In some cases the data
 returned is a direct reference to the original data inside LAMMPS cast
 to a void pointer.  In that case the data needs to be cast to a suitable
 pointer to be able to access it, and you need to know the correct dimensions
 and lengths.  When accessing per-atom data, please note that this data
 is the per-processor **local** data and indexed accordingly. These arrays
 can change sizes and order at every neighbor list rebuild and atom sort
 event as atoms are migrating between sub-domains.
 -----------------------
 .. doxygenfunction:: lammps_version
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_get_natoms
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_get_thermo
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_box
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_reset_box
   :project: progguide
 -------------------
 .. doxygenfunction:: lammps_extract_setting
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_global
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_atom
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_create_atoms(void *handle, int n, int *id, int *type, double *x, double *v, int *image, int bexpand)
   :project: progguide
--- a/doc/src/pg_lib_scatter.rst
+++ b/doc/src/pg_lib_scatter.rst
@ -0,0 +1,29 @@
 Library functions for scatter/gather operations
 ================================================
 .. TODO add description
 -----------------------
 .. doxygenfunction:: lammps_gather_atoms
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_gather_atoms_concat
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_gather_atoms_subset
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_scatter_atoms
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_scatter_atoms_subset
   :project: progguide
--- a/doc/src/pg_lib_utility.rst
+++ b/doc/src/pg_lib_utility.rst
@ -0,0 +1,30 @@
 Library interface utility functions
 ===================================
 To simplify some of the tasks, the library interface contains
 some utility functions that are not directly calling LAMMPS.
 -----------------------
 .. doxygenfunction:: lammps_encode_image_flags
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_decode_image_flags(int image, int *flags)
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_set_fix_external_callback(void *, char *, FixExternalFnPtr, void*)
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_has_error
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_get_last_error_message
   :project: progguide
--- a/doc/src/pg_library.rst
+++ b/doc/src/pg_library.rst
@ -0,0 +1,158 @@
 LAMMPS Library Interfaces
 *************************
 As described on the :doc:`library interface to LAMMPS <Howto_library>`
 doc page, LAMMPS can be built as a library (static or shared), so that
 it can be called by another code, used in a :doc:`coupled manner
 <Howto_couple>` with other codes, or driven through a :doc:`Python
 script <Python_head>`.  Even the LAMMPS standalone executable is
 essentially a thin wrapper on top of the LAMMPS library, creating a
 LAMMPS instance, processing input and then exiting.
 Several of these approaches are based on C language wrapper functions
 in the files ``src/library.h`` and ``src/library.cpp``, but it is also
 possible to use C++ directly.  The basic procedure is always the same:
 you create one or more instances of the
 :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class and then pass commands as
 strings or from files to that LAMMPS instance to execute calculations,
 or read, manipulate, and update data from the active class instances
 inside LAMMPS to do analysis or perform operations that are not
 possible with existing commands.
 .. _thread-safety:
 .. admonition:: Thread-safety
   :class: note
   LAMMPS was initially not conceived as a thread-safe program, but over
   the years changes have been applied to replace operations that
   collide with creating multiple LAMMPS instances from multiple threads
   of the same process with thread-safe alternatives.  This primarily
   applies to the core LAMMPS code and less so on add-on packages,
   especially when those packages require additional code in the *lib*
   folder, interface LAMMPS to Fortran libraries, or the code uses
   static variables (like the USER-COLVARS package).
   Another major issue to deal with is to correctly handle MPI.
   Creating a LAMMPS instance requires passing an MPI communicator, or
   it assumes the ``MPI_COMM_WORLD`` communicator, which spans all MPI
   processor ranks.  When creating multiple LAMMPS object instances from
   different threads, this communicator has to be different for each
   thread or else collisions can happen, or it has to be guaranteed
   that only one thread at a time is active.  MPI communicators,
   however, are not a problem, if LAMMPS is compiled with the MPI STUBS
   library, which implies that there is no MPI communication and only 1
   MPI rank.
 ----------
 .. _lammps_c_api:
 LAMMPS C Library API
 ====================
 The C library interface is the most commonly used path to manage LAMMPS
 instances from a compiled code and it is the basis for the :doc:`Python
 <pg_python>` and :doc:`Fortran <pg_fortran>` modules.  Almost all
 functions of the C language API require an argument containing a
 "handle" in the form of a ``void *`` type variable, which points to the
 location of a LAMMPS class instance.
 The ``library.h`` header file by default includes the ``mpi.h`` header
 for an MPI library, so it must be present when compiling code using the
 library interface.  This usually must be the header from the same MPI
 library as the LAMMPS library was compiled with.  The exception is when
 LAMMPS was compiled in serial mode using the ``STUBS`` MPI library.  In
 that case the calling code may be compiled with a different MPI library
 for as long as :cpp:func:`lammps_open_no_mpi` is called to create a
 LAMMPS instance. Then you may set the define ``-DLAMMPS_LIB_NO_MPI``
 when compiling your code and the inclusion of ``mpi.h`` will be skipped
 and consequently the function :cpp:func:`lammps_open` may not be used.
 .. admonition:: Errors versus exceptions
   :class: note
   If any of the function calls in the LAMMPS library API will trigger
   an error inside LAMMPS, this will result in an abort of the entire
   program.  This is not always desirable.  Instead, LAMMPS can be
   compiled to :ref:`throw a C++ exception <exceptions>`.
 .. warning::
   No checks are made on the arguments of the function calls of the C
   library interface.  *All* function arguments must be non-NULL unless
   *explicitly* allowed and point to consistent and valid data.  Buffers
   for storing returned data must be allocated to a suitable size.
   Passing invalid or unsuitable information will likely cause crashes
   or corrupt data.
 ------------------------------
 .. toctree::
   :maxdepth: 1
   pg_lib_create
   pg_lib_execute
   pg_lib_properties
   pg_lib_objects
   pg_lib_scatter
   pg_lib_neighbor
   pg_lib_config
   pg_lib_utility
   pg_lib_add
 --------------------
 .. _lammps_python_api:
 LAMMPS Python APIs
 ==================
 The LAMMPS Python module enables calling the LAMMPS C library API from
 Python by dynamically loading functions in the LAMMPS shared library through
 the `Python ctypes module <https://docs.python.org/3/library/ctypes.html>`_.
 Because of the dynamic loading, it is **required** that LAMMPS is compiled
 in :ref:`"shared" mode <exe>`.  The Python interface is object oriented, but
 otherwise tries to be very similar to the C library API.  Three different
 Python classes to run LAMMPS are available and they build on each other.
 .. toctree::
   :maxdepth: 1
   pg_python
 -------------------
 .. _lammps_fortran_api:
 LAMMPS Fortran API
 ==================
 The LAMMPS Fortran module is a wrapper around calling functions from the
 LAMMPS C library API from Fortran through the ISO_C_BINDING feature in
 Fortran 2003.  The interface is object oriented but otherwise tries to
 be very similar to the C library API and the basic Python module.
 .. toctree::
   :maxdepth: 1
   pg_fortran
 -------------------
 .. _lammps_cplusplus_api:
 LAMMPS C++ API
 ==============
 It is also possible to invoke the LAMMPS C++ API directly in your code.
 It is lacking some of the convenience of the C library API, but it allows
 a more direct access to simulation data and thus more low-level manipulations.
 The following links provide some examples and references to the C++ API.
 .. toctree::
   :maxdepth: 1
   pg_cplusplus
--- a/doc/src/pg_python.rst
+++ b/doc/src/pg_python.rst
@ -0,0 +1,188 @@
 The ``lammps`` Python module
 ****************************
 .. py:module:: lammps
 The LAMMPS Python interface is implemented as a module called
 :py:mod:`lammps` in the ``lammps.py`` file in the ``python`` folder of
 the LAMMPS source code distribution.  After compilation of LAMMPS, the
 module can be installed into a Python system folder or a user folder
 with ``make install-python``.  Components of the module can then be loaded
 into a Python session with the ``import`` command.
 There are multiple Python interface classes in the :py:mod:`lammps` module:
 - the :py:class:`lammps <lammps.lammps>` class. This is a wrapper around
  the C-library interface and its member functions try to replicate the
  :doc:`C-library API <pg_library>` closely.  This is the most
  feature-complete Python API.
 - the :py:class:`PyLammps <lammps.PyLammps>` class. This is a more high-level
  and more Python style class implemented on top of the
  :py:class:`lammps <lammps.lammps>` class.
 - the :py:class:`IPyLammps <lammps.IPyLammps>` class is derived from
  :py:class:`PyLammps <lammps.PyLammps>` and adds embedded graphics
  features to conveniently include LAMMPS into `Jupyter
  <https://jupyter.org/>`_ notebooks.
 .. _mpi4py_url: https://mpi4py.readthedocs.io
 ----------
 Creating or deleting a LAMMPS object
 ************************************
 With the Python interface the creation of a :cpp:class:`LAMMPS
 <LAMMPS_NS::LAMMPS>` instance is included in the constructor for the
 :py:class:`lammps <lammps.lammps>` class.  Internally it will call either
 :cpp:func:`lammps_open` or :cpp:func:`lammps_open_no_mpi` from the C
 library API to create the class instance.
 All arguments are optional.  The *name* argument is to allow loading a
 LAMMPS shared library that is named ``liblammps_machine.so`` instead of
 the default name of ``liblammps.so``.  In most cases the latter will be
 installed or used.  The *ptr* argument is for use of the
 :py:mod:`lammps` module from inside a LAMMPS instance, e.g. with the
 :doc:`python <python>` command, where a pointer to the already existing
 :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance can be passed
 to the Python class and used instead of creating a new instance.  The
 *comm* argument may be used in combination with the `mpi4py <mpi4py_url_>`_
 module to pass an MPI communicator to LAMMPS and thus it is possible
 to run the Python module like the library interface on a subset of the
 MPI ranks after splitting the communicator. Here is a simple example:
 .. code-block:: python
   from lammps import lammps
   # NOTE: argv[0] is set by the Python module
   args = ["-log", "none"]
   # create LAMMPS instance
   lmp = lammps(cmdargs=args)
   # get and print numerical version code
   print("LAMMPS Version: ", lmp.version())
   # explicitly close and delete LAMMPS instance (optional)
   lmp.close()
 Same as with the :doc:`C library API <pg_lib_create>` this will use the
 ``MPI_COMM_WORLD`` communicator for the MPI library that LAMMPS was
 compiled with.  The :py:func:`lmp.close() <lammps.lammps.close>` call is
 optional since the LAMMPS class instance will also be deleted
 automatically during the :py:class:`lammps <lammps.lammps>` class
 destructor.
 Executing LAMMPS commands
 *************************
 Once an instance of the :py:class:`lammps <lammps.lammps>` class is
 created, there are multiple ways to "feed" it commands. In a way, that is
 not very different from running a LAMMPS input script, except that
 Python has many more facilities for structured programming than the
 LAMMPS input script syntax.  Furthermore it is possible to "compute"
 what the next LAMMPS command should be. Same as in the equivalent :doc:`C
 library functions <pg_lib_execute>`, commands can be read from a file, a
 single string, a list of strings and a block of commands in a single
 multi-line string. They are processed under the same boundary conditions
 as the C library counterparts.  The example below demonstrates the use
 of :py:func:`lammps.file`, :py:func:`lammps.command`,
 :py:func:`lammps.commands_list`, and :py:func:`lammps.commands_string`:
 .. code-block:: python
   from lammps import lammps
   lmp = lammps()
   # read commands from file 'in.melt'
   lmp.file('in.melt')
   # issue a single command
   lmp.command('variable zpos index 1.0')
   # create 10 groups with 10 atoms each
   cmds = ["group g{} id {}:{}".format(i,10*i+1,10*(i+1)) for i in range(10)]
   lmp.commands_list(cmds)
   # run commands from a multi-line string
   block = """
   clear
   region  box block 0 2 0 2 0 2
   create_box 1 box
   create_atoms 1 single 1.0 1.0 ${zpos}
   """
   lmp.commands_string(block)
 ----------
 The ``lammps`` class API
 ************************
 The :py:class:`lammps <lammps.lammps>` class is the core of the LAMMPS
 Python interfaces.  It is a wrapper around the :doc:`LAMMPS C library
 API <pg_library>` using the `Python ctypes module
 <https://docs.python.org/3/library/ctypes.html>`_ and a shared library
 compiled from the LAMMPS source code.  The individual methods in this
 class try to closely follow the corresponding C functions.  The handle
 argument that needs to be passed to the C functions is stored internally
 in the class and automatically added when calling the C library
 functions. Below is a detailed documentation of the API.
 .. autoclass:: lammps.lammps
   :members:
 ----------
 The ``PyLammps`` class API
 **************************
 .. autoclass:: lammps.PyLammps
   :members:
 ----------
 The ``IPyLammps`` class API
 ***************************
 .. autoclass:: lammps.IPyLammps
   :members:
 ----------
 Additional components of the ``lammps`` module
 **********************************************
 The :py:mod:`lammps` module additionally contains several constants
 and the :py:class:`NeighList <lammps.NeighList>` class:
 .. _py_data_constants:
 .. py:data:: LAMMPS_INT, LAMMPS_DOUBLE, LAMMPS_BIGINT, LAMMPS_TAGINT, LAMMPS_STRING
   :type: int
   Constants in the :py:mod:`lammps` module to indicate how to
   cast data when the C library function returns a void pointer.
   Used in :py:func:`lammps.extract_global`.
 .. _py_style_constants:
 .. py:data:: LMP_STYLE_GLOBAL, LMP_STYLE_ATOM, LMP_STYLE_LOCAL
   :type: int
   Constants in the :py:mod:`lammps` module to select what style of data
   to request from computes or fixes. See :cpp:enum:`_LMP_STYLE_CONST`
   for the equivalent constants in the C library interface. Used in
   :py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
 .. _py_type_constants:
 .. py:data:: LMP_TYPE_SCALAR, LMP_TYPE_VECTOR, LMP_TYPE_ARRAY, LMP_SIZE_VECTOR, LMP_SIZE_ROWS, LMP_SIZE_COLS
   :type: int
   Constants in the :py:mod:`lammps` module to select what type of data
   to request  from computes  or fixes.  See :cpp:enum:`_LMP_TYPE_CONST`
   for the equivalent constants in the C library interface. Used in
   :py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
 .. _py_var_constants:
 .. py:data:: LMP_VAR_EQUAL, LMP_VAR_ATOM
   :type: int
   Constants in the :py:mod:`lammps` module to select what style of
   variable to query when calling :py:func:`lammps.extract_variable`.
 .. autoclass:: lammps.NeighList
   :members:
   :no-undoc-members:
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1 +1,5 @@
 Sphinx
 sphinxcontrib-spelling
 sphinx-fortran
 breathe
 Pygments
--- a/doc/utils/sphinx-config/_static/css/lammps.css
+++ b/doc/utils/sphinx-config/_static/css/lammps.css
@ -7,3 +7,10 @@
    display: block;
    margin-bottom: 0.809em;
 }
 /* Styles the "git info" line that the theme layout emits with class lammps_release. */
 .lammps_release {
    text-align: center;
    font-size: 11px;
    display: block;
    margin-bottom: 0.405em;
 }
--- a/doc/utils/sphinx-config/_static/lammps-logo.png
+++ b/doc/utils/sphinx-config/_static/lammps-logo.png
--- a/doc/utils/sphinx-config/_themes/lammps_theme/layout.html
+++ b/doc/utils/sphinx-config/_themes/lammps_theme/layout.html
@ -103,6 +103,12 @@
    {%- endif %}
  {%- endblock %}
  {%- block extrahead %} {% endblock %}
  {# Keep modernizr in head - http://modernizr.com/docs/#installing #}
  <script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
  {# for improved browser compatibility #}
  <script src="{{ pathto('_static/polyfill.js', 1) }}"></script>
 </head>
 <body class="wy-body-for-nav">
@ -135,9 +141,8 @@
              {%- set nav_version = current_version %}
            {% endif %}
            {% if nav_version %}
-              <div class="version">
+              <div class="lammps_version">Version: <b>{{ nav_version }}</b></div>
-                {{ nav_version }}
+              <div class="lammps_release">git info: {{ release }}</div>
              </div>
            {% endif %}
          {% endif %}
--- a/doc/utils/sphinx-config/conf.py.in
+++ b/doc/utils/sphinx-config/conf.py.in
@ -23,11 +23,16 @@ try:
 except:
    pass
 LAMMPS_DOC_DIR = '@LAMMPS_DOC_DIR@'
 LAMMPS_SOURCE_DIR = '@LAMMPS_SOURCE_DIR@'
 LAMMPS_PYTHON_DIR = '@LAMMPS_PYTHON_DIR@'
 LAMMPS_DOXYGEN_XML_DIR = '@DOXYGEN_XML_DIR@'
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, os.path.abspath('.'))
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../src/_ext'))
+sys.path.append(os.path.join(LAMMPS_DOC_DIR, 'src', '_ext'))
 # -- General configuration ------------------------------------------------
@ -41,7 +46,9 @@ extensions = [
    'sphinx.ext.mathjax',
    'sphinx.ext.imgmath',
    'sphinx.ext.autodoc',
    'sphinxfortran.fortran_domain',
    'table_from_list',
    'breathe',
 ]
 # 2017-12-07: commented out, since this package is broken with Sphinx 16.x
 #             yet we can no longer use Sphinx 15.x, since that breaks with
@ -72,12 +79,24 @@ copyright = '2003-2020 Sandia Corporation'
 def get_lammps_version():
    import os
    script_dir = os.path.dirname(os.path.realpath(__file__))
-    with open(os.path.join(script_dir, '../../../src/version.h'), 'r') as f:
+    with open(os.path.join(LAMMPS_SOURCE_DIR, 'version.h'), 'r') as f:
        line = f.readline()
        start_pos = line.find('"')+1
        end_pos = line.find('"', start_pos)
        return line[start_pos:end_pos]
 def get_git_info():
    """Return the output of ``git describe`` for use as the release string.

    Underscores in the description are replaced by blanks and surrounding
    whitespace (including the trailing newline printed by git) is removed.
    An empty string is returned when git cannot be run or exits with a
    non-zero status.
    """
    import subprocess
    git_n_date = ''
    try:
        gitinfo = subprocess.run(['git','describe'],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
        if gitinfo.returncode == 0:
            git_n_date = gitinfo.stdout.decode().replace('_',' ').strip()
    except Exception:
        # best effort only: a missing or failing git must not break the doc build,
        # but KeyboardInterrupt/SystemExit are no longer swallowed
        pass
    return git_n_date
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
@ -85,7 +104,7 @@ def get_lammps_version():
 # The short X.Y version.
 version = get_lammps_version()
 # The full version, including alpha/beta/rc tags.
-release = ''
+release = get_git_info()
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@ -153,7 +172,7 @@ html_title = "LAMMPS documentation"
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-html_logo = 'lammps-logo.png'
+html_logo = '_static/lammps-logo.png'
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@ -314,7 +333,7 @@ texinfo_documents = [
 epub_title = 'LAMMPS Documentation - ' + get_lammps_version()
-epub_cover = ('lammps-logo.png', '')
+epub_cover = ('_static/lammps-logo.png', '')
 epub_description = """
 This is the Manual for the LAMMPS software package.
@ -342,13 +361,29 @@ if spelling_spec and has_enchant:
    spelling_lang='en_US'
    spelling_word_list_filename='false_positives.txt'
-sys.path.append(os.path.join(os.path.dirname(__file__), '.'))
+conf_script_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.join(conf_script_dir, '.'))
 import LAMMPSLexer
 from sphinx.highlighting import lexers
 lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True)
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../../python'))
+sys.path.append(LAMMPS_PYTHON_DIR)
 # avoid syntax highlighting in blocks that don't specify language
 highlight_language = 'none'
 # autodoc configuration
 autodoc_member_order = 'bysource'
 #autoclass_content = 'both'
 # breathe configuration
 breathe_projects = { 'progguide' : LAMMPS_DOXYGEN_XML_DIR }
 breathe_default_project = 'progguide'
 breathe_show_define_initializer = True
 breathe_domain_by_extension = { 'h'   : 'cpp',
                                'cpp' : 'cpp',
                                'c'   : 'c',
                                }
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -43,6 +43,7 @@ Afshar
 agilio
 Agilio
 agni
 Agnolin
 Ai
 Aidan
 aij
@ -114,6 +115,7 @@ Archlinux
 arcsin
 arg
 args
 argv
 arrhenius
 Arun
 arXiv
@ -137,6 +139,8 @@ atc
 AtC
 ATC
 athermal
 atime
 atimestep
 athomps
 atm
 atomeye
@ -206,7 +210,6 @@ bcolor
 bdiam
 bdw
 Beckman
 behaviour
 Belak
 Bellott
 benchmarking
@ -247,6 +250,7 @@ bispectrum
 Bispectrum
 bitbucket
 bitmapped
 bitmask
 bitrate
 bitrates
 Bitzek
@ -265,6 +269,7 @@ bodystyle
 Bogaerts
 Bogusz
 Bohrs
 boltz
 Boltzman
 BondAngle
 BondBond
@ -283,6 +288,14 @@ Botu
 Bouguet
 Bourne
 boxcolor
 boxlo
 boxhi
 boxxlo
 boxxhi
 boxylo
 boxyhi
 boxzlo
 boxzhi
 bp
 bpclermont
 bpls
@ -301,6 +314,7 @@ Bryantsev
 Btarget
 btype
 buckPlusAttr
 buf
 builtin
 Bulatov
 Bureekaew
@ -369,6 +383,7 @@ charmm
 CHARMM
 charmmfsh
 charmmfsw
 charptr
 Chaudhuri
 checkbox
 checkmark
@ -407,6 +422,7 @@ cmap
 Cmax
 cmd
 cmdlist
 cmds
 Cmin
 cmm
 CMM
@ -436,6 +452,7 @@ Colvars
 COLVARS
 comID
 Commun
 compositing
 compressibility
 compressive
 Comput
@ -584,6 +601,7 @@ del
 delaystep
 DeleteIDs
 deleteIDs
 delflag
 Dellago
 delocalization
 delocalized
@ -599,6 +617,7 @@ Dequidt
 der
 dereference
 derekt
 Deresiewicz
 Derjagin
 Derjaguin
 Derlet
@ -668,6 +687,8 @@ Donadio
 dotc
 Doty
 doxygen
 doxygenclass
 doxygenfunction
 downarrow
 Doye
 dpd
@ -721,6 +742,7 @@ Eaat
 Eacn
 eam
 eangle
 earg
 eatom
 Eb
 Eba
@ -841,6 +863,7 @@ Erhart
 erorate
 erose
 erotate
 errno
 Ertas
 ervel
 Espanol
@ -899,6 +922,7 @@ Fc
 fcc
 fcm
 Fd
 fd
 fdotr
 fdt
 Fehlberg
@ -923,6 +947,7 @@ ffplay
 fft
 fftbench
 fftw
 fgets
 fhg
 Fi
 Fichthorn
@ -958,6 +983,7 @@ fmackay
 fmag
 fmass
 fmm
 fmt
 fmx
 fmy
 fmz
@ -971,6 +997,7 @@ Fock
 Fogarty
 Foiles
 fopenmp
 forceclear
 forestgreen
 formatarg
 formulae
@ -987,6 +1014,7 @@ Fraige
 framerate
 Frauenheim
 Fraunhofer
 fread
 Freitas
 Frenkel
 Friedrichs
@ -994,6 +1022,7 @@ fs
 fsh
 fstyle
 fsw
 ftm
 ftol
 fugacity
 Fumi
@ -1101,6 +1130,7 @@ gromos
 Gronbech
 Groot
 groupbig
 groupbit
 grp
 Grueneisen
 gsmooth
@ -1163,6 +1193,7 @@ hexorder
 Heyes
 HfO
 hgrid
 hhmrr
 Hibbs
 Higdon
 Hijazi
@ -1172,6 +1203,7 @@ histogrammed
 histogramming
 hma
 hmaktulga
 hplanck
 hoc
 Hochbruck
 Hofling
@ -1214,6 +1246,7 @@ hyperspherical
 hysteretic
 hz
 Ibanez
 iatom
 ibar
 ibm
 icc
@ -1256,6 +1289,7 @@ indices
 inertiax
 inertiay
 inertiaz
 infile
 infty
 inhomogeneities
 inhomogeneous
@ -1296,6 +1330,7 @@ ipp
 Ippolito
 IPv
 IPython
 ipython
 Isele
 isenthalpic
 ish
@ -1444,6 +1479,7 @@ Kloza
 kmax
 Kmax
 KMP
 kmu
 Knizhnik
 knl
 Kofke
@ -1931,6 +1967,7 @@ muz
 mv
 mV
 Mvapich
 mvh
 mvv
 MxN
 myCompute
@ -1943,11 +1980,13 @@ na
 nabla
 Nagaosa
 Nakano
 nall
 namespace
 namespaces
 nan
 NaN
 Nandor
 nangles
 Nangletype
 nangletypes
 Nangletypes
@ -1976,6 +2015,7 @@ Nbin
 Nbins
 nbody
 Nbody
 nbonds
 nbondtype
 Nbondtype
 nbondtypes
@ -1988,9 +2028,11 @@ Nc
 nchunk
 Nchunk
 ncoeff
 ncol
 ncorr
 ncount
 nd
 ndihedrals
 Ndihedraltype
 Ndirango
 ndof
@ -2032,10 +2074,12 @@ Ngyuen
 nh
 nharmonic
 nhc
 nhi
 NiAlH
 Nicklas
 Niklasson
 Nikolskiy
 nimpropers
 Nimpropertype
 Ninteger
 Nissila
@ -2044,9 +2088,11 @@ nitride
 nitrides
 niu
 Nk
 nktv
 nl
 nlen
 Nlines
 nlo
 nlocal
 Nlocal
 Nlog
@ -2054,7 +2100,9 @@ nlp
 nm
 Nm
 Nmax
 nmax
 Nmin
 nmin
 Nmols
 nn
 Nocedal
@ -2107,6 +2155,7 @@ Nrepeat
 nreset
 Nrho
 Nroff
 nrow
 nrun
 Ns
 Nsample
@ -2125,6 +2174,7 @@ Nt
 Ntable
 ntheta
 nthreads
 ntimestep
 Ntptask
 Ntriples
 Ntype
@ -2220,6 +2270,7 @@ oxdna
 oxrna
 oxDNA
 oxRNA
 packings
 padua
 Padua
 pafi
@ -2252,6 +2303,8 @@ Particuology
 pastewka
 Pastewka
 pathangle
 pathname
 pathnames
 Patomtrans
 Pattnaik
 Pavese
@ -2352,6 +2405,7 @@ polydisperse
 polydispersity
 polyelectrolyte
 polyhedra
 polymorphism
 popen
 Popov
 popstore
@ -2385,6 +2439,7 @@ proc
 Proc
 procs
 Prony
 progguide
 ps
 Ps
 pscreen
@ -2431,7 +2486,9 @@ qbmsst
 qcore
 qdist
 qE
 qe
 qeff
 qelectron
 qeq
 QeQ
 QEq
@ -2449,6 +2506,8 @@ qmol
 qoffload
 qopenmp
 qoverride
 qqr
 qqrd
 qtb
 quadratically
 quadrupolar
@ -2504,6 +2563,7 @@ rebo
 recursing
 Ree
 refactored
 refactoring
 reflectionstyle
 regoin
 Reinders
@ -2589,6 +2649,7 @@ Rkouter
 RkouterN
 rmask
 Rmask
 rmass
 rmax
 Rmax
 rmdir
@ -2723,6 +2784,7 @@ shlib
 SHM
 shm
 shockvel
 shrinkexceed
 Shugaev
 si
 SiC
@ -2851,11 +2913,16 @@ strcmp
 streitz
 Streitz
 Streiz
 strerror
 strided
 strietz
 strmatch
 strncmp
 strstr
 Stukowski
 Su
 subbox
 Subclassed
 subcutoff
 subcycle
 subcycling
@ -2996,6 +3063,7 @@ Tmin
 tmp
 tN
 Tobias
 tokenizer
 tokyo
 tol
 toolchain
@ -3226,6 +3294,7 @@ vv
 vx
 Vx
 vxcm
 vxmu
 vy
 Vy
 vycm
@ -3258,8 +3327,9 @@ Widom
 widom
 Wijk
 Wikipedia
 wildcard
 Wildcard
 wildcard
 wildcards
 Wirnsberger
 wirtes
 witin
@ -3301,6 +3371,7 @@ Xmax
 xmgrace
 xMIC
 xmin
 xml
 xmovie
 Xmovie
 xmu
@ -3315,6 +3386,7 @@ xsu
 xtc
 xu
 Xu
 xxt
 xxxxx
 xy
 xyz
--- a/doc/utils/sphinx-config/lammps-logo.png
+++ b/doc/utils/sphinx-config/lammps-logo.png
@ -1 +0,0 @@
 ../../src/JPG/lammps-logo.png
--- a/fortran/README
+++ b/fortran/README
@ -0,0 +1,11 @@
 This directory contains Fortran code that interfaces with LAMMPS as a library
 and allows the LAMMPS library interface to be invoked from Fortran codes.
 It requires a Fortran compiler that supports the Fortran 2003 standard.
 This interface is based on and supersedes the previous Fortran interfaces
 in the examples/COUPLE/fortran* folders.  It is fully supported by the
 LAMMPS developers and included in the documentation and unit testing.
 Details on this Fortran interface and how to build programs using it
 are in the manual in the doc/html/pg_fortran.html file.
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
@ -0,0 +1,281 @@
 ! -------------------------------------------------------------------------
 !   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 !   http://lammps.sandia.gov, Sandia National Laboratories
 !   Steve Plimpton, sjplimp@sandia.gov
 !
 !   Copyright (2003) Sandia Corporation.  Under the terms of Contract
 !   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
 !   certain rights in this software.  This software is distributed under
 !   the GNU General Public License.
 !
 !   See the README file in the top-level LAMMPS directory.
 ! -------------------------------------------------------------------------
 !
 ! Fortran interface to the LAMMPS library implemented as a Fortran 2003
 ! style module that wraps the C-style library interface in library.cpp
 ! and library.h using the ISO_C_BINDING module of the Fortran compiler.
 !
 ! Based on the LAMMPS Fortran 2003 module contributed by:
 !   Karl D. Hammond <karlh@ugcs.caltech.edu>
 !   University of Tennessee, Knoxville (USA), 2012
 !
 ! The Fortran module tries to follow the API of the C-library interface
 ! closely, but like the Python wrapper it employs an object oriented
 ! approach.  To accommodate the object oriented approach, all exported
 ! subroutines and functions have to be implemented in Fortran to then
 ! call the interfaced C style functions with adapted calling conventions
 ! as needed.  The C-library interfaced functions retain their names
 ! starting with "lammps_" while the Fortran versions start with "lmp_".
 !
 MODULE LIBLAMMPS
  USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_ptr, c_null_ptr, c_loc, &
      c_int, c_char, c_null_char, c_double
  IMPLICIT NONE
  PRIVATE
  PUBLIC :: lammps
  ! Derived type representing one LAMMPS instance.  It stores the opaque
  ! C pointer that is passed as first argument to the interfaced C functions
  ! and binds the Fortran wrapper procedures of this module as methods.
  TYPE lammps
      ! handle as returned by lammps_open() or lammps_open_no_mpi()
      TYPE(c_ptr) :: handle
    CONTAINS
      PROCEDURE :: close              => lmp_close
      PROCEDURE :: file               => lmp_file
      PROCEDURE :: command            => lmp_command
      PROCEDURE :: commands_list      => lmp_commands_list
      PROCEDURE :: commands_string    => lmp_commands_string
      PROCEDURE :: version            => lmp_version
      PROCEDURE :: get_natoms         => lmp_get_natoms
  END TYPE lammps
  ! Generic interface with the same name as the type, so that lmp_open()
  ! can be invoked like a constructor, e.g. lmp = lammps(args,comm)
  INTERFACE lammps
      MODULE PROCEDURE lmp_open
  END INTERFACE lammps
  ! interface definitions for calling functions in library.cpp
  ! Each entry declares the Fortran view of one C symbol (given by the
  ! name= argument of BIND(C)).  Scalars and pointers that C receives by
  ! value carry the VALUE attribute; everything else is passed by reference.
  INTERFACE
      ! bound to the C symbol 'lammps_open_fortran'; comm is passed as a
      ! plain C integer
      FUNCTION lammps_open(argc,argv,comm,handle) &
          BIND(C, name='lammps_open_fortran')
        IMPORT :: c_ptr, c_int
        INTEGER(c_int), VALUE, INTENT(in)     :: argc, comm
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
        TYPE(c_ptr), INTENT(out)              :: handle
        TYPE(c_ptr)                           :: lammps_open
      END FUNCTION lammps_open
      ! same as above but without a communicator argument
      FUNCTION lammps_open_no_mpi(argc,argv,handle) &
          BIND(C, name='lammps_open_no_mpi')
        IMPORT :: c_ptr, c_int
        INTEGER(c_int), VALUE, INTENT(in)     :: argc
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
        TYPE(c_ptr), INTENT(out)              :: handle
        TYPE(c_ptr)                           :: lammps_open_no_mpi
      END FUNCTION lammps_open_no_mpi
      SUBROUTINE lammps_close(handle) BIND(C, name='lammps_close')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_close
      ! NOTE(review): declared here with a handle argument; confirm that this
      ! matches the C prototype of lammps_mpi_init() in library.h
      SUBROUTINE lammps_mpi_init(handle) BIND(C, name='lammps_mpi_init')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_mpi_init
      ! NOTE(review): declared here with a handle argument; confirm that this
      ! matches the C prototype of lammps_mpi_finalize() in library.h
      SUBROUTINE lammps_mpi_finalize(handle) &
          BIND(C, name='lammps_mpi_finalize')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_mpi_finalize
      ! filename is a C pointer to a zero terminated string
      SUBROUTINE lammps_file(handle,filename) BIND(C, name='lammps_file')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: filename
      END SUBROUTINE lammps_file
      ! cmd is a C pointer to a zero terminated string
      SUBROUTINE lammps_command(handle,cmd) BIND(C, name='lammps_command')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: cmd
      END SUBROUTINE lammps_command
      ! cmds is an array of ncmd C string pointers
      SUBROUTINE lammps_commands_list(handle,ncmd,cmds) &
          BIND(C, name='lammps_commands_list')
        IMPORT :: c_ptr, c_int
        TYPE(c_ptr), VALUE :: handle
        INTEGER(c_int), VALUE, INTENT(in)     :: ncmd
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: cmds
      END SUBROUTINE lammps_commands_list
      ! str is a C pointer to a zero terminated string
      SUBROUTINE lammps_commands_string(handle,str) &
          BIND(C, name='lammps_commands_string')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: str
      END SUBROUTINE lammps_commands_string
      ! used by the wrappers below to release the C strings made by f2c_string()
      SUBROUTINE lammps_free(ptr) BIND(C, name='lammps_free')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: ptr
      END SUBROUTINE lammps_free
      FUNCTION lammps_version(handle) BIND(C, name='lammps_version')
        IMPORT :: c_ptr, c_int
        TYPE(c_ptr), VALUE :: handle
        INTEGER(c_int) :: lammps_version
      END FUNCTION lammps_version
      ! the result is declared as a C double, not as an integer
      FUNCTION lammps_get_natoms(handle) BIND(C, name='lammps_get_natoms')
        IMPORT :: c_ptr, c_double
        TYPE(c_ptr), VALUE :: handle
        REAL(c_double) :: lammps_get_natoms
      END FUNCTION lammps_get_natoms
  END INTERFACE
 CONTAINS
  ! Fortran wrappers and helper functions.
  ! Constructor for the LAMMPS class.
  ! Combined wrapper around lammps_open_fortran() and lammps_open_no_mpi():
  ! the former is used when a communicator is given, the latter otherwise.
  ! Without an argument list a single argument "liblammps" is passed on.
  TYPE(lammps) FUNCTION lmp_open(args,comm)
    IMPLICIT NONE
    INTEGER,INTENT(in), OPTIONAL :: comm
    CHARACTER(len=*), INTENT(in), OPTIONAL :: args(:)
    TYPE(c_ptr), ALLOCATABLE     :: c_args(:)
    TYPE(c_ptr)                  :: unused=c_null_ptr
    INTEGER :: k,nargs

    ! determine the number of arguments handed to the C library
    nargs = 1
    IF (PRESENT(args)) nargs = SIZE(args)
    ALLOCATE(c_args(nargs))

    ! build the C style argument vector
    IF (PRESENT(args)) THEN
        DO k=1,nargs
           c_args(k) = f2c_string(args(k))
        END DO
    ELSE
        c_args(1) = f2c_string("liblammps")
    END IF

    ! create the LAMMPS instance and keep its handle
    IF (PRESENT(comm)) THEN
        lmp_open%handle = lammps_open(nargs,c_args,comm,unused)
    ELSE
        lmp_open%handle = lammps_open_no_mpi(nargs,c_args,unused)
    END IF

    ! the C strings are no longer needed
    DO k=1,nargs
        CALL lammps_free(c_args(k))
    END DO
    DEALLOCATE(c_args)
  END FUNCTION lmp_open
  ! Combined Fortran wrapper around lammps_close() and lammps_mpi_finalize().
  ! lammps_mpi_finalize() is only called when finalize is present and .TRUE.
  SUBROUTINE lmp_close(self,finalize)
    IMPLICIT NONE
    CLASS(lammps) :: self
    LOGICAL,INTENT(in),OPTIONAL :: finalize
    LOGICAL :: shutdown_mpi

    ! an absent optional argument must not be referenced, so copy it first
    shutdown_mpi = .FALSE.
    IF (PRESENT(finalize)) shutdown_mpi = finalize

    CALL lammps_close(self%handle)
    IF (shutdown_mpi) CALL lammps_mpi_finalize(self%handle)
  END SUBROUTINE lmp_close
  ! equivalent function to lammps_version()
  FUNCTION lmp_version(self) RESULT(vers)
    IMPLICIT NONE
    CLASS(lammps) :: self
    INTEGER :: vers
    vers = lammps_version(self%handle)
  END FUNCTION lmp_version
  ! equivalent function to lammps_get_natoms(); the result is a floating
  ! point number because the interfaced C function is declared that way
  FUNCTION lmp_get_natoms(self) RESULT(natoms)
    IMPLICIT NONE
    CLASS(lammps) :: self
    DOUBLE PRECISION :: natoms
    natoms = lammps_get_natoms(self%handle)
  END FUNCTION lmp_get_natoms
  ! equivalent function to lammps_file()
  SUBROUTINE lmp_file(self,filename)
    IMPLICIT NONE
    CLASS(lammps) :: self
    CHARACTER(len=*) :: filename
    TYPE(c_ptr) :: c_name

    ! hand a zero terminated copy of the file name to the C library
    c_name = f2c_string(filename)
    CALL lammps_file(self%handle,c_name)
    ! release the temporary copy again
    CALL lammps_free(c_name)
  END SUBROUTINE lmp_file
  ! equivalent function to lammps_command()
  SUBROUTINE lmp_command(self,cmd)
    IMPLICIT NONE
    CLASS(lammps) :: self
    CHARACTER(len=*) :: cmd
    TYPE(c_ptr) :: c_cmd

    ! hand a zero terminated copy of the command to the C library
    c_cmd = f2c_string(cmd)
    CALL lammps_command(self%handle,c_cmd)
    ! release the temporary copy again
    CALL lammps_free(c_cmd)
  END SUBROUTINE lmp_command
  ! equivalent function to lammps_commands_list()
  ! cmds holds one LAMMPS command per array element.  The argument is
  ! declared OPTIONAL; when it is absent there is nothing to execute and
  ! the subroutine returns without calling into the library.
  SUBROUTINE lmp_commands_list(self,cmds)
    IMPLICIT NONE
    CLASS(lammps) :: self
    CHARACTER(len=*), INTENT(in), OPTIONAL :: cmds(:)
    TYPE(c_ptr), ALLOCATABLE     :: cmdv(:)
    INTEGER :: i,ncmd

    ! an absent optional argument must not be referenced (not even in SIZE())
    IF (.NOT. PRESENT(cmds)) RETURN

    ! convert command list to c style
    ncmd = SIZE(cmds)
    ALLOCATE(cmdv(ncmd))
    DO i=1,ncmd
        cmdv(i) = f2c_string(cmds(i))
    END DO

    CALL lammps_commands_list(self%handle,ncmd,cmdv)

    ! Clean up allocated memory
    DO i=1,ncmd
        CALL lammps_free(cmdv(i))
    END DO
    DEALLOCATE(cmdv)
  END SUBROUTINE lmp_commands_list
  ! equivalent function to lammps_commands_string()
  SUBROUTINE lmp_commands_string(self,str)
    IMPLICIT NONE
    CLASS(lammps) :: self
    CHARACTER(len=*) :: str
    TYPE(c_ptr) :: c_block

    ! hand a zero terminated copy of the command block to the C library
    c_block = f2c_string(str)
    CALL lammps_commands_string(self%handle,c_block)
    ! release the temporary copy again
    CALL lammps_free(c_block)
  END SUBROUTINE lmp_commands_string
  ! ----------------------------------------------------------------------
  ! local helper functions
  ! copy fortran string to zero terminated c string
  ! Trailing blanks of f_string are dropped (LEN_TRIM).  The result points to
  ! a newly allocated buffer; the wrappers in this module release it again
  ! with lammps_free().
  ! NOTE(review): the buffer is created with a Fortran ALLOCATE; confirm that
  ! releasing it through lammps_free() is valid for the compilers in use.
  FUNCTION f2c_string(f_string) RESULT(ptr)
    CHARACTER (len=*), INTENT(in)           :: f_string
    CHARACTER (len=1, kind=c_char), POINTER :: buffer(:)
    TYPE(c_ptr) :: ptr
    INTEGER :: k, nchar

    nchar = LEN_TRIM(f_string)
    ! one extra element for the terminating null character
    ALLOCATE(buffer(nchar+1))
    buffer(nchar+1) = c_null_char
    DO k=1,nchar
        buffer(k) = f_string(k:k)
    END DO
    ptr = c_loc(buffer(1))
  END FUNCTION f2c_string
 END MODULE LIBLAMMPS
--- a/lib/gpu/Makefile.linux
+++ b/lib/gpu/Makefile.linux
@ -22,13 +22,13 @@ NVCC = nvcc
 #CUDA_ARCH = -arch=sm_21
 # Kepler hardware
-CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
-#CUDA_ARCH = -arch=sm_50
+CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 # Pascal hardware
--- a/lib/gpu/Makefile.linux.double
+++ b/lib/gpu/Makefile.linux.double
@ -7,18 +7,40 @@
 EXTRAMAKE = Makefile.lammps.standard
 ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
 endif
 NVCC = nvcc
-# Kepler CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # Fermi hardware
 #CUDA_ARCH = -arch=sm_20
 #CUDA_ARCH = -arch=sm_21
 # Kepler hardware
 #CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
 CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 # Pascal hardware
 #CUDA_ARCH = -arch=sm_60
 #CUDA_ARCH = -arch=sm_61
 # Volta hardware
 #CUDA_ARCH = -arch=sm_70
 # Turing hardware
 #CUDA_ARCH = -arch=sm_75
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +55,7 @@ CUDA_PRECISION = -D_DOUBLE_DOUBLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux.mixed
+++ b/lib/gpu/Makefile.linux.mixed
@ -7,18 +7,41 @@
 EXTRAMAKE = Makefile.lammps.standard
 ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
 endif
 NVCC = nvcc
-# Kepler CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # Fermi hardware
 #CUDA_ARCH = -arch=sm_20
 #CUDA_ARCH = -arch=sm_21
 # Kepler hardware
 #CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
 CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 # Pascal hardware
 #CUDA_ARCH = -arch=sm_60
 #CUDA_ARCH = -arch=sm_61
 # Volta hardware
 #CUDA_ARCH = -arch=sm_70
 # Turing hardware
 #CUDA_ARCH = -arch=sm_75
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +56,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux.single
+++ b/lib/gpu/Makefile.linux.single
@ -7,18 +7,40 @@
 EXTRAMAKE = Makefile.lammps.standard
 ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
 endif
 NVCC = nvcc
-# Kepler CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_35
 # Tesla CUDA
 CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 # Fermi hardware
 #CUDA_ARCH = -arch=sm_20
 #CUDA_ARCH = -arch=sm_21
 # Kepler hardware
 #CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
 CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 # Pascal hardware
 #CUDA_ARCH = -arch=sm_60
 #CUDA_ARCH = -arch=sm_61
 # Volta hardware
 #CUDA_ARCH = -arch=sm_70
 # Turing hardware
 #CUDA_ARCH = -arch=sm_75
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_SINGLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux_multi
+++ b/lib/gpu/Makefile.linux_multi
@ -13,17 +13,27 @@ endif
 NVCC = nvcc
-# Kepler CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_35
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
 CUDA_ARCH = -arch=sm_30
-CUDA_CODE = -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
+# Fermi hardware
-	    -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] \
+#CUDA_ARCH = -arch=sm_20
-	    -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]
+#CUDA_ARCH = -arch=sm_21
 # Kepler hardware
 #CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
 CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 CUDA_CODE = -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] \
 	    -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
 	    -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_75,code=[sm_75,compute_75]
 CUDA_ARCH += $(CUDA_CODE)
--- a/lib/gpu/Makefile.serial
+++ b/lib/gpu/Makefile.serial
@ -13,13 +13,33 @@ endif
 NVCC = nvcc
-# Tesla CUDA
+# obsolete hardware. not supported by current drivers anymore.
 CUDA_ARCH = -arch=sm_21
 # newer CUDA
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
-CUDA_ARCH = -arch=sm_35
+
 # Fermi hardware
 #CUDA_ARCH = -arch=sm_20
 #CUDA_ARCH = -arch=sm_21
 # Kepler hardware
 #CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37
 # Maxwell hardware
 CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52
 # Pascal hardware
 #CUDA_ARCH = -arch=sm_60
 #CUDA_ARCH = -arch=sm_61
 # Volta hardware
 #CUDA_ARCH = -arch=sm_70
 # Turing hardware
 #CUDA_ARCH = -arch=sm_75
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -35,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs -L../../src/STUBS -lmpi_stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC)
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
 CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
 CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/lal_gauss.cu
+++ b/lib/gpu/lal_gauss.cu
@ -81,7 +81,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
        numtyp r2inv = ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);
        numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
-        ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj;
+        ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;
        f.x+=delx*force;
        f.y+=dely*force;
@ -90,7 +90,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
        if (eflag>0) {
          numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
            gauss1[mtype].w);
-          energy+=factor_lj*e;
+          energy+=e; //factor_lj*e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
@ -168,7 +168,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
        numtyp r2inv = ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);
        numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
-        ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj;
+        ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;
        f.x+=delx*force;
        f.y+=dely*force;
@ -177,7 +177,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
        if (eflag>0) {
          numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
            gauss1[mtype].w);
-          energy+=factor_lj*e;
+          energy+=e; //factor_lj*e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
--- a/lib/gpu/lal_tersoff.cu
+++ b/lib/gpu/lal_tersoff.cu
@ -709,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;
-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];
  __syncthreads();
@ -789,14 +789,14 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/gpu/lal_tersoff_mod.cu
+++ b/lib/gpu/lal_tersoff_mod.cu
@ -719,7 +719,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;
-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];
  __syncthreads();
@ -799,14 +799,14 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
@ -957,7 +957,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;
-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];
  __syncthreads();
@ -1037,14 +1037,14 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/gpu/lal_tersoff_zbl.cu
+++ b/lib/gpu/lal_tersoff_zbl.cu
@ -729,7 +729,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;
-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];
  __syncthreads();
@ -809,14 +809,14 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }
      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];
      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/kokkos/BUILD.md
+++ b/lib/kokkos/BUILD.md
@ -10,33 +10,45 @@ for C++.  Applications heavily leveraging Kokkos are strongly encouraged to use
 You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project.
 Modern CMake is exceedingly simple at a high-level (with the devil in the details).
 Once Kokkos is installed, in your `CMakeLists.txt` simply use:
-````
+````cmake
 find_package(Kokkos REQUIRED)
 ````
 Then for every executable or library in your project:
-````
+````cmake
 target_link_libraries(myTarget Kokkos::kokkos)
 ````
 That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
 Kokkos propagates all the necessary flags to your project.
 This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
-project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`.
+project.
 When configuring your project just set:
 ````bash
 > cmake ${srcdir} \
  -DKokkos_ROOT=${kokkos_install_prefix} \
  -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
 ````
 Note: You may need the following if using some versions of CMake (e.g. 3.12):
 ````cmake
 cmake_policy(SET CMP0074 NEW)
 ````
 If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(Kokkos::kokkos)`.
 The examples in `examples/cmake_build_installed` and `examples/cmake_build_in_tree` can help get you started.
 ## Configuring CMake
-A very basic installation is done with:
+A very basic installation of Kokkos is done with:
-````
+````bash
-cmake ${srcdir} \
+> cmake ${srcdir} \
 -DCMAKE_CXX_COMPILER=g++ \
- -DCMAKE_INSTALL_PREFIX=${my_install_folder}
+ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder}
 ````
 which builds and installs a default Kokkos when you run `make install`.
 There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g.
-````
+````bash
-cmake ${srcdir} \
+> cmake ${srcdir} \
 -DCMAKE_CXX_COMPILER=g++ \
- -DCMAKE_INSTALL_PREFIX=${my_install_folder} \
+ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \
- -DKokkos_ENABLE_OPENMP=On
+ -DKokkos_ENABLE_OPENMP=ON
 ````
 which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
@ -50,16 +62,16 @@ which activates the OpenMP backend. All of the options controlling device backen
 ## Spack
 An alternative to manually building with CMake is to use the Spack package manager.
 To do so, download the `kokkos-spack` git repo and add to the package list:
-````
+````bash
-spack repo add $path-to-kokkos-spack
+> spack repo add $path-to-kokkos-spack
 ````
 A basic installation would be done as:
-````
+````bash
-spack install kokkos
+> spack install kokkos
 ````
 Spack allows options and compilers to be tuned in the install command.
-````
+````bash
-spack install kokkos@3.0 %gcc@7.3.0 +openmp
+> spack install kokkos@3.0 %gcc@7.3.0 +openmp
 ````
 This example illustrates the three most common parameters to Spack:
 * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -67,17 +79,17 @@ This example illustrates the three most common parameters to Spack:
 * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
 For a complete list of Kokkos options, run:
 ````bash
 > spack info kokkos
 ````
-spack info kokkos
+More details can be found in the [Spack README](Spack.md)
 ````
 More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
 #### Spack Development
 Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
 Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
 If you must know, you can locate Spack Kokkos installations with:
-````
+````bash
-spack find -p kokkos ...
+> spack find -p kokkos ...
 ````
 where `...` is the unique spec identifying the particular Kokkos configuration and version.
@ -104,6 +116,12 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_SERIAL
    * Whether to build serial backend
    * BOOL Default: ON
 * Kokkos_ENABLE_HIP (Experimental)
    * Whether to build HIP backend
    * BOOL Default: OFF
 * Kokkos_ENABLE_OPENMPTARGET (Experimental)
    * Whether to build the OpenMP target backend
    * BOOL Default: OFF
 ## Enable Options
 Options can be enabled by specifying `-DKokkos_ENABLE_X`.
@ -138,9 +156,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
    * Debug check on dual views
    * BOOL Default: OFF
 * Kokkos_ENABLE_DEPRECATED_CODE
    * Whether to enable deprecated code
    * BOOL Default: OFF
 * Kokkos_ENABLE_EXAMPLES
    * Whether to enable building examples
    * BOOL Default: OFF
@ -150,9 +165,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_LARGE_MEM_TESTS
    * Whether to perform extra large memory tests
    * BOOL Default: OFF
 * Kokkos_ENABLE_PROFILING
    * Whether to create bindings for profiling tools
    * BOOL Default: ON
 * Kokkos_ENABLE_PROFILING_LOAD_PRINT
    * Whether to print information about which profiling tools got loaded
    * BOOL Default: OFF
@ -235,8 +247,11 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
 * Kokkos_ARCH_BGQ
    * Whether to optimize for the BGQ architecture
    * BOOL Default: OFF
-* Kokkos_ARCH_EPYC
+* Kokkos_ARCH_ZEN
-    * Whether to optimize for the EPYC architecture
+    * Whether to optimize for the Zen architecture
    * BOOL Default: OFF
 * Kokkos_ARCH_ZEN2
    * Whether to optimize for the Zen2 architecture
    * BOOL Default: OFF
 * Kokkos_ARCH_HSW
    * Whether to optimize for the HSW architecture
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@ -1,6 +1,113 @@
 # Change Log
-## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
+## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00)
 **Implemented enhancements:**
 - HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163)
 - HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154)
 - HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137)
 - HIP:Require HIP 3.5.0 or higher [\#3099](https://github.com/kokkos/kokkos/issues/3099)
 - HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096)
 - OpenMPTarget: Significant update to the new experimental backend.  Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169)
 - Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018)
 - Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241)
 - Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234)
 - Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 1: [\#3202](https://github.com/kokkos/kokkos/issues/3202)
 - Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 2: [\#3203](https://github.com/kokkos/kokkos/issues/3203)
 - Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 3: [\#3196](https://github.com/kokkos/kokkos/issues/3196)
 - Annotations for `DefaultExecutionSpace` and `DefaultHostExecutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189)
 - Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187)
 - Improve support for nvcc\_wrapper with exotic host compiler [\#3186](https://github.com/kokkos/kokkos/issues/3186)
 - Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185)
 - Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166)
 - Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165)
 - View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159)
 - Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151)
 - BuildSystem: Provide an explicit default CMAKE\_BUILD\_TYPE [\#3131](https://github.com/kokkos/kokkos/issues/3131)
 - Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124)
 - Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123)
 - BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122)
 - General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106)
 - TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098)
 - nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092)
 - BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091)
 - Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087)
 - Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084)
 - BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082)
 - Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076)
 - HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067)
 - Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061)
 - Extract and use get\_gpu [\#3048](https://github.com/kokkos/kokkos/issues/3048)
 - Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059)
 - Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052)
 - Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051)
 - Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034)
 - BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021)
 - HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020)
 - Autodetect number of devices [\#3013](https://github.com/kokkos/kokkos/issues/3013)
 **Fixed bugs:**
 - Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255)
 - Fix issue with C++ standard flags when using `nvcc\_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254)
 - Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208)
 - Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176)
 - Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170)
 - BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161)
 - ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162)
 - BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127)
 - OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101)
 - BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085)
 - HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080)
 - Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072)
 - Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069)
 - Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035)
 **Incompatibilities:**
 - Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157)
 - Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147)
 - Remove core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146)
 - Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115)
 - Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066)
 **Closed issues:**
 - Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
 - Remove KOKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
 - Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
 - In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
 - Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)
 - DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040)
 - Add installation testing.  [\#3037](https://github.com/kokkos/kokkos/issues/3037)
 - nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017)
 - CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996)
 - Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975)
 - Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974)
 - Add ability to assign kokkos profile function from executable  [\#2973](https://github.com/kokkos/kokkos/issues/2973)
 - ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967)
 - Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237)
 - Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252)
 - Error configuring as subproject [\#3140](https://github.com/kokkos/kokkos/issues/3140)
 - CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207)
 - PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125)
 - Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167)
 - RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192)
 - MemorySpace::allocate needs to have memory pool counting.  [\#3064](https://github.com/kokkos/kokkos/issues/3064)
 - Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038)
 - CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026)
 - Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014)
 - nvcc\_wrapper can't handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993)
 - Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990)
 - complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
 - Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979)
 - Rank 1 custom layouts don't work as expected. [\#2840](https://github.com/kokkos/kokkos/issues/2840)
 ## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)
 **Fixed bugs:**
--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@ -1,4 +1,9 @@
 # Disable in-source builds to prevent source tree corruption.
 if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
  message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files." )
 endif()
 # We want to determine if options are given with the wrong case
 # In order to detect which arguments are given to compare against
 # the list of valid arguments, at the beginning here we need to
@ -34,6 +39,9 @@ IF(COMMAND TRIBITS_PACKAGE_DECL)
 ELSE()
  SET(KOKKOS_HAS_TRILINOS OFF)
 ENDIF()
 # Is this build a subdirectory of another project
 GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
@ -75,16 +83,17 @@ IF(NOT KOKKOS_HAS_TRILINOS)
      SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
      SET(ENV{CXX} ${SPACK_CXX})
    ENDIF()
  ENDif()
  IF(NOT DEFINED ${PROJECT_NAME})
    # WORKAROUND FOR HIPCC
    IF(Kokkos_ENABLE_HIP)
      SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
  ENDIF()
  # Always call the project command to define Kokkos_ variables
  # and to make sure that C++ is an enabled language
  PROJECT(Kokkos CXX)
-    IF(Kokkos_ENABLE_HIP)
+  IF(NOT HAS_PARENT)
-      SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
+    IF (NOT CMAKE_BUILD_TYPE)
      SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
      MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
      SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
          "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
          FORCE)
    ENDIF()
  ENDIF()
 ENDIF()
@ -102,8 +111,8 @@ ENDIF()
 set(Kokkos_VERSION_MAJOR 3)
-set(Kokkos_VERSION_MINOR 1)
+set(Kokkos_VERSION_MINOR 2)
-set(Kokkos_VERSION_PATCH 1)
+set(Kokkos_VERSION_PATCH 0)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -147,6 +156,7 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
 # Check the environment and set certain variables
 # to allow platform-specific checks
 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
 # The build environment setup goes in the following steps
 # 1) Check all the enable options. This includes checking Kokkos_DEVICES
 # 2) Check the compiler ID (type and version)
@ -169,7 +179,6 @@ SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontain
 SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
 SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
 GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
 IF (KOKKOS_HAS_TRILINOS)
  SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
  SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
@ -203,7 +212,7 @@ IF (KOKKOS_HAS_TRILINOS)
    SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
    LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
  ENDFOREACH()
-  SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
+  SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
  IF (KOKKOS_ENABLE_CUDA)
    STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS    "${KOKKOS_CUDA_OPTIONS}")
    FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
@ -246,7 +255,7 @@ KOKKOS_PACKAGE_POSTPROCESS()
 #We are ready to configure the header
 CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
-IF (NOT KOKKOS_HAS_TRILINOS)
+IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
  ADD_LIBRARY(kokkos INTERFACE)
  #Make sure in-tree projects can reference this as Kokkos::
  #to match the installed target names
@ -262,8 +271,6 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
 # If the argument of DESTINATION is a relative path, CMake computes it
 # as relative to ${CMAKE_INSTALL_PATH}.
 INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR})
 INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 #  Finally - if we are a subproject - make sure the enabled devices are visible
 IF (HAS_PARENT)
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
 endif
 KOKKOS_VERSION_MAJOR = 3
-KOKKOS_VERSION_MINOR = 1
+KOKKOS_VERSION_MINOR = 2
-KOKKOS_VERSION_PATCH = 1
+KOKKOS_VERSION_PATCH = 0
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
 # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
@ -20,11 +20,11 @@ KOKKOS_DEVICES ?= "OpenMP"
 #KOKKOS_DEVICES ?= "Pthread"
 # Options: 
 # Intel:    KNC,KNL,SNB,HSW,BDW,SKX
-# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
+# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
 # IBM:      BGQ,Power7,Power8,Power9
 # AMD-GPUS: Vega900,Vega906
-# AMD-CPUS: AMDAVX,EPYC
+# AMD-CPUS: AMDAVX,Zen,Zen2
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@ -32,10 +32,8 @@ KOKKOS_DEBUG ?= "no"
 KOKKOS_USE_TPLS ?= ""
 # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a
 KOKKOS_CXX_STANDARD ?= "c++11"
-# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align
+# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
 KOKKOS_OPTIONS ?= ""
 # Option for setting ETI path
 KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
 KOKKOS_CMAKE ?= "no"
 KOKKOS_TRIBITS ?= "no"
 KOKKOS_STANDALONE_CMAKE ?= "no"
@ -74,6 +72,7 @@ KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
 KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
 KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
 KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
 KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20)
 # Check for external libraries.
 KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
@ -83,9 +82,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
 # Check for advanced settings.
 KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
 KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
-KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
+KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
 KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
 KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code)
 KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
 KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
 KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
@ -96,7 +93,6 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
 KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
 KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
 KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
 KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
 KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
@ -140,6 +136,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_DEVICELIST += OPENMPTARGET
  # The OpenMPTarget backend needs C++17 or newer. Add up every standard
  # selector that yields C++17/C++20; c++1z is counted as well because it
  # also results in KOKKOS_ENABLE_CXX17 being written to the config header.
  KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX1Z) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX20) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX2A))
  # Stop right away with a clear message instead of failing later in the compile.
  ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
    $(error OpenMPTarget backend requires C++17 or newer)
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -281,7 +283,7 @@ endif
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
  KOKKOS_INTERNAL_CXX14_FLAG := --c++14
-  #KOKKOS_INTERNAL_CXX17_FLAG := --c++17
+  KOKKOS_INTERNAL_CXX17_FLAG := --c++17
 else
  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
     KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
@ -338,35 +340,27 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
 KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
 KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
 KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
 KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
 KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70)   \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72)   \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_TURING75)  \
-                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+                                              + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80))
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
 #SEK: This seems like a bug to me
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
  KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
  KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
-  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
+  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
+                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70)   \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72)   \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_TURING75)  \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
@ -394,19 +388,20 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 # AMD based.
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
-KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
+KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
 # "Zen" is a prefix of "Zen2". kokkos_has_string appears to match substrings
 # (the generic Kepler/Maxwell fallback is only consulted when no specific
 # NVIDIA arch matched) -- TODO confirm against its definition. Only test for
 # plain Zen when Zen2 was not requested, so that the AVX2 / x86_64 sums below
 # stay at 1 and only one set of -march flags is emitted.
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
  KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
 else
  KOKKOS_INTERNAL_USE_ARCH_ZEN := 0
 endif
 KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
 KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
 KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
-KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
 KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
 KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
 # Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
+KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
 KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -430,7 +425,7 @@ endif
 KOKKOS_CPPFLAGS =
 KOKKOS_LIBDIRS =
 ifneq ($(KOKKOS_CMAKE), yes)
-  KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
+  KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
 endif
 KOKKOS_TPL_INCLUDE_DIRS =
 KOKKOS_TPL_LIBRARY_DIRS =
@ -458,88 +453,91 @@ KOKKOS_CONFIG_HEADER=KokkosCore_config.h
 # Functions for generating config header file
 # kokkos_append_header: $(call kokkos_append_header,TEXT) runs a shell
 # `echo TEXT >> $(KOKKOS_INTERNAL_CONFIG_TMP)`, i.e. it appends TEXT as one
 # line to the temporary config file. TEXT is handed to the shell unquoted,
 # so callers pass it already wrapped in shell quotes. The call expands to
 # the (empty) echo output; callers assign it to a throw-away variable.
 kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
 # assign hash sign to variable for compat. with make 4.3
 # (GNU make 4.3 keeps the backslash of `\#` inside a function call, so
 # preprocessor lines are built from $H instead of an escaped hash sign)
 H := \#
 # Do not append first line
 tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
 tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
 tmp := $(call kokkos_append_header,"$(shell date)")
 tmp := $(call kokkos_append_header,"----------------------------------------------*/")
-tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
+tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
-tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
+tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
-tmp := $(call kokkos_append_header,'\#else')
+tmp := $(call kokkos_append_header,'$H''else')
-tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
+tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
-tmp := $(call kokkos_append_header,'\#endif')
+tmp := $(call kokkos_append_header,'$H''endif')
 tmp := $(call kokkos_append_header,"")
-tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
+tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
 tmp := $(call kokkos_append_header,"")
 tmp := $(call kokkos_append_header,"/* Execution Spaces */")
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_ROCM')
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
  ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif
 #only add the c++ standard flags if this is not CMake
@ -548,34 +546,39 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX11")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@ -585,20 +588,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
  KOKKOS_CXXFLAGS += -g
  KOKKOS_LDFLAGS += -g
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
  ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
 endif
 # Emit the tuning define only when enable_tuning was given in KOKKOS_OPTIONS.
 # The hash sign comes from $H rather than `\#`: GNU make 4.3 would keep the
 # backslash inside the function call and write "\#define" into the header.
 ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING")
 endif
 # KOKKOS_ENABLE_LIBDL is written unconditionally.
 tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL")
 ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
  ifneq ($(KOKKOS_CMAKE), yes)
    ifneq ($(HWLOC_PATH),)
@ -611,11 +620,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
    KOKKOS_LIBS += -lhwloc
    KOKKOS_TPL_LIBRARY_NAMES += hwloc
  endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
  KOKKOS_LIBS += -lrt
  KOKKOS_TPL_LIBRARY_NAMES += rt
 endif
@ -632,50 +641,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
    KOKKOS_LIBS += -lmemkind -lnuma
    KOKKOS_TPL_LIBRARY_NAMES += memkind numa
  endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
 endif
 ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
  ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1)
    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
 endif
 tmp := $(call kokkos_append_header,"/* Optimization Settings */")
 ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
 endif
 tmp := $(call kokkos_append_header,"/* Cuda Settings */")
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
    endif
  endif
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
  endif
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      KOKKOS_CXXFLAGS += -fcuda-rdc
      KOKKOS_LDFLAGS += -fcuda-rdc
@ -696,7 +691,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
-        tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
+        tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
        KOKKOS_CXXFLAGS += -expt-extended-lambda
      else
        $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
@ -704,14 +699,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    endif
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
    endif
  endif
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0)
-        tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
+        tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
        KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
      else
        $(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.)
@ -719,25 +714,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    endif
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
    endif
  endif
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
  ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
  endif
 endif
 # Add Architecture flags.
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -754,7 +749,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -770,9 +765,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
  endif
 endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
@ -783,9 +778,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
    KOKKOS_LDFLAGS += -mavx2
  else
    KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
    KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -802,8 +810,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -820,7 +828,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xSSE4.2
@ -842,7 +850,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx
@ -864,7 +872,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -876,7 +884,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -897,7 +905,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -918,7 +926,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -940,7 +948,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -962,7 +970,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xMIC-AVX512
@ -983,7 +991,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX512
@ -1004,7 +1012,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
  KOKKOS_CXXFLAGS += -mmic
  KOKKOS_LDFLAGS += -mmic
 endif
@ -1039,65 +1047,70 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
  endif
  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
@ -1121,13 +1134,13 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  # Lets start with adding architecture defines
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
    KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
    KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
  endif
@ -1138,7 +1151,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
  ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
    KOKKOS_CXXFLAGS+=-fgpu-rdc
    KOKKOS_LDFLAGS+=-fgpu-rdc
  else
@ -1171,9 +1184,6 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
 endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
  ifneq ($(CUDA_PATH),)
    # Add the CUDA toolkit headers to the preprocessor flags. The variable must
    # be KOKKOS_CPPFLAGS (the one consumed by the compile rules); the previous
    # spelling KOKKOS_CPPLAGS created an unused variable and dropped the path.
    KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
@ -1211,9 +1221,6 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
 endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -1228,9 +1235,6 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
 endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
  KOKKOS_LIBS += -lpthread
  KOKKOS_TPL_LIBRARY_NAMES += pthread
@ -1279,9 +1283,6 @@ endif
 # Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
 # device to avoid a link warning.
 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
 endif
 endif
 ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@ -26,21 +26,17 @@ Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spi
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
 Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
-Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
+Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
 Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
 Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp 
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
 endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,9 +46,6 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
 Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
 endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
@ -75,9 +68,6 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
 Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
 endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -85,9 +75,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
 endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -95,9 +82,6 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
 Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
 ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
  include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
 endif
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
--- a/lib/kokkos/README.md
+++ b/lib/kokkos/README.md
@ -151,7 +151,7 @@ Full details are given in the [build instructions](BUILD.md). Basic setups are s
 ## CMake
 The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
-````
+````bash
 cmake $srcdir \
  -DCMAKE_CXX_COMPILER=$path_to_compiler \
  -DCMAKE_INSTALL_PREFIX=$path_to_install \
@ -170,7 +170,7 @@ and run `make test` after completing the build.
 For your CMake project using Kokkos, code such as the following:
-````
+````cmake
 find_package(Kokkos)
 ...
 target_link_libraries(myTarget Kokkos::kokkos)
@ -187,17 +187,15 @@ for the install location given above.
 ## Spack
 An alternative to manually building with CMake is to use the Spack package manager.
-To do so, download the `kokkos-spack` git repo and add to the package list:
+To get started, download the Spack [repo](https://github.com/spack/spack).
 ````
 spack repo add $path-to-kokkos-spack
 ````
 A basic installation would be done as:
-````
+````bash
-spack install kokkos
+> spack install kokkos
 ````
 Spack allows options and compilers to be tuned in the install command.
-````
+````bash
-spack install kokkos@3.0 %gcc@7.3.0 +openmp
+> spack install kokkos@3.0 %gcc@7.3.0 +openmp
 ````
 This example illustrates the three most common parameters to Spack:
 * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -205,33 +203,33 @@ This example illustrates the three most common parameters to Spack:
 * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
 For a complete list of Kokkos options, run:
-````
+````bash
-spack info kokkos
+> spack info kokkos
 ````
 Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
 Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
 More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
-````
+````bash
-spack find -p kokkos ...
+> spack find -p kokkos ...
 ````
 where `...` is the unique spec identifying the particular Kokkos configuration and version.
-
+Some more details can be found in the Kokkos Spack [documentation](Spack.md) or on the Spack [website](https://spack.readthedocs.io/en/latest).
 ## Raw Makefile
 A bash script is provided to generate raw makefiles.
 To install Kokkos as a library create a build directory and run the following
-````
+````bash
-$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
+> $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
 ````
 Once the Makefile is generated, run:
-````
+````bash
-make kokkoslib
+> make kokkoslib
-make install
+> make install
 ````
 To additionally run the unit tests:
-````
+````bash
-make build-test
+> make build-test
-make test
+> make test
 ````
 Run `generate_makefile.bash --help` for more detailed options such as
 changing the device type for which to build.
@ -274,7 +272,7 @@ more than a single GPU is used by a single process.
 If you publish work which mentions Kokkos, please cite the following paper:
-````
+````BibTeX
@article{CarterEdwards20143202,
  title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
  journal = "Journal of Parallel and Distributed Computing ",
--- a/lib/kokkos/Spack.md
+++ b/lib/kokkos/Spack.md
@ -0,0 +1,267 @@
 ![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)
 # Kokkos Spack
 This gives instructions for using Spack to install Kokkos and developing packages that depend on Kokkos.
 ## Getting Started
 Make sure you have downloaded [Spack](https://github.com/spack/spack).
 The easiest way to configure the Spack environment is:
 ````bash
 > source spack/share/spack/setup-env.sh
 ````
 with other scripts available for other shells.
 You can display information about how to install packages with:
 ````bash
 > spack info kokkos
 ````
 This will print all the information about how to install Kokkos with Spack.
 For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io).
 ## Setting Up Spack: Avoiding the Package Cascade
 By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA.
 This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable).  For example, your `packages.yaml` file could be:
 ````yaml
 packages:
 cuda:
  modules:
   cuda@10.1.243: [cuda/10.1.243]
  paths:
   cuda@10.1.243:
    /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
  buildable: false
 cmake:
  modules:
   cmake: [cmake/3.16.8]
  paths:
   cmake:
    /opt/local/ppc64le/cmake/3.16.8
  buildable: false
 ````
 The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
 The `buildable` flag is useful to make sure Spack crashes if there is a path error,
 rather than having a typo and Spack rebuilding everything because `cmake` isn't found.
 You can verify your environment is set up correctly by running `spack graph` or `spack spec`.
 For example:
 ````bash
 > spack graph kokkos +cuda
 o  kokkos
 |\
 o |  cuda
 /
 o  cmake
 ````
 Without the existing CUDA and CMake being identified in `packages.yaml`, a subset (!) of the output would be:
 ````bash
 o  kokkos
 |\
 | o  cmake
 | |\
 | | | |\
 | | | | | |\
 | | | | | | | |\
 | | | | | | | | | |\
 | | | | | | | o | | |  libarchive
 | | | | | | | |\ \ \ \
 | | | | | | | | | |\ \ \ \
 | | | | | | | | | | | | |_|/
 | | | | | | | | | | | |/| |
 | | | | | | | | | | | | | o  curl
 | | |_|_|_|_|_|_|_|_|_|_|/|
 | |/| | | |_|_|_|_|_|_|_|/
 | | | | |/| | | | | | | |
 | | | | o | | | | | | | |  openssl
 | |/| | | | | | | | | | |
 | | | | | | | | | | o | |  libxml2
 | | |_|_|_|_|_|_|_|/| | |
 | | | | | | | | | | |\ \ \
 | o | | | | | | | | | | | |  zlib
 |  / / / / / / / / / / / /
 | o | | | | | | | | | | |  xz
 |  / / / / / / / / / / /
 | o | | | | | | | | | |  rhash
 |  / / / / / / / / / /
 | | | | o | | | | | |  nettle
 | | | | |\ \ \ \ \ \ \
 | | | o | | | | | | | |  libuv
 | | | | o | | | | | | |  autoconf
 | | |_|/| | | | | | | |
 | | | | |/ / / / / / /
 | o | | | | | | | | |  perl
 | o | | | | | | | | |  gdbm
 | o | | | | | | | | |  readline
 ````
 ## Configuring Kokkos as a Project Dependency
 Say you have a project "SuperScience" which needs to use Kokkos.
 In your `package.py` file, you would generally include something like:
 ````python
 class SuperScience(CMakePackage):
  ...
  depends_on("kokkos")
 ````
 Often projects want to tweak behavior when using certain features, e.g.
 ````python
  depends_on("kokkos+cuda", when="+cuda")
 ````
 if your project needs CUDA-specific logic to configure and build.
 This illustrates the general principle in Spack of "flowing-up".
 A user requests a feature in the final app:
 ````bash
 > spack install superscience+cuda
 ````
 This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build.
 The downstream app (SuperScience) tells the upstream app (Kokkos) how to build.
 Because Kokkos is a performance portability library, it somewhat inverts this principle.
 Kokkos "flows-down", telling your application how best to configure for performance.
 Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build,
 a pre-built Kokkos should be telling the downstream app SuperScience what variants to use.
 Kokkos works best when there is an "expert" configuration installed on your system.
 Your build should simply request `-DKokkos_ROOT=<BEST_KOKKOS_FOR_MY_SYSTEM>` and configure appropriately based on the Kokkos it finds.
 Kokkos has many, many build variants.
 Where possible, projects should only depend on a general Kokkos, not specific variants.
 We recommend instead adding a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users) for each system you build on.
 For a Xeon + Volta system, this could look like:
 ````yaml
 kokkos:
  variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70
  compiler: [gcc@7.2.0]
 ````
 which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1.
 It also enables support for CUDA Lambdas.
 The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below).
 Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture.
 For a Haswell system, we use
 ````yaml
 kokkos:
  variants: +openmp std=14 target=haswell
  compiler: [intel@18]
 ````
 which uses the built-in microarchitecture variants of Spack.
 Consult the Spack documentation for more details of Spack microarchitectures
 and CUDA architectures.
 Spack does not currently provide an AMD GPU microarchitecture option.
 If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`.
 ````yaml
 kokkos:
  variants: +hip amd_gpu_arch=vega900
 ````
 Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want.
 For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems).
 If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project:
 ````bash
 > spack install superscience
 ````
 you may end up just getting the default Kokkos (i.e. Serial).
 Some examples are included in the `config/yaml` folder for common platforms.
 Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
 For example, with Kokkos Kernels:
 ````bash
 kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512
    ^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512
        ^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
                ^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512
                        ^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512
                                    ^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512
 ````
 The output can be very verbose, but we can verify the expected `kokkos`:
 ````bash
 kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
 ````
 We see that we do have, e.g., `+volta70` and `+wrapper`.
 ### Spack Environments
 The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)).
 Rather than installing packages one-at-a-time, you add packages to an environment.
 After adding all packages, you concretize and install them all.
 Using environments, one can explicitly add a desired Kokkos for the environment, e.g.
 ````bash
 > spack add kokkos +cuda +cuda_lambda +volta70
 > spack add my_project +my_variant
 > ...
 > spack install
 ````
 All packages within the environment will build against the CUDA-enabled Kokkos,
 even if they only request a default Kokkos.
 ## NVCC Wrapper
 Kokkos is a C++ project, but often builds for the CUDA backend.
 This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler.
 Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler.
 `nvcc` itself also uses an underlying host compiler, e.g. GCC.
 In Spack, the underlying host compiler is specified as below, e.g.:
 ````bash
 > spack install package %gcc@8.0.0
 ````
 This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant.
 ````bash
 > spack install kokkos +cuda +wrapper %gcc@7.2.0
 ````
 Downstream projects depending on Kokkos need to override their compiler.
 Kokkos provides the compiler in a `kokkos_cxx` variable,
 which points to either `nvcc_wrapper` when needed or the regular compiler otherwise.
 Spack projects already do this to use MPI compiler wrappers.
 ````python
 def cmake_args(self):
  options = []
  ...
  options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx)
  ...
  return options
 ````
 Note: `nvcc_wrapper` works with the MPI compiler wrappers.
 If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`.
 Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood.
 ````python
 def cmake_args(self):
  options = []
  ...
  options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx)
  ...
  return options
 ````
 To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI).
 This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway.
 This behavior is necessary for now, but will hopefully be removed later.
 When using environments, if MPI is not needed, you can remove the MPI dependency with:
 ````bash
 > spack add kokkos-nvcc-wrapper ~mpi
 ````
 ## Developing With Spack
 Spack has historically been much more suited to *deployment* of mature packages than active testing or developing.
 However, recent features have improved support for development.
 Future releases are likely to make this even easier and incorporate Git integration.
 The most common commands will do a full build and install of the packages.
 If doing development, you may wish to merely set up a build environment.
 This allows you to modify the source and re-build.
 In this case, you can stop after configuring.
 Suppose you have a Kokkos checkout in the folder `kokkos-src`:
 ````bash
 > spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp
 ````
 This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown):
 Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases.
 You are usually developing a feature branch that will merge into `develop`,
 hence you are making a new `develop` branch.
 ````bash
 > cd kokkos-src
 > source spack-build-env.txt
 > cd spack-build
 > make
 ````
 Before sourcing the Spack development environment, you may wish to save your current environment:
 ````bash
 > declare -px > myenv.sh
 ````
 When done with Spack, you can then restore your original environment:
 ````bash
 > source myenv.sh
 ````
--- a/lib/kokkos/algorithms/CMakeLists.txt
+++ b/lib/kokkos/algorithms/CMakeLists.txt
@ -2,7 +2,9 @@
 KOKKOS_SUBPACKAGE(Algorithms)
-ADD_SUBDIRECTORY(src)
+IF (NOT Kokkos_INSTALL_TESTING)
  ADD_SUBDIRECTORY(src)
 ENDIF()
 KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
--- a/lib/kokkos/algorithms/src/CMakeLists.txt
+++ b/lib/kokkos/algorithms/src/CMakeLists.txt
@ -7,9 +7,15 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 #-----------------------------------------------------------------------------
-FILE(GLOB HEADERS *.hpp)
+FILE(GLOB ALGO_HEADERS *.hpp)
-FILE(GLOB SOURCES *.cpp)
+FILE(GLOB ALGO_SOURCES *.cpp)
-LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
+LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
 INSTALL (
  DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
  DESTINATION ${KOKKOS_HEADER_DIR}
  FILES_MATCHING PATTERN "*.hpp"
 )
 #-----------------------------------------------------------------------------
@ -17,8 +23,8 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
 # These will get ignored for standalone CMake and a true interface library made
 KOKKOS_ADD_INTERFACE_LIBRARY(
  kokkosalgorithms
-  HEADERS ${HEADERS}
+  HEADERS ${ALGO_HEADERS}
-  SOURCES ${SOURCES}
+  SOURCES ${ALGO_SOURCES}
 )
 KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
  ${KOKKOS_TOP_BUILD_DIR}
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@ -94,9 +94,9 @@ namespace Kokkos {
    class Pool {
     public:
      //The Kokkos device type
-      typedef Device device_type;
+      using device_type = Device;
      //The actual generator type
-      typedef Generator<Device> generator_type;
+      using generator_type = Generator<Device>;
      //Default constructor: does not initialize a pool
      Pool();
@ -124,7 +124,7 @@ namespace Kokkos {
    class Generator {
     public:
     //The Kokkos device type
-    typedef DeviceType device_type;
+    using device_type = DeviceType;
    //Max return values of respective [X]rand[S]() functions
    enum {MAX_URAND = 0xffffffffU};
@ -138,75 +138,75 @@ namespace Kokkos {
    KOKKOS_INLINE_FUNCTION
    Generator (STATE_ARGUMENTS, int state_idx = 0);
-    //Draw a equidistributed uint32_t in the range (0,MAX_URAND]
+    //Draw a equidistributed uint32_t in the range [0,MAX_URAND)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand();
-    //Draw a equidistributed uint64_t in the range (0,MAX_URAND64]
+    //Draw a equidistributed uint64_t in the range [0,MAX_URAND64)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64();
-    //Draw a equidistributed uint32_t in the range (0,range]
+    //Draw a equidistributed uint32_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand(const uint32_t& range);
-    //Draw a equidistributed uint32_t in the range (start,end]
+    //Draw a equidistributed uint32_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand(const uint32_t& start, const uint32_t& end );
-    //Draw a equidistributed uint64_t in the range (0,range]
+    //Draw a equidistributed uint64_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64(const uint64_t& range);
-    //Draw a equidistributed uint64_t in the range (start,end]
+    //Draw a equidistributed uint64_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64(const uint64_t& start, const uint64_t& end );
-    //Draw a equidistributed int in the range (0,MAX_RAND]
+    //Draw a equidistributed int in the range [0,MAX_RAND)
    KOKKOS_INLINE_FUNCTION
    int rand();
-    //Draw a equidistributed int in the range (0,range]
+    //Draw a equidistributed int in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    int rand(const int& range);
-    //Draw a equidistributed int in the range (start,end]
+    //Draw a equidistributed int in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    int rand(const int& start, const int& end );
-    //Draw a equidistributed int64_t in the range (0,MAX_RAND64]
+    //Draw a equidistributed int64_t in the range [0,MAX_RAND64)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64();
-    //Draw a equidistributed int64_t in the range (0,range]
+    //Draw a equidistributed int64_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64(const int64_t& range);
-    //Draw a equidistributed int64_t in the range (start,end]
+    //Draw a equidistributed int64_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64(const int64_t& start, const int64_t& end );
-    //Draw a equidistributed float in the range (0,1.0]
+    //Draw a equidistributed float in the range [0,1.0)
    KOKKOS_INLINE_FUNCTION
    float frand();
-    //Draw a equidistributed float in the range (0,range]
+    //Draw a equidistributed float in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    float frand(const float& range);
-    //Draw a equidistributed float in the range (start,end]
+    //Draw a equidistributed float in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    float frand(const float& start, const float& end );
-    //Draw a equidistributed double in the range (0,1.0]
+    //Draw a equidistributed double in the range [0,1.0)
    KOKKOS_INLINE_FUNCTION
    double drand();
-    //Draw a equidistributed double in the range (0,range]
+    //Draw a equidistributed double in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    double drand(const double& range);
-    //Draw a equidistributed double in the range (start,end]
+    //Draw a equidistributed double in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    double drand(const double& start, const double& end );
@ -221,11 +221,11 @@ namespace Kokkos {
    //Additional Functions:
-    //Fills view with random numbers in the range (0,range]
+    //Fills view with random numbers in the range [0,range)
    template<class ViewType, class PoolType>
    void fill_random(ViewType view, PoolType pool, ViewType::value_type range);
-    //Fills view with random numbers in the range (start,end]
+    //Fills view with random numbers in the range [start,end)
    template<class ViewType, class PoolType>
    void fill_random(ViewType view, PoolType pool,
                     ViewType::value_type start, ViewType::value_type end);
@ -381,7 +381,7 @@ struct rand<Generator, unsigned long> {
 // NOTE (mfh 26 oct 2014) This is a partial specialization for long
 // long, a C99 / C++11 signed type which is guaranteed to be at
 // least 64 bits.  Do NOT write a partial specialization for
-// int64_t!!!  This is just a typedef!  It could be either long or
+// int64_t!!!  This is just an alias!  It could be either long or
 // long long.  We don't know which a priori, and I've seen both.
 // The types long and long long are guaranteed to differ, so it's
 // always safe to specialize for both.
@ -413,7 +413,7 @@ struct rand<Generator, long long> {
 // NOTE (mfh 26 oct 2014) This is a partial specialization for
 // unsigned long long, a C99 / C++11 unsigned type which is
 // guaranteed to be at least 64 bits.  Do NOT write a partial
-// specialization for uint64_t!!!  This is just a typedef!  It could
+// specialization for uint64_t!!!  This is just an alias!  It could
 // be either unsigned long or unsigned long long.  We don't know
 // which a priori, and I've seen both.  The types unsigned long and
 // unsigned long long are guaranteed to differ, so it's always safe
@ -604,11 +604,7 @@ struct Random_UniqueIndex {
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type) {
 #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
    const int i = ExecutionSpace::hardware_thread_id();
 #else
    const int i = ExecutionSpace::impl_hardware_thread_id();
 #endif
    return i;
 #else
    return 0;
@ -652,15 +648,13 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
  static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __HIP_DEVICE_COMPILE__
    const int i_offset =
-        (hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
+        (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
-        hipThreadIdx_z;
+    int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
-    int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
+                 blockDim.x * blockDim.y * blockDim.z +
              hipBlockIdx_z) *
                 hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
             i_offset) %
            locks_.extent(0);
    while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
-      i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
+      i += blockDim.x * blockDim.y * blockDim.z;
      if (i >= static_cast<int>(locks_.extent(0))) {
        i = i_offset;
      }
@ -687,7 +681,7 @@ class Random_XorShift64 {
  friend class Random_XorShift64_Pool<DeviceType>;
 public:
-  typedef DeviceType device_type;
+  using device_type = DeviceType;
  constexpr static uint32_t MAX_URAND   = std::numeric_limits<uint32_t>::max();
  constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -805,11 +799,6 @@ class Random_XorShift64 {
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
 #ifndef __HIP_DEVICE_COMPILE__  // FIXME_HIP
    using std::sqrt;
 #else
    using ::sqrt;
 #endif
    double S = 2.0;
    double U;
    while (S >= 1.0) {
@ -817,7 +806,7 @@ class Random_XorShift64 {
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
-    return U * sqrt(-2.0 * log(S) / S);
+    return U * std::sqrt(-2.0 * log(S) / S);
  }
  KOKKOS_INLINE_FUNCTION
@ -830,15 +819,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift64_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  typedef View<int*, execution_space> locks_type;
+  using locks_type      = View<int*, execution_space>;
-  typedef View<uint64_t*, DeviceType> state_data_type;
+  using state_data_type = View<uint64_t*, DeviceType>;
  locks_type locks_;
  state_data_type state_;
  int num_states_;
 public:
-  typedef Random_XorShift64<DeviceType> generator_type;
+  using generator_type = Random_XorShift64<DeviceType>;
-  typedef DeviceType device_type;
+  using device_type    = DeviceType;
  KOKKOS_INLINE_FUNCTION
  Random_XorShift64_Pool() { num_states_ = 0; }
@ -923,8 +912,8 @@ class Random_XorShift1024 {
  friend class Random_XorShift1024_Pool<DeviceType>;
 public:
-  typedef Random_XorShift1024_Pool<DeviceType> pool_type;
+  using pool_type   = Random_XorShift1024_Pool<DeviceType>;
-  typedef DeviceType device_type;
+  using device_type = DeviceType;
  constexpr static uint32_t MAX_URAND   = std::numeric_limits<uint32_t>::max();
  constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -1046,11 +1035,6 @@ class Random_XorShift1024 {
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
 #ifndef KOKKOS_ENABLE_HIP  // FIXME_HIP
    using std::sqrt;
 #else
    using ::sqrt;
 #endif
    double S = 2.0;
    double U;
    while (S >= 1.0) {
@ -1058,7 +1042,7 @@ class Random_XorShift1024 {
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
-    return U * sqrt(-2.0 * log(S) / S);
+    return U * std::sqrt(-2.0 * log(S) / S);
  }
  KOKKOS_INLINE_FUNCTION
@ -1071,9 +1055,9 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift1024_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  typedef View<int*, execution_space> locks_type;
+  using locks_type      = View<int*, execution_space>;
-  typedef View<int*, DeviceType> int_view_type;
+  using int_view_type   = View<int*, DeviceType>;
-  typedef View<uint64_t * [16], DeviceType> state_data_type;
+  using state_data_type = View<uint64_t * [16], DeviceType>;
  locks_type locks_;
  state_data_type state_;
@ -1082,9 +1066,9 @@ class Random_XorShift1024_Pool {
  friend class Random_XorShift1024<DeviceType>;
 public:
-  typedef Random_XorShift1024<DeviceType> generator_type;
+  using generator_type = Random_XorShift1024<DeviceType>;
-  typedef DeviceType device_type;
+  using device_type = DeviceType;
  KOKKOS_INLINE_FUNCTION
  Random_XorShift1024_Pool() { num_states_ = 0; }
@ -1176,14 +1160,13 @@ struct fill_random_functor_begin_end;
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1203,14 +1186,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1232,14 +1214,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1262,14 +1243,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1293,14 +1273,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1326,14 +1305,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1361,14 +1339,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1398,14 +1375,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1437,14 +1413,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1466,14 +1441,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1497,14 +1471,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1529,14 +1502,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1562,14 +1534,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1597,14 +1568,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1634,14 +1604,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1673,14 +1642,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;
-  typedef rand<typename RandomPool::generator_type,
+  using Rand = rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
+                    typename ViewType::non_const_value_type>;
      Rand;
  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
--- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
@ -95,9 +95,9 @@ class BinSort {
 public:
  template <class DstViewType, class SrcViewType>
  struct copy_functor {
-    typedef typename SrcViewType::const_type src_view_type;
+    using src_view_type = typename SrcViewType::const_type;
-    typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
+    using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
    DstViewType dst_values;
    src_view_type src_values;
@ -120,17 +120,17 @@ class BinSort {
    // If SrcViewType is a Kokkos::View, a const random-access view can be
    // generated; otherwise only its const type can be used.
-    typedef typename std::conditional<
+    using src_view_type = typename std::conditional<
        Kokkos::is_view<SrcViewType>::value,
        Kokkos::View<typename SrcViewType::const_data_type,
                     typename SrcViewType::array_layout,
                     typename SrcViewType::device_type,
                     Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
-        typename SrcViewType::const_type>::type src_view_type;
+        typename SrcViewType::const_type>::type;
-    typedef typename PermuteViewType::const_type perm_view_type;
+    using perm_view_type = typename PermuteViewType::const_type;
-    typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
+    using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
    DstViewType dst_values;
    perm_view_type sort_order;
@ -151,8 +151,8 @@ class BinSort {
    }
  };
-  typedef typename Space::execution_space execution_space;
+  using execution_space = typename Space::execution_space;
-  typedef BinSortOp bin_op_type;
+  using bin_op_type     = BinSortOp;
  struct bin_count_tag {};
  struct bin_offset_tag {};
@ -160,30 +160,30 @@ class BinSort {
  struct bin_sort_bins_tag {};
 public:
-  typedef SizeType size_type;
+  using size_type  = SizeType;
-  typedef size_type value_type;
+  using value_type = size_type;
-  typedef Kokkos::View<size_type*, Space> offset_type;
+  using offset_type    = Kokkos::View<size_type*, Space>;
-  typedef Kokkos::View<const int*, Space> bin_count_type;
+  using bin_count_type = Kokkos::View<const int*, Space>;
-  typedef typename KeyViewType::const_type const_key_view_type;
+  using const_key_view_type = typename KeyViewType::const_type;
  // If KeyViewType is a Kokkos::View, a const random-access view can be
  // generated; otherwise only the const key view type can be used.
-  typedef typename std::conditional<
+  using const_rnd_key_view_type = typename std::conditional<
      Kokkos::is_view<KeyViewType>::value,
      Kokkos::View<typename KeyViewType::const_data_type,
                   typename KeyViewType::array_layout,
                   typename KeyViewType::device_type,
                   Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
-      const_key_view_type>::type const_rnd_key_view_type;
+      const_key_view_type>::type;
-  typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
+  using non_const_key_scalar = typename KeyViewType::non_const_value_type;
-  typedef typename KeyViewType::const_value_type const_key_scalar;
+  using const_key_scalar     = typename KeyViewType::const_value_type;
-  typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >
+  using bin_count_atomic_type =
-      bin_count_atomic_type;
+      Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
 private:
  const_key_view_type keys;
@ -266,10 +266,10 @@ class BinSort {
  template <class ValuesViewType>
  void sort(ValuesViewType const& values, int values_range_begin,
            int values_range_end) const {
-    typedef Kokkos::View<typename ValuesViewType::data_type,
+    using scratch_view_type =
        Kokkos::View<typename ValuesViewType::data_type,
                     typename ValuesViewType::array_layout,
-                         typename ValuesViewType::device_type>
+                     typename ValuesViewType::device_type>;
        scratch_view_type;
    const size_t len        = range_end - range_begin;
    const size_t values_len = values_range_end - values_range_begin;
@ -278,13 +278,6 @@ class BinSort {
          "BinSort::sort: values range length != permutation vector length");
    }
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
    scratch_view_type sorted_values(
        ViewAllocateWithoutInitializing(
            "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
        len, values.extent(1), values.extent(2), values.extent(3),
        values.extent(4), values.extent(5), values.extent(6), values.extent(7));
 #else
    scratch_view_type sorted_values(
        ViewAllocateWithoutInitializing(
            "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
@ -303,7 +296,6 @@ class BinSort {
                                : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
        values.rank_dynamic > 7 ? values.extent(7)
                                : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
 #endif
    {
      copy_permute_functor<scratch_view_type /* DstViewType */
@ -511,8 +503,8 @@ bool try_std_sort(ViewType view) {
 template <class ViewType>
 struct min_max_functor {
-  typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>
+  using minmax_scalar =
-      minmax_scalar;
+      Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
  ViewType view;
  min_max_functor(const ViewType& view_) : view(view_) {}
@ -531,7 +523,7 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
  if (!always_use_kokkos_sort) {
    if (Impl::try_std_sort(view)) return;
  }
-  typedef BinOp1D<ViewType> CompType;
+  using CompType = BinOp1D<ViewType>;
  Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
  Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
@ -548,8 +540,8 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
 template <class ViewType>
 void sort(ViewType view, size_t const begin, size_t const end) {
-  typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy;
+  using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
-  typedef BinOp1D<ViewType> CompType;
+  using CompType     = BinOp1D<ViewType>;
  Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
  Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
--- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
@ -20,14 +20,18 @@ KOKKOS_ADD_TEST_LIBRARY(
  HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
  SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
 )
-# WORKAROUND FOR HIPCC
+
-IF(Kokkos_ENABLE_HIP)
+# avoid deprecation warnings from MSVC
-  TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906")
+TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
-ELSE()
+
-  TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
+IF(NOT (Kokkos_ENABLE_CUDA AND WIN32))
 TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
 ENDIF()
-TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
+# Suppress clang-tidy diagnostics on code that we do not have control over
 IF(CMAKE_CXX_CLANG_TIDY)
  SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
 ENDIF()
 SET(SOURCES
  UnitTestMain.cpp
--- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
@ -111,10 +111,10 @@ struct RandomProperties {
 template <class GeneratorPool, class Scalar>
 struct test_random_functor {
-  typedef typename GeneratorPool::generator_type rnd_type;
+  using rnd_type = typename GeneratorPool::generator_type;
-  typedef RandomProperties value_type;
+  using value_type  = RandomProperties;
-  typedef typename GeneratorPool::device_type device_type;
+  using device_type = typename GeneratorPool::device_type;
  GeneratorPool rand_pool;
  const double mean;
@ -125,12 +125,12 @@ struct test_random_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View types below.
-  typedef Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>
+  using type_1d =
-      type_1d;
+      Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>;
  type_1d density_1d;
-  typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
+  using type_3d =
-                       typename GeneratorPool::device_type>
+      Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
-      type_3d;
+                   typename GeneratorPool::device_type>;
  type_3d density_3d;
  test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
@ -200,9 +200,9 @@ struct test_random_functor {
 template <class DeviceType>
 struct test_histogram1d_functor {
-  typedef RandomProperties value_type;
+  using value_type      = RandomProperties;
-  typedef typename DeviceType::execution_space execution_space;
+  using execution_space = typename DeviceType::execution_space;
-  typedef typename DeviceType::memory_space memory_space;
+  using memory_space    = typename DeviceType::memory_space;
  // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
  // an exclusive upper bound on the range of random numbers that
@ -210,7 +210,7 @@ struct test_histogram1d_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View type below.
-  typedef Kokkos::View<int[HIST_DIM1D + 1], memory_space> type_1d;
+  using type_1d = Kokkos::View<int[HIST_DIM1D + 1], memory_space>;
  type_1d density_1d;
  double mean;
@ -219,7 +219,7 @@ struct test_histogram1d_functor {
  KOKKOS_INLINE_FUNCTION void operator()(
      const typename memory_space::size_type i, RandomProperties& prop) const {
-    typedef typename memory_space::size_type size_type;
+    using size_type    = typename memory_space::size_type;
    const double count = density_1d(i);
    prop.mean += count;
    prop.variance += 1.0 * (count - mean) * (count - mean);
@ -234,9 +234,9 @@ struct test_histogram1d_functor {
 template <class DeviceType>
 struct test_histogram3d_functor {
-  typedef RandomProperties value_type;
+  using value_type      = RandomProperties;
-  typedef typename DeviceType::execution_space execution_space;
+  using execution_space = typename DeviceType::execution_space;
-  typedef typename DeviceType::memory_space memory_space;
+  using memory_space    = typename DeviceType::memory_space;
  // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
  // an exclusive upper bound on the range of random numbers that
@ -244,9 +244,9 @@ struct test_histogram3d_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View type below.
-  typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
+  using type_3d =
-                       memory_space>
+      Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
-      type_3d;
+                   memory_space>;
  type_3d density_3d;
  double mean;
@ -255,7 +255,7 @@ struct test_histogram3d_functor {
  KOKKOS_INLINE_FUNCTION void operator()(
      const typename memory_space::size_type i, RandomProperties& prop) const {
-    typedef typename memory_space::size_type size_type;
+    using size_type    = typename memory_space::size_type;
    const double count = density_3d(
        i / (HIST_DIM3D * HIST_DIM3D),
        (i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D);
@ -276,7 +276,7 @@ struct test_histogram3d_functor {
 //
 template <class RandomGenerator, class Scalar>
 struct test_random_scalar {
-  typedef typename RandomGenerator::generator_type rnd_type;
+  using rnd_type = typename RandomGenerator::generator_type;
  int pass_mean, pass_var, pass_covar;
  int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
@ -294,7 +294,7 @@ struct test_random_scalar {
      cout << " -- Testing randomness properties" << endl;
      RandomProperties result;
-      typedef test_random_functor<RandomGenerator, Scalar> functor_type;
+      using functor_type = test_random_functor<RandomGenerator, Scalar>;
      parallel_reduce(num_draws / 1024,
                      functor_type(pool, density_1d, density_3d), result);
@ -325,8 +325,8 @@ struct test_random_scalar {
      cout << " -- Testing 1-D histogram" << endl;
      RandomProperties result;
-      typedef test_histogram1d_functor<typename RandomGenerator::device_type>
+      using functor_type =
-          functor_type;
+          test_histogram1d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
@ -357,8 +357,8 @@ struct test_random_scalar {
      cout << " -- Testing 3-D histogram" << endl;
      RandomProperties result;
-      typedef test_histogram3d_functor<typename RandomGenerator::device_type>
+      using functor_type =
-          functor_type;
+          test_histogram3d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);
      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
--- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
@ -55,8 +55,8 @@ namespace Impl {
 template <class ExecutionSpace, class Scalar>
 struct is_sorted_struct {
-  typedef unsigned int value_type;
+  using value_type      = unsigned int;
-  typedef ExecutionSpace execution_space;
+  using execution_space = ExecutionSpace;
  Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -69,8 +69,8 @@ struct is_sorted_struct {
 template <class ExecutionSpace, class Scalar>
 struct sum {
-  typedef double value_type;
+  using value_type      = double;
-  typedef ExecutionSpace execution_space;
+  using execution_space = ExecutionSpace;
  Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -81,8 +81,8 @@ struct sum {
 template <class ExecutionSpace, class Scalar>
 struct bin3d_is_sorted_struct {
-  typedef unsigned int value_type;
+  using value_type      = unsigned int;
-  typedef ExecutionSpace execution_space;
+  using execution_space = ExecutionSpace;
  Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -115,8 +115,8 @@ struct bin3d_is_sorted_struct {
 template <class ExecutionSpace, class Scalar>
 struct sum3D {
-  typedef double value_type;
+  using value_type      = double;
-  typedef ExecutionSpace execution_space;
+  using execution_space = ExecutionSpace;
  Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -131,7 +131,7 @@ struct sum3D {
 template <class ExecutionSpace, typename KeyType>
 void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
-  typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
+  using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
  KeyViewType keys("Keys", n);
  // Test sorting array with all numbers equal
@ -166,7 +166,7 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
 template <class ExecutionSpace, typename KeyType>
 void test_3D_sort_impl(unsigned int n) {
-  typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
+  using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>;
  KeyViewType keys("Keys", n * n * n);
@ -186,7 +186,7 @@ void test_3D_sort_impl(unsigned int n) {
  typename KeyViewType::value_type min[3] = {0, 0, 0};
  typename KeyViewType::value_type max[3] = {100, 100, 100};
-  typedef Kokkos::BinOp3D<KeyViewType> BinOp;
+  using BinOp = Kokkos::BinOp3D<KeyViewType>;
  BinOp bin_op(bin_max, min, max);
  Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
  Sorter.create_permute_vector();
@ -215,9 +215,9 @@ void test_3D_sort_impl(unsigned int n) {
 template <class ExecutionSpace, typename KeyType>
 void test_dynamic_view_sort_impl(unsigned int n) {
-  typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
+  using KeyDynamicViewType =
-      KeyDynamicViewType;
+      Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
-  typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
+  using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
  const size_t upper_bound    = 2 * n;
  const size_t min_chunk_size = 1024;
@ -305,8 +305,8 @@ void test_issue_1160_impl() {
  Kokkos::deep_copy(x_, h_x);
  Kokkos::deep_copy(v_, h_v);
-  typedef decltype(element_) KeyViewType;
+  using KeyViewType = decltype(element_);
-  typedef Kokkos::BinOp1D<KeyViewType> BinOp;
+  using BinOp       = Kokkos::BinOp1D<KeyViewType>;
  int begin = 3;
  int end   = 8;
--- a/lib/kokkos/appveyor.yml
+++ b/lib/kokkos/appveyor.yml
@ -5,6 +5,6 @@ build_script:
 - cmd: >-
    mkdir build &&
    cd build &&
-    cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
+    cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON &&
    cmake --build . --target install &&
    ctest -C Debug -V
--- a/lib/kokkos/benchmarks/atomic/main.cpp
+++ b/lib/kokkos/benchmarks/atomic/main.cpp
@ -69,13 +69,13 @@ int main(int argc, char* argv[]) {
      return 0;
    }
-    int L    = atoi(argv[1]);
+    int L    = std::stoi(argv[1]);
-    int N    = atoi(argv[2]);
+    int N    = std::stoi(argv[2]);
-    int M    = atoi(argv[3]);
+    int M    = std::stoi(argv[3]);
-    int D    = atoi(argv[4]);
+    int D    = std::stoi(argv[4]);
-    int K    = atoi(argv[5]);
+    int K    = std::stoi(argv[5]);
-    int R    = atoi(argv[6]);
+    int R    = std::stoi(argv[6]);
-    int type = atoi(argv[7]);
+    int type = std::stoi(argv[7]);
    Kokkos::View<int*> offsets("Offsets", L, M);
    Kokkos::Random_XorShift64_Pool<> pool(12371);
--- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
@ -73,15 +73,15 @@ int main(int argc, char* argv[]) {
    return 0;
  }
-  int P = atoi(argv[1]);
+  int P = std::stoi(argv[1]);
-  int N = atoi(argv[2]);
+  int N = std::stoi(argv[2]);
-  int K = atoi(argv[3]);
+  int K = std::stoi(argv[3]);
-  int R = atoi(argv[4]);
+  int R = std::stoi(argv[4]);
-  int D = atoi(argv[5]);
+  int D = std::stoi(argv[5]);
-  int U = atoi(argv[6]);
+  int U = std::stoi(argv[6]);
-  int F = atoi(argv[7]);
+  int F = std::stoi(argv[7]);
-  int T = atoi(argv[8]);
+  int T = std::stoi(argv[8]);
-  int S = atoi(argv[9]);
+  int S = std::stoi(argv[9]);
  if (U > 8) {
    printf("U must be 1-8\n");
--- a/lib/kokkos/benchmarks/gather/main.cpp
+++ b/lib/kokkos/benchmarks/gather/main.cpp
@ -72,13 +72,13 @@ int main(int argc, char* argv[]) {
    return 0;
  }
-  int S = atoi(argv[1]);
+  int S = std::stoi(argv[1]);
-  int N = atoi(argv[2]);
+  int N = std::stoi(argv[2]);
-  int K = atoi(argv[3]);
+  int K = std::stoi(argv[3]);
-  int D = atoi(argv[4]);
+  int D = std::stoi(argv[4]);
-  int R = atoi(argv[5]);
+  int R = std::stoi(argv[5]);
-  int U = atoi(argv[6]);
+  int U = std::stoi(argv[6]);
-  int F = atoi(argv[7]);
+  int F = std::stoi(argv[7]);
  if ((S != 1) && (S != 2) && (S != 4)) {
    printf("S must be one of 1,2,4\n");
--- a/lib/kokkos/benchmarks/gups/gups-kokkos.cc
+++ b/lib/kokkos/benchmarks/gups/gups-kokkos.cc
@ -50,58 +50,61 @@
 #define HLINE "-------------------------------------------------------------\n"
 #if defined(KOKKOS_ENABLE_CUDA)
-typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray;
+using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
-typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray;
+using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
 #else
-typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray;
+using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
-typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray;
+using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
 #endif
-typedef int GUPSIndex;
+using GUPSIndex = int;
 double now() {
  struct timeval now;
  gettimeofday(&now, nullptr);
-	return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
+  return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
 }
-void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) {
+void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
-	for( GUPSIndex i = 0; i < indices.extent(0); ++i ) {
+                       const int64_t dataCount) {
  for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
    indices[i] = lrand48() % dataCount;
  }
  Kokkos::deep_copy(dev_indices, indices);
 }
-void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum,
+void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
-	const bool performAtomics) {
+              const int64_t datum, const bool performAtomics) {
-
+  if (performAtomics) {
-	if( performAtomics ) {
+    Kokkos::parallel_for(
-		Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
+        "bench-gups-atomic", indices.extent(0),
-			Kokkos::atomic_fetch_xor( &data[indices[i]], datum );
+        KOKKOS_LAMBDA(const GUPSIndex i) {
          Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
        });
  } else {
-		Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
+    Kokkos::parallel_for(
-			data[indices[i]] ^= datum;
+        "bench-gups-non-atomic", indices.extent(0),
-		});
+        KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
  }
  Kokkos::fence();
 }
-int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats,
+int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
-	const bool useAtomics) {
+                  const int repeats, const bool useAtomics) {
  printf("Reports fastest timing per kernel\n");
  printf("Creating Views...\n");
  printf("Memory Sizes:\n");
-	printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount),
+  printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n",
-		1.0e-6 * ((double) dataCount * (double) sizeof(int64_t)));
+         static_cast<uint64_t>(dataCount),
-	printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount),
+         1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
-		1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t)));
+  printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n",
-	printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No") );
+         static_cast<uint64_t>(indicesCount),
         1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
  printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No"));
  printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
  printf(HLINE);
@ -118,24 +121,22 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
  printf("Initializing Views...\n");
 #if defined(KOKKOS_HAVE_OPENMP)
-	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
+  Kokkos::parallel_for(
      "init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
 #else
-	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
+  Kokkos::parallel_for(
      "init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
 #endif
-		KOKKOS_LAMBDA(const int i) {
+      KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
 		data[i] = 10101010101;
 	});
 #if defined(KOKKOS_HAVE_OPENMP)
-	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
+  Kokkos::parallel_for(
      "init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
 #else
-	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
+  Kokkos::parallel_for(
      "init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
 #endif
-		KOKKOS_LAMBDA(const int i) {
+      KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
 		indices[i] = 0;
 	});
  Kokkos::deep_copy(dev_data, data);
  Kokkos::deep_copy(dev_indices, indices);
@ -143,7 +144,7 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
  printf("Starting benchmarking...\n");
-	for( GUPSIndex k = 0; k < repeats; ++k ) {
+  for (GUPSIndex k = 0; k < repeats; ++k) {
    randomize_indices(indices, dev_indices, data.extent(0));
    start = now();
@ -155,15 +156,15 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
  Kokkos::deep_copy(data, dev_data);
  printf(HLINE);
-	printf("GUP/s Random:      %18.6f\n",
+  printf(
-		(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime);
+      "GUP/s Random:      %18.6f\n",
      (1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
  printf(HLINE);
  return 0;
 }
 int main(int argc, char* argv[]) {
  printf(HLINE);
  printf("Kokkos GUPS Benchmark\n");
  printf(HLINE);
@ -177,17 +178,17 @@ int main(int argc, char* argv[]) {
  int64_t repeats = 10;
  bool useAtomics = false;
-	for( int i = 1; i < argc; ++i ) {
+  for (int i = 1; i < argc; ++i) {
-		if( strcmp( argv[i], "--indices" ) == 0 ) {
+    if (strcmp(argv[i], "--indices") == 0) {
-			indices = std::atoll(argv[i+1]);
+      indices = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--data" ) == 0 ) {
+    } else if (strcmp(argv[i], "--data") == 0) {
-			data = std::atoll(argv[i+1]);
+      data = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--repeats" ) == 0 ) {
+    } else if (strcmp(argv[i], "--repeats") == 0) {
-			repeats = std::atoll(argv[i+1]);
+      repeats = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--atomics" ) == 0 ) {
+    } else if (strcmp(argv[i], "--atomics") == 0) {
      useAtomics = true;
    }
  }
--- a/Show More
+++ b/Show More