Merge branch 'master' into master

2020-09-01 10:59:03 +02:00
parent d601acd0ca 24f5807623
commit 0541996919
2136 changed files with 37709 additions and 82215 deletions
--- a/cmake/Modules/Documentation.cmake
+++ b/cmake/Modules/Documentation.cmake
@ -15,75 +15,93 @@ if(BUILD_DOC)
    endif()
    set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE})
  endif()
+  find_package(Doxygen 1.8.10 REQUIRED)

  file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst)

+
  add_custom_command(
    OUTPUT docenv
    COMMAND ${VIRTUALENV} docenv
  )

  set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
+  set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
+
+  set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
+  set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
+  set(SPHINX_STATIC_DIR  ${SPHINX_CONFIG_DIR}/_static)
+
+  # configuration and static files are copied to binary dir to avoid collisions with parallel builds
+  set(DOC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc)
+  set(DOC_BUILD_CONFIG_FILE ${DOC_BUILD_DIR}/conf.py)
+  set(DOC_BUILD_STATIC_DIR ${DOC_BUILD_DIR}/_static)
+  set(DOXYGEN_BUILD_DIR ${DOC_BUILD_DIR}/doxygen)
+  set(DOXYGEN_XML_DIR ${DOXYGEN_BUILD_DIR}/xml)
+
+  # copy entire configuration folder to doc build directory
+  # files in _static are automatically copied during sphinx-build, so no need to copy them individually
+  file(COPY ${SPHINX_CONFIG_DIR}/ DESTINATION ${DOC_BUILD_DIR})
+
+  # configure paths in conf.py, since relative paths change when file is copied
+  configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})

  add_custom_command(
-    OUTPUT requirements.txt
-    DEPENDS docenv
-    COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
+    OUTPUT ${DOC_BUILD_DIR}/requirements.txt
+    DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
+    COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
+    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
    COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
-    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r requirements.txt --upgrade
+    COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
  )

  # download mathjax distribution and unpack to folder "mathjax"
-  if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5)
+  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
    file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
      "${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
      EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
-    execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${CMAKE_CURRENT_BINARY_DIR}/mathjax)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
  endif()
-  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax)
-  file(COPY ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/)

  # for increased browser compatibility
-  if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js)
+  if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
    file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
-      "${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js")
+      "${DOC_BUILD_STATIC_DIR}/polyfill.js")
  endif()

-  # note, this may run in parallel with other tasks, so we must not use multiple processes here
+  # set up doxygen in its own build folder (logo + configured Doxyfile) and add targets to run it
+  file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
+  file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR})  # DESTINATION names a directory, not the target file
+  configure_file(${LAMMPS_DOC_DIR}/doxygen/Doxyfile.in ${DOXYGEN_BUILD_DIR}/Doxyfile)
+  get_target_property(LAMMPS_SOURCES lammps SOURCES)
  add_custom_command(
-    OUTPUT html
-    DEPENDS ${DOC_SOURCES} docenv requirements.txt
-    COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${LAMMPS_DOC_DIR}/src html
-    COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${CMAKE_CURRENT_BINARY_DIR}/html/index.html
+    OUTPUT ${DOXYGEN_XML_DIR}/index.xml
+    DEPENDS ${DOC_SOURCES} ${LAMMPS_SOURCES}
+    COMMAND Doxygen::doxygen ${DOXYGEN_BUILD_DIR}/Doxyfile WORKING_DIRECTORY ${DOXYGEN_BUILD_DIR}
+    COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_XML_DIR}/run.stamp
  )

-  # copy selected image files to html output tree
-  file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/html/JPG)
-  set(HTML_EXTRA_IMAGES balance_nonuniform.jpg balance_rcb.jpg
-    balance_uniform.jpg bow_tutorial_01.png bow_tutorial_02.png
-    bow_tutorial_03.png bow_tutorial_04.png bow_tutorial_05.png
-    dump1.jpg dump2.jpg examples_mdpd.gif gran_funnel.png gran_mixer.png
-    hop1.jpg hop2.jpg saed_ewald_intersect.jpg saed_mesh.jpg
-    screenshot_atomeye.jpg screenshot_gl.jpg screenshot_pymol.jpg
-    screenshot_vmd.jpg sinusoid.jpg xrd_mesh.jpg)
-  set(HTML_IMAGE_TARGETS "")
-  foreach(_IMG ${HTML_EXTRA_IMAGES})
-    string(PREPEND _IMG JPG/)
-    list(APPEND HTML_IMAGE_TARGETS "${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}")
+  if(EXISTS ${DOXYGEN_XML_DIR}/run.stamp)
+    set(SPHINX_EXTRA_OPTS "-E")
+  else()
+    set(SPHINX_EXTRA_OPTS "")
+  endif()
  add_custom_command(
-      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}
-      DEPENDS ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_CURRENT_BINARY_DIR}/html/JPG
-      COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_BINARY_DIR}/html/${_IMG}
+    OUTPUT html
+    DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${DOC_BUILD_CONFIG_FILE}  # configured conf.py; rebuild when it changes
+    COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
+    COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
+    COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
  )
-  endforeach()

  add_custom_target(
    doc ALL
-    DEPENDS html ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/es5 ${HTML_IMAGE_TARGETS}
+    DEPENDS html ${DOC_BUILD_STATIC_DIR}/mathjax/es5
    SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
  )

-  install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
+  install(DIRECTORY ${DOC_BUILD_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
 endif()
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -75,7 +75,7 @@ if(GPU_API STREQUAL "CUDA")
  endif()
  # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
  if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
-    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35]")
+    string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
  endif()
  # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
  if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS)
  list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
  include(ExternalProject)
  ExternalProject_Add(kokkos_build
-    URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
-    URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37
+    URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
+    URL_MD5 81569170fe232e5e64ab074f7cca5e50
    CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
    BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
  )
@ -50,7 +50,7 @@ if(DOWNLOAD_KOKKOS)
  target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
  add_dependencies(LAMMPS::KOKKOS kokkos_build)
 elseif(EXTERNAL_KOKKOS)
-  find_package(Kokkos 3.1.01 REQUIRED CONFIG)
+  find_package(Kokkos 3.2.00 REQUIRED CONFIG)
  target_link_libraries(lammps PRIVATE Kokkos::kokkos)
 else()
  set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
--- a/doc/.gitignore
+++ b/doc/.gitignore
@ -1,6 +1,7 @@
 /old
 /html
 /html-offline
+/epub
 /latex
 /mathjax
 /spelling
@ -10,3 +11,9 @@
 /Developer.pdf
 /doctrees
 /docenv
+/doxygen-warn.log
+/utils/sphinx-config/conf.py
+/doxygen/Doxyfile
+*.el
+/utils/sphinx-config/_static/mathjax
+/utils/sphinx-config/_static/polyfill.js
--- a/doc/Makefile
+++ b/doc/Makefile
@ -4,20 +4,28 @@ SHELL         = /bin/bash
 BUILDDIR       = ${CURDIR}
 RSTDIR         = $(BUILDDIR)/src
 VENV           = $(BUILDDIR)/docenv
-MATHJAX       = $(BUILDDIR)/mathjax
 TXT2RST        = $(VENV)/bin/txt2rst
 ANCHORCHECK    = $(VENV)/bin/rst_anchor_check
+SPHINXCONFIG   = $(BUILDDIR)/utils/sphinx-config
+MATHJAX        = $(SPHINXCONFIG)/_static/mathjax
+POLYFILL       = $(SPHINXCONFIG)/_static/polyfill.js

 PYTHON         = $(shell which python3)
+DOXYGEN        = $(shell which doxygen)
 VIRTUALENV     = virtualenv
 HAS_PYTHON3    = NO
 HAS_VIRTUALENV = NO
+HAS_DOXYGEN    = NO
 HAS_PDFLATEX   = NO

 ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
 HAS_PYTHON3    = YES
 endif

+ifeq ($(shell which doxygen >/dev/null 2>&1; echo $$?), 0)
+HAS_DOXYGEN    = YES
+endif
+
 ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
 VIRTUALENV     = virtualenv-3
 HAS_VIRTUALENV = YES
@ -33,16 +41,20 @@ HAS_PDFLATEX = YES
 endif


-SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())')
+SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")

-.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check
+# grab list of sources from doxygen config file.
+# we only want to use explicitly listed files.
+DOXYFILES      = $(shell sed -n -e 's/\#.*$$//' -e '/^ *INPUT \+=/,/^[A-Z_]\+ \+=/p' doxygen/Doxyfile.in | sed -e 's/@LAMMPS_SOURCE_DIR@/..\/src/g' -e 's/\\//g' -e 's/ \+/ /' -e 's/[A-Z_]\+ \+= *\(YES\|NO\|\)//') 
+
+.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check xmlgen

 # ------------------------------------------

 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html          create HTML doc pages in html dir"
-	@echo "  pdf           create Developer.pdf and Manual.pdf in this dir"
+	@echo "  pdf           create Manual.pdf in this dir"
 	@echo "  fetch         fetch HTML and PDF files from LAMMPS web site"
 	@echo "  epub          create ePUB format manual for e-book readers"
 	@echo "  mobi          convert ePUB to MOBI format manual for e-book readers (e.g. Kindle)"
@ -57,23 +69,32 @@ help:
 # ------------------------------------------

 clean-all: clean
-	rm -rf $(BUILDDIR)/docenv $(BUILDDIR)/doctrees $(BUILDDIR)/mathjax Manual.pdf Developer.pdf
+	rm -rf $(BUILDDIR)/docenv $(MATHJAX) $(BUILDDIR)/LAMMPS.mobi $(BUILDDIR)/LAMMPS.epub $(BUILDDIR)/Manual.pdf

 clean: clean-spelling
-	rm -rf html epub latex
+	rm -rf $(BUILDDIR)/html $(BUILDDIR)/epub $(BUILDDIR)/latex $(BUILDDIR)/doctrees $(BUILDDIR)/doxygen/xml $(BUILDDIR)/doxygen-warn.log $(BUILDDIR)/doxygen/Doxyfile $(SPHINXCONFIG)/conf.py

 clean-spelling:
-	rm -rf spelling
+	rm -rf $(BUILDDIR)/spelling

-html: $(ANCHORCHECK) $(MATHJAX)
+$(SPHINXCONFIG)/conf.py: $(SPHINXCONFIG)/conf.py.in
+	sed -e 's,@DOXYGEN_XML_DIR@,$(BUILDDIR)/doxygen/xml,g'   \
+	    -e 's,@LAMMPS_SOURCE_DIR@,$(BUILDDIR)/../src,g'    \
+	    -e 's,@LAMMPS_PYTHON_DIR@,$(BUILDDIR)/../python,g' \
+	    -e 's,@LAMMPS_DOC_DIR@,$(BUILDDIR),g' $< > $@
+
+html: xmlgen $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX) $(POLYFILL)
+	@$(MAKE) $(MFLAGS) -C graphviz all
 	@(\
-		. $(VENV)/bin/activate ;\
-		sphinx-build $(SPHINXEXTRA) -b html -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
+		sphinx-build $(SPHINXEXTRA) -b html -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
+		ln -sf Manual.html html/index.html;\
+		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		echo "############################################" ;\
 		rst_anchor_check src/*.rst ;\
-		python utils/check-packages.py -s ../src -d src ;\
+		python $(BUILDDIR)/utils/check-packages.py -s ../src -d src ;\
 		env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\
-		python utils/check-styles.py -s ../src -d src ;\
+		python $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
 		echo "############################################" ;\
 		deactivate ;\
 	)
@ -82,30 +103,28 @@ html: $(ANCHORCHECK) $(MATHJAX)
 	@rm -rf html/USER
 	@rm -rf html/JPG
 	@cp -r src/PDF html/PDF
-	@mkdir -p html/JPG
-	@cp `grep -A2 '\.\. .*\(image\|figure\)::' src/*.rst | grep ':target: JPG' | sed -e 's,.*:target: JPG/,src/JPG/,' | sort | uniq` html/JPG/
 	@rm -rf html/PDF/.[sg]*
-	@mkdir -p html/_static/mathjax
-	@cp -r $(MATHJAX)/es5 html/_static/mathjax/
 	@echo "Build finished. The HTML pages are in doc/html."

-spelling: $(VENV) utils/sphinx-config/false_positives.txt
+spelling: xmlgen $(VENV) $(SPHINXCONFIG)/false_positives.txt
 	@(\
-		. $(VENV)/bin/activate ;\
-		cp utils/sphinx-config/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
-		sphinx-build -b spelling -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
+		cp $(SPHINXCONFIG)/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
+		sphinx-build -b spelling -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
+		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		deactivate ;\
 	)
 	@echo "Spell check finished."

-epub: $(VENV)
+epub: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
+	@$(MAKE) $(MFLAGS) -C graphviz all
 	@mkdir -p epub/JPG
 	@rm -f LAMMPS.epub
-	@cp src/JPG/lammps-logo.png epub/
 	@cp src/JPG/*.* epub/JPG
 	@(\
 		. $(VENV)/bin/activate ;\
-		sphinx-build $(SPHINXEXTRA) -b epub -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
+		sphinx-build $(SPHINXEXTRA) -b epub -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
+		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		deactivate ;\
 	)
 	@mv  epub/LAMMPS.epub .
@ -117,18 +136,13 @@ mobi: epub
 	@ebook-convert LAMMPS.epub LAMMPS.mobi
 	@echo "Conversion finished. The MOBI manual file is created."

-pdf: $(ANCHORCHECK)
+pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
+	@$(MAKE) $(MFLAGS) -C graphviz all
 	@if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
 	@(\
-		cd src/Developer; \
-		pdflatex developer; \
-		pdflatex developer; \
-		mv developer.pdf ../../Developer.pdf; \
-		cd ../../; \
-	)
-	@(\
-		. $(VENV)/bin/activate ;\
-		sphinx-build $(SPHINXEXTRA) -b latex -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
+		. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
+		sphinx-build $(SPHINXEXTRA) -b latex -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
+		rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
 		echo "############################################" ;\
 		rst_anchor_check src/*.rst ;\
 		python utils/check-packages.py -s ../src -d src ;\
@ -154,12 +168,11 @@ pdf: $(ANCHORCHECK)
 	@rm -rf latex/USER
 	@cp -r src/PDF latex/PDF
 	@rm -rf latex/PDF/.[sg]*
-	@echo "Build finished. Manual.pdf and Developer.pdf are in this directory."
+	@echo "Build finished. Manual.pdf is in this directory."

 fetch:
-	@rm -rf html_www Manual_www.pdf Developer_www.pdf
+	@rm -rf html_www Manual_www.pdf
 	@curl -s -o Manual_www.pdf http://lammps.sandia.gov/doc/Manual.pdf
-	@curl -s -o Developer_www.pdf http://lammps.sandia.gov/doc/Developer.pdf
 	@curl -s -o lammps-doc.tar.gz http://lammps.sandia.gov/tars/lammps-doc.tar.gz
 	@tar xzf lammps-doc.tar.gz
 	@rm -f lammps-doc.tar.gz
@ -185,21 +198,32 @@ package_check : $(VENV)
 		deactivate ;\
 	)

+xmlgen : doxygen/xml/index.xml
+
+doxygen/Doxyfile: doxygen/Doxyfile.in
+	sed -e 's/@LAMMPS_SOURCE_DIR@/..\/..\/src/g' $< > $@
+
+doxygen/xml/index.xml : $(VENV) doxygen/Doxyfile $(DOXYFILES)
+	@(cd doxygen; $(DOXYGEN) Doxyfile && touch xml/run.stamp)
 # ------------------------------------------

 $(VENV):
-	@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
-	@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
+	@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "python3 was not found! Please see README for further instructions" 1>&2; exit 1; fi
+	@if [ "$(HAS_DOXYGEN)" == "NO" ] ; then echo "doxygen was not found! Please see README for further instructions" 1>&2; exit 1; fi
+	@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please see README for further instructions" 1>&2; exit 1; fi
 	@( \
 		$(VIRTUALENV) -p $(PYTHON) $(VENV); \
 		. $(VENV)/bin/activate; \
 		pip install --upgrade pip; \
-		pip install --use-feature=2020-resolver -r requirements.txt; \
+		pip install --use-feature=2020-resolver -r $(BUILDDIR)/utils/requirements.txt; \
 		deactivate;\
 	)

 $(MATHJAX):
-	@git clone --depth 1 https://github.com/mathjax/MathJax.git mathjax
+	@git clone --depth 1 https://github.com/mathjax/MathJax.git $@
+
+$(POLYFILL): $(MATHJAX)
+	@curl -s -o $@ "https://polyfill.io/v3/polyfill.min.js?features=es6"

 $(TXT2RST) $(ANCHORCHECK): $(VENV)
 	@( \
--- a/doc/README
+++ b/doc/README
@ -1,97 +1,60 @@
 LAMMPS Documentation

-Depending on how you obtained LAMMPS, this directory has 2 or 3
-sub-directories and optionally 2 PDF files and an ePUB file:
+Depending on how you obtained LAMMPS and whether you have built
+the manual yourself, this directory has a varying number of
+sub-directories and files. Here is a list with descriptions:

+README            this file
 src               content files for LAMMPS documentation
 html              HTML version of the LAMMPS manual (see html/Manual.html)
 utils             utilities and settings for building the documentation
-Manual.pdf      large PDF version of entire manual
-Developer.pdf   small PDF with info about how LAMMPS is structured
+Manual.pdf        PDF version of entire manual
+Developer.pdf     PDF with info about how LAMMPS is structured
 LAMMPS.epub       Manual in ePUB format
+LAMMPS.mobi       Manual in MOBI (Kindle) format
+lammps.1          man page for the lammps command
+msi2lmp.1         man page for the msi2lmp command
+mathjax           code and fonts for rendering math in html
+doctrees          temporary data
+docenv            python virtual environment for generating the manual
+doxygen           Doxygen configuration and output
+.gitignore        list of files and folders to be ignored by git
+doxygen-warn.log  logfile with warnings from running doxygen

-If you downloaded LAMMPS as a tarball from the web site, all these
-directories and files should be included.
+and:

-If you downloaded LAMMPS from the public SVN or Git repositories, then
-the HTML and PDF files are not included.  Instead you need to create
-them, in one of three ways:
+github-development-workflow.md   notes on the LAMMPS development workflow
+include-file-conventions.md      notes on LAMMPS' include file conventions
+documentation_conventions.md     notes on writing documentation for LAMMPS
+
+If you downloaded a LAMMPS tarball from lammps.sandia.gov, then the html
+folder and the PDF manual should be included. If you downloaded LAMMPS
+from GitHub then you either need to download them or build them.

 (a) You can "fetch" the current HTML and PDF files from the LAMMPS web
 site.  Just type "make fetch".  This should create a html_www dir and
-Manual_www.pdf/Developer_www.pdf files.  Note that if new LAMMPS
-features have been added more recently than the date of your version,
-the fetched documentation will include those changes (but your source
-code will not, unless you update your local repository).
+Manual_www.pdf file.  These files will always
+represent the latest published patch/development version of LAMMPS.

-(b) You can build the HTML and PDF files yourself, by typing "make
-html" or by "make pdf", respectively.  This requires various tools
-including the Python documentation processing tool Sphinx, which the
-build process will attempt to download and install on your system into
-a python virtual environment, if not already available.  The PDF file
-will require a working LaTeX installation with several add-on packages
-in addition to the Python/Sphinx setup.  See more details below.
+(b) You can build the HTML and PDF files yourself, by typing "make html"
+or by "make pdf", respectively.  This requires various tools and files.
+Some of them have to be installed (more on that below).  For the rest the
+build process will attempt to download and install into a python virtual
+environment and local folders.

 ----------------

-The generation of all documentation is managed by the Makefile in this
-dir.
+Installing prerequisites for the documentation build

-Options:
+To run the HTML documentation build toolchain, python 3.x, doxygen, git,
+and virtualenv have to be installed.  Also internet access is initially
+required to download external files and tools.

-make html         # generate HTML in html dir using Sphinx
-make pdf          # generate 2 PDF files (Manual.pdf,Developer.pdf)
-                  #   in this dir via Sphinx and PDFLaTeX
-make fetch        # fetch HTML doc pages and 2 PDF files from web site
-                  #   as a tarball and unpack into html dir and 2 PDFs
-make epub         # generate LAMMPS.epub in ePUB format using Sphinx
-make clean        # remove intermediate RST files created by HTML build
-make clean-all    # remove entire build folder and any cached data
-
----------------
-
-Installing prerequisites for HTML build
-
-To run the HTML documention build toolchain, Python 3 and virtualenv
-have to be installed.  Here are instructions for common setups:
-
-# Ubuntu
-
-sudo apt-get install python-virtualenv
-
-# Fedora (up to version 21)
-# Red Hat Enterprise Linux or CentOS (up to version 7.x)
-
-sudo yum install python3-virtualenv
-
-# Fedora (since version 22)
-
-sudo dnf install python3-virtualenv
-
-# MacOS X
-
-## Python 3
-
-Download the latest Python 3 MacOS X package from
-https://www.python.org and install it.  This will install both Python
-3 and pip3.
-
-## virtualenv
-
-Once Python 3 is installed, open a Terminal and type
-
-pip3 install virtualenv
-
-This will install virtualenv from the Python Package Index.
-
----------------
-
-Installing prerequisites for PDF build
-
-Same as for HTML plus a compatible LaTeX installation with
-support for PDFLaTeX. Also the following LaTeX packages need
-to be installed (e.g. from texlive):
+Building the PDF format manual requires in addition a compatible LaTeX
+installation with support for PDFLaTeX and several add-on LaTeX packages
+installed.  This includes:
 - amsmath
+- anysize
 - babel
 - capt-of
 - cmap
@ -105,24 +68,13 @@ to be installed (e.g. from texlive):
 - tabulary
 - upquote
 - wrapfig
+
+Building the EPUB format requires a LaTeX installation with the same packages
+as for the PDF format plus the 'dvipng' command to convert the embedded math
+into images. The MOBI format is generated from the EPUB format file by using
+the tool 'ebook-convert' from the 'calibre' e-book management software
+(https://calibre-ebook.com).
 ----------------

-Installing prerequisites for epub build
-
-## ePUB
-
-Same as for HTML. This uses the same tools and configuration
-files as the HTML tree. The ePUB format conversion currently
-does not support processing mathematical expressions via MathJAX,
-so there will be limitations on some pages. For the time being
-until this is resolved, building and using the PDF format file
-is recommended instead.
-
-For converting the generated ePUB file to a mobi format file
-(for e-book readers like Kindle, that cannot read ePUB), you
-also need to have the 'ebook-convert' tool from the "calibre"
-software installed. http://calibre-ebook.com/
-You first create the ePUB file with 'make epub' and then do:
-
-ebook-convert LAMMPS.epub LAMMPS.mobi
-
+More details on this can be found in the manual itself. The online
+version is at: https://lammps.sandia.gov/doc/Manual_build.html
--- a/doc/documentation_conventions.md
+++ b/doc/documentation_conventions.md
@ -0,0 +1,93 @@
+# Outline of LAMMPS documentation file conventions
+
+The purpose of this document is to provide a point of reference
+for LAMMPS developers and contributors as to what conventions
+should be used to structure and format files in the LAMMPS manual.
+
+Last change: 2020-04-23
+
+## File format and tools
+
+In fall 2019, the LAMMPS documentation file format has changed from
+a home grown minimal markup designed to generate HTML format files
+from a mostly plain text format to using the reStructuredText file
+format.  For a transition period all files in the old .txt format
+were transparently converted to .rst and then processed.  The txt2rst
+tool is still included in the distribution to obtain an initial .rst
+file for integration into the manual.  Since the transition to
+reStructured text as source format, many of the artifacts of the
+translation have been removed though and parts of the documentation
+refactored and expanded to take advantage of the capabilities of
+reStructuredText and associated tools.  The conversion from the
+source to the final formats (HTML, PDF, and optionally e-book
+reader formats ePUB and MOBI) is mostly automated and controlled
+by a Makefile in the `doc` folder. This makefile assumes that the
+processing is done on a Unix-like machine and Python 3.5 or later
+and a matching virtualenv module are available.  Additional Python
+packages (like the Sphinx tool and several extensions) are
+transparently installed into a virtual environment over the
+internet using the `pip` package manager.  Further requirements
+and details are discussed in the manual.
+
+## Work in progress
+
+The refactoring and improving of the documentation is an ongoing
+process, so statements in this document may not always be fully
+up-to-date.  If in doubt, contact the LAMMPS developers.
+
+## General structure
+
+The layout and formatting of added files should follow the example
+of the existing files.  Since those are directly derived from their
+former .txt format versions and the manual has been maintained in
+that format for many years, there is a large degree of consistency
+already, so comparison with similar files should give you a good
+idea what kind of information and sections are needed.
+
+## Formatting conventions
+
+Filenames, folders, paths, (shell) commands, definitions, makefile
+settings and similar should be formatted as "literals" with
+double backward quotes bracketing the item: \`\`path/to/some/file\`\`
+
+Keywords and options are formatted in italics:  \*option\*
+
+Mathematical expressions, equations, symbols are typeset using
+either a `.. math::` block or the `:math:` role.
+
+Groups of shell commands or LAMMPS input script or C/C++ source
+code should be typeset into a `.. code-block::` section. A syntax
+highlighting extension for LAMMPS input scripts is provided, so
+`LAMMPS` can be used to indicate the language in the code block
+in addition to `bash`, `c`, or `python`.  When no syntax style
+is indicated, no syntax highlighting is performed.
+
+As an alternative, e.g. to typeset the syntax of file formats
+a `.. parsed-literal::` block can be used, which allows some
+formatting directives, which means that related characters need
+to be escaped with a preceding backslash: `\*`.
+
+Special remarks can be highlighted with a `.. note::` block and
+strong warnings can be put into a `.. warning::` block.
+
+## Required steps when adding a custom style to LAMMPS
+
+When adding a new style (e.g. pair style or a compute or a fix)
+or a new command, it is **required** to include the corresponding
+documentation.  Those are often new files that need to be added.
+In order to be included in the documentation, those new files
+need to be referenced in a `.. toctree::` block.  Most of those
+use patterns with wildcards, so the addition will be automatic.
+However, those additions also need to be added to some lists of
+styles or commands.  The `make style\_check` command will perform
+a test and report any missing entries and list the affected files.
+Any references defined with `.. \_refname:` have to be unique
+across all documentation files and this can be checked for with
+`make anchor\_check`.  Finally, a spell-check should be done,
+which is triggered via `make spelling`.  Any offenses need to
+be corrected and false positives should be added to the file
+`utils/sphinx-config/false\_positives.txt`.
+
+## Required additional steps when adding a new package to LAMMPS
+
+TODO
--- a/doc/doxygen/.gitignore
+++ b/doc/doxygen/.gitignore
@ -0,0 +1 @@
+/xml
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -0,0 +1,528 @@
+# Doxyfile 1.8.15 -*- makefile -*-
+
+DOXYFILE_ENCODING      = UTF-8
+PROJECT_NAME           = "LAMMPS Programmer's Guide"
+PROJECT_NUMBER         = "24 August 2020"
+PROJECT_BRIEF          = "Documentation of the LAMMPS library interface and Python wrapper"
+PROJECT_LOGO           = lammps-logo.png
+CREATE_SUBDIRS         = NO
+ALLOW_UNICODE_NAMES    = NO
+OUTPUT_LANGUAGE        = English
+OUTPUT_TEXT_DIRECTION  = LTR
+
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+INHERIT_DOCS           = YES
+TAB_SIZE               = 2
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = YES
+IDL_PROPERTY_SUPPORT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 2
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO, these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = YES
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = NO
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = NO
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = NO
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = YES
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           = "../doxygen-warn.log"
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
+# Note: If this tag is empty the current directory is searched.
+#
+# Only the individual files listed below are given to doxygen.
+# @LAMMPS_SOURCE_DIR@ is a placeholder; presumably it is replaced with the
+# path of the LAMMPS source folder when this template is converted into the
+# final Doxyfile.
+# The last entry must not end in a backslash, otherwise the line following
+# the list would be treated as part of it.
+
+INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp      \
+                         @LAMMPS_SOURCE_DIR@/utils.h        \
+                         @LAMMPS_SOURCE_DIR@/library.cpp    \
+                         @LAMMPS_SOURCE_DIR@/library.h      \
+                         @LAMMPS_SOURCE_DIR@/lammps.cpp     \
+                         @LAMMPS_SOURCE_DIR@/lammps.h       \
+                         @LAMMPS_SOURCE_DIR@/lmptype.h      \
+                         @LAMMPS_SOURCE_DIR@/pointers.h     \
+                         @LAMMPS_SOURCE_DIR@/atom.cpp       \
+                         @LAMMPS_SOURCE_DIR@/atom.h         \
+                         @LAMMPS_SOURCE_DIR@/input.cpp      \
+                         @LAMMPS_SOURCE_DIR@/input.h        \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp  \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.h    \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp  \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.h    \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp  \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to output
+#---------------------------------------------------------------------------
+
+GENERATE_HTML          = NO
+GENERATE_LATEX         = NO
+GENERATE_XML           = YES
+XML_OUTPUT             = xml
+XML_PROGRAMLISTING     = YES
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+#ENABLE_PREPROCESSING   = YES
+ENABLE_PREPROCESSING   = NO
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
--- a/doc/doxygen/lammps-logo.png
+++ b/doc/doxygen/lammps-logo.png
--- a/doc/graphviz/.gitignore
+++ b/doc/graphviz/.gitignore
@ -0,0 +1,3 @@
+/*.png
+/*.svg
+/*.pdf
--- a/doc/graphviz/Makefile
+++ b/doc/graphviz/Makefile
@ -0,0 +1,30 @@
+# Makefile for generating images with graphviz
+#
+# Renders every *.dot file in this directory into a PNG image of the
+# same base name in $(IMGDIR) using the "dot" program from graphviz.
+#
+# Targets:
+#   all    (default) render all graphs
+#   clean  remove the rendered images and editor backup files
+#
+SHELL      = /bin/bash
+BUILDDIR   = ${CURDIR}/..
+IMGDIR     = $(BUILDDIR)/src/JPG
+IMGSRC     = $(wildcard *.dot)
+IMGPNG     = $(IMGSRC:%.dot=$(IMGDIR)/%.png)
+
+# check whether the graphviz "dot" executable can be found in the PATH
+HAS_DOT        = NO
+ifeq ($(shell command -v dot >/dev/null 2>&1; echo $$?), 0)
+HAS_DOT        = YES
+endif
+
+# these targets do not create files named "all" or "clean"
+.PHONY: all clean
+
+all:    $(IMGPNG)
+
+clean:
+	rm -f $(IMGPNG) *~
+
+ifeq ($(HAS_DOT),YES)
+$(IMGDIR)/%.png: %.dot
+	dot -Tpng -o $@ $<
+endif
+
+# without graphviz only a notice is printed and no image is written
+ifeq ($(HAS_DOT),NO)
+$(IMGDIR)/%.png: %.dot
+	@echo '###################################################'
+	@echo '# Need to install "graphviz" to regenerate graphs #'
+	@echo '###################################################'
+endif
+
--- a/doc/graphviz/lammps-classes.dot
+++ b/doc/graphviz/lammps-classes.dot
@ -0,0 +1,90 @@
+// LAMMPS Class topology
+//
+// Directed graph of selected LAMMPS classes, laid out from left to right
+// and rooted at the circular "LAMMPS" node.
+// NOTE(review): judging from the class names, solid edges appear to denote
+// "has a member of this class" and dashed edges "is a derived class of";
+// confirm against the text that embeds the rendered image.
+digraph lammps {
+    rankdir="LR"
+    // root node
+    La [shape=circle label="LAMMPS"]
+    // nodes drawn as boxes; all of these are targets of the edge from "La"
+    At [shape=box label="Atom" color=blue]
+    Ci [shape=box label="CiteMe"]
+    Co [shape=box label="Comm" color=blue]
+    Do [shape=box label="Domain" color=blue]
+    Er [shape=box label="Error" color=blue]
+    Fo [shape=box label="Force" color=blue]
+    Gr [shape=box label="Group" color=blue]
+    In [shape=box label="Input" color=blue]
+    Ko [shape=box label="KokkosLMP"]
+    Ak [shape=box label="AtomKK" color=blue]
+    Mk [shape=box label="MemoryKK" color=blue]
+    Me [shape=box label="Memory" color=blue]
+    Mo [shape=box label="Modify" color=blue]
+    Ne [shape=box label="Neighbor" color=blue]
+    Ou [shape=box label="Output" color=blue]
+    Py [shape=box label="Python" color=blue]
+    Up [shape=box label="Update" color=blue]
+    Un [shape=box label="Universe" color=blue]
+    Ti [shape=box label="Timer" color=blue]
+    // nodes without an explicit shape use the graphviz default node shape
+    Rg [label="Region" color=red]
+    Rb [shape=box label="RegionBlock"]
+    Rs [shape=box label="RegionSphere"]
+    Av [label="AtomVec" color=red]
+    It [label="Integrate" color=red]
+    Mi [label="Min" color=red]
+    Pa [label="Pair" color=red]
+    Bo [label="Bond" color=red]
+    An [label="Angle" color=red]
+    Di [label="Dihedral" color=red]
+    Im [label="Improper" color=red]
+    Ks [label="Kspace" color=red]
+    Du [label="Dump" color=red]
+    Fi [label="Fix" color=red]
+    Cp [label="Compute" color=red]
+    Th [label="Thermo"]
+    Va [label="Variable"]
+    Ew [shape=box label="Ewald"]
+    Pp [shape=box label="PPPM"]
+    Ff [label="FFT3d"]
+    Re [label="Remap"]
+    Gc [label="GridComm"]
+    Cb [shape=box label="CommBrick"]
+    Ct [shape=box label="CommTiled"]
+    Aa [shape=box label="AtomVecAtomic"]
+    Am [shape=box label="AtomVecMolecular"]
+    Lj [shape=box label="PairLJCut"]
+    Lo [shape=box label="PairLJCutOMP"]
+    Lg [shape=box label="PairLJCutGPU"]
+    Te [shape=box label="PairTersoff"]
+    Bh [shape=box label="BondHarmonic"]
+    Bf [shape=box label="BondFENE"]
+    Fa [shape=box label="FixAveTime"]
+    Fn [shape=box label="FixNVE"]
+    Fh [shape=box label="FixNH"]
+    Fp [shape=box label="FixNPT"]
+    Ft [shape=box label="FixNVT"]
+    Da [shape=box label="DumpAtom"]
+    Dc [shape=box label="DumpCustom"]
+    Dg [shape=box label="DumpCFG"]
+    Ve [shape=box label="Verlet"]
+    Rr [shape=box label="Respa"]
+    Po [shape=box label="PPPMOmp"]
+    // edges; "A -> {B C}" is shorthand for one edge from A to each listed node
+    La -> {At Ci Co Do Er Fo Gr In Ko Ak Mk Me Mo Ne Ou Py Ti Up Un} [penwidth=2]
+    Do -> {Rg} [penwidth=2]
+    Co -> {Cb Ct} [style=dashed penwidth=2]
+    Rg -> {Rb Rs} [style=dashed penwidth=2]
+    In -> Va [penwidth=2]
+    Mo -> {Fi Cp} [penwidth=2]
+    Fo -> {Pa Bo An Di Im Ks} [penwidth=2]
+    Ks -> {Ew Pp} [style=dashed penwidth=2]
+    Pp -> {Ff Re Gc} [penwidth=2]
+    Pp -> {Po} [style=dashed penwidth=2]
+    Up -> {It Mi} [penwidth=2]
+    It -> {Ve Rr} [style=dashed penwidth=2]
+    Ou -> {Du Th} [penwidth=2]
+    Du -> {Da Dc} [style=dashed penwidth=2]
+    Dc -> {Dg} [style=dashed penwidth=2]
+    At -> Av [penwidth=2]
+    Av -> {Aa Am} [style=dashed penwidth=2]
+    Pa -> {Lj Te} [style=dashed penwidth=2]
+    Lj -> {Lo Lg} [style=dashed penwidth=2]
+    Bo -> {Bh Bf} [style=dashed penwidth=2]
+    Fi -> {Fa Fn Fh} [style=dashed penwidth=2]
+    Fh -> {Fp Ft} [style=dashed penwidth=2]
+}
+
--- a/doc/include-file-conventions.md
+++ b/doc/include-file-conventions.md
@ -3,7 +3,7 @@
 The purpose of this document is to provide a point of reference
 for LAMMPS developers and contributors as to what include files
 and definitions to put where into LAMMPS source.
-Last change 2019-07-05
+Last change 2020-08-31

 ## Table of Contents

@ -99,10 +99,13 @@ Include files should be included in this order:

 #### pointers.h

-The `pointer.h` header file also includes `cstdio` and `lmptype.h`
-(and through it `stdint.h`, `intttypes.h`, cstdlib, and `climits`).
+The `pointers.h` header file also includes `cstdio`, `cstddef`,
+`string`, `lmptype.h`, and `utils.h` (and through those indirectly
+`stdint.h`, `inttypes.h`, `cstdlib`, and `climits`).
 This means any header including `pointers.h` can assume that `FILE`,
-`NULL`, `INT_MAX` are defined.
+`NULL`, `INT_MAX` are defined, and that they may freely use `std::string`
+and functions from the `utils` namespace without including the
+corresponding header files.

 ## Tools

--- a/doc/lammps.1
+++ b/doc/lammps.1
@ -1,4 +1,4 @@
-.TH LAMMPS "21 August 2020" "2020-08-21"
+.TH LAMMPS "24 August 2020" "2020-08-24"
 .SH NAME
 .B LAMMPS
 \- Molecular Dynamics Simulator.
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -1,4 +0,0 @@
-Sphinx
-sphinxcontrib-spelling
-breathe
-Pygments
--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@ -471,7 +471,7 @@ LAMMPS source distribution.
 .. code-block:: bash

  make html          # create HTML doc pages in html directory
-  make pdf           # create Developer.pdf and Manual.pdf in this directory
+  make pdf           # create Manual.pdf in this directory
  make fetch         # fetch HTML and PDF files from LAMMPS web site
  make clean         # remove all intermediate files
  make clean-all     # reset the entire doc build environment
--- a/doc/src/Build_development.rst
+++ b/doc/src/Build_development.rst
@ -378,22 +378,22 @@ The images below illustrate how the data is presented.
 .. list-table::

      * - .. figure:: JPG/coverage-overview-top.png
-             :target: JPG/coverage-overview-top.png
+             :scale: 25%

          Top of the overview page

        - .. figure:: JPG/coverage-overview-manybody.png
-             :target: JPG/coverage-overview-manybody.png
+             :scale: 25%

          Styles with good coverage

        - .. figure:: JPG/coverage-file-top.png
-             :target: JPG/coverage-file-top.png
+             :scale: 25%

          Top of individual source page

        - .. figure:: JPG/coverage-file-branches.png
-             :target: JPG/coverage-file-branches.png
+             :scale: 25%

          Source page with branches

--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -361,9 +361,12 @@ be specified in uppercase.
   *  - AMDAVX
      - HOST
      - AMD 64-bit x86 CPU (AVX 1)
-   *  - EPYC
+   *  - ZEN
      - HOST
-      - AMD EPYC Zen class CPU (AVX 2)
+      - AMD Zen class CPU (AVX 2)
+   *  - ZEN2
+      - HOST
+      - AMD Zen2 class CPU (AVX 2)
   *  - ARMV80
      - HOST
      - ARMv8.0 Compatible CPU
@ -445,12 +448,18 @@ be specified in uppercase.
   *  - TURING75
      - GPU
      - NVIDIA Turing generation CC 7.5 GPU
+   *  - AMPERE80
+      - GPU
+      - NVIDIA Ampere generation CC 8.0 GPU
   *  - VEGA900
      - GPU
      - AMD GPU MI25 GFX900
   *  - VEGA906
      - GPU
      - AMD GPU MI50/MI60 GFX906
+   *  - INTEL_GEN
+      - GPU
+      - Intel GPUs Gen9+

 Basic CMake build settings:
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/doc/src/Developer/.gitignore
+++ b/doc/src/Developer/.gitignore
@ -1,3 +0,0 @@
-/developer.aux
-/developer.log
-/developer.toc
--- a/doc/src/Developer/classes.fig
+++ b/doc/src/Developer/classes.fig
@ -1,198 +0,0 @@
-#FIG 3.2  Produced by xfig version 3.2.5a
-Portrait
-Center
-Inches
-Letter  
-100.00
-Single
-2
-1200 2
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2232 1170 3540 1170 3540 1505 2232 1505 2232 1170
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2220 1830 3015 1830 3015 2219 2220 2219 2220 1830
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2226 3285 3300 3285 3300 3665 2226 3665 2226 3285
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2223 5190 3225 5190 3225 5525 2223 5525 2223 5190
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2232 7125 3090 7125 3090 7478 2232 7478 2232 7125
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2226 10230 3300 10230 3300 10565 2226 10565 2226 10230
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4026 10305 4980 10305 4980 10592 4026 10592 4026 10305
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4029 9900 5205 9900 5205 10250 4029 10250 4029 9900
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4038 9315 5370 9315 5370 9659 4038 9659 4038 9315
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4023 8955 4530 8955 4530 9278 4023 9278 4023 8955
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4029 8475 5190 8475 5190 8762 4029 8762 4029 8475
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4008 8115 5430 8115 5430 8408 4008 8408 4008 8115
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4026 7425 4995 7425 4995 7712 4026 7712 4026 7425
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4035 6720 4650 6720 4650 7025 4035 7025 4035 6720
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4044 7080 4830 7080 4830 7358 4044 7358 4044 7080
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4032 6105 5205 6105 5205 6419 4032 6419 4032 6105
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4026 5715 5115 5715 5115 6062 4026 6062 4026 5715
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4023 3585 4605 3585 4605 3872 4023 3872 4023 3585
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3954 1680 5175 1680 5175 1997 3954 1997 3954 1680
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 1620 5235 2100 615
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 1605 5445 2070 10695
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3120 1935 3855 1800
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3150 2115 3765 2250
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3135 7230 3945 6840
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3150 7335 3945 8610
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 5265 8610 6195 8400
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 5280 8655 6180 8820
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3345 10290 3930 10020
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3360 10395 3930 10425
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3360 10455 3930 10755
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2193 360 3435 360 3435 647 2193 647 2193 360
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3398 3472 3923 3307
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3413 3601 3923 3721
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3285 2806 3870 2802
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3315 5372 3900 5368
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6354 2280 7470 2280 7470 2585 6354 2585 6354 2280
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6348 1875 7320 1875 7320 2222 6348 2222 6348 1875
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3954 2070 5505 2070 5505 2372 3954 2372 3954 2070
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 5634 2137 6230 2045
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 5670 2310 6265 2418
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 3900 2640 5400 2640 5400 2975 3900 2975 3900 2640
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4038 3165 5385 3165 5385 3497 4038 3497 4038 3165
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4245 4110 5730 4110 5730 4499 4245 4499 4245 4110
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4233 4545 6390 4545 6390 4862 4233 4862 4233 4545
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4026 5190 5385 5190 5385 5525 4026 5525 4026 5190
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4038 7755 5310 7755 5310 8075 4038 8075 4038 7755
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6270 8250 7365 8250 7365 8610 6270 8610 6270 8250
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 6273 8655 7380 8655 7380 8978 6273 8978 6273 8655
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 4041 10620 5985 10620 5985 10943 4041 10943 4041 10620
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2217 10830 3135 10830 3135 11156 2217 11156 2217 10830
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2229 9780 3240 9780 3240 10118 2229 10118 2229 9780
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2214 9015 3285 9015 3285 9362 2214 9362 2214 9015
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2208 5850 3420 5850 3420 6209 2208 6209 2208 5850
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2217 4275 3615 4275 3615 4634 2217 4634 2217 4275
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2235 2655 3150 2655 3150 3000 2235 3000 2235 2655
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 60 5115 1500 5115 1500 5610 60 5610 60 5115
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3486 6018 4011 5853
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3486 6129 3996 6249
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3361 9291 3991 9531
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3345 9129 4005 9099
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3691 4412 4216 4277
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
-	1 1 2.00 120.00 240.00
-	 3695 4561 4175 4711
-2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
-	 2220 735 3129 735 3129 1043 2220 1043 2220 735
-4 0 1 50 -1 18 18 0.0000 4 225 1275 2265 1455 Universe\001
-4 0 1 50 -1 18 18 0.0000 4 285 735 2265 2175 Input\001
-4 0 1 50 -1 18 18 0.0000 4 225 780 2265 2925 Atom\001
-4 0 1 50 -1 18 18 0.0000 4 285 1020 2265 3600 Update\001
-4 0 1 50 -1 18 18 0.0000 4 285 1320 2265 4575 Neighbor\001
-4 0 1 50 -1 18 18 0.0000 4 225 945 2265 5475 Comm\001
-4 0 1 50 -1 18 18 0.0000 4 225 1110 2265 6150 Domain\001
-4 0 1 50 -1 18 18 0.0000 4 225 810 2265 7425 Force\001
-4 0 1 50 -1 18 18 0.0000 4 285 975 2265 9300 Modify\001
-4 0 1 50 -1 18 18 0.0000 4 285 900 2265 10050 Group\001
-4 0 1 50 -1 18 18 0.0000 4 285 990 2265 10500 Output\001
-4 0 1 50 -1 18 18 0.0000 4 225 825 2265 11100 Timer\001
-4 0 0 50 -1 18 18 0.0000 4 225 1170 3990 1950 Variable\001
-4 0 4 50 -1 18 18 0.0000 4 225 1470 3990 2325 Command\001
-4 0 4 50 -1 18 18 0.0000 4 285 1275 4065 3450 Integrate\001
-4 0 4 50 -1 18 18 0.0000 4 225 525 4065 3825 Min\001
-4 0 0 50 -1 18 18 0.0000 4 285 1230 4065 5475 Irregular\001
-4 0 4 50 -1 18 18 0.0000 4 285 1020 4065 6000 Region\001
-4 0 0 50 -1 18 18 0.0000 4 225 975 4065 6375 Lattice\001
-4 0 4 50 -1 18 18 0.0000 4 225 435 4065 9225 Fix\001
-4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 9600 Compute\001
-4 0 4 50 -1 18 18 0.0000 4 225 570 4065 6975 Pair\001
-4 0 4 50 -1 18 18 0.0000 4 285 840 4065 7665 Angle\001
-4 0 4 50 -1 18 18 0.0000 4 225 1215 4065 8010 Dihedral\001
-4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 8355 Improper\001
-4 0 4 50 -1 18 18 0.0000 4 285 1095 4065 8700 KSpace\001
-4 0 4 50 -1 18 18 0.0000 4 285 855 4065 10545 Dump\001
-4 0 0 50 -1 18 18 0.0000 4 225 1815 4065 10890 WriteRestart\001
-4 0 0 50 -1 18 18 0.0000 4 225 930 6315 8550 FFT3D\001
-4 0 0 50 -1 18 18 0.0000 4 285 1005 6315 8925 Remap\001
-4 0 0 50 -1 18 18 0.0000 4 225 885 6390 2175 Finish\001
-4 0 0 50 -1 18 18 0.0000 4 285 1050 6390 2550 Special\001
-4 0 4 50 -1 18 18 0.0000 4 225 1305 3990 2925 AtomVec\001
-4 0 4 50 -1 18 18 0.0000 4 225 765 4065 7320 Bond\001
-4 0 0 50 -1 18 18 0.0000 4 225 1095 4065 10200 Thermo\001
-4 0 0 50 -1 18 18 0.0000 4 285 1380 4305 4425 NeighList\001
-4 0 0 50 -1 18 18 0.0000 4 285 2025 4305 4800 NeighRequest\001
-4 0 1 50 -1 18 18 0.0000 4 285 1155 2250 600 Memory\001
-4 0 0 50 -1 18 18 0.0000 4 225 1305 120 5475 LAMMPS\001
-4 0 1 50 -1 18 18 0.0000 4 225 735 2265 1005 Error\001
--- a/doc/src/Developer/classes.pdf
+++ b/doc/src/Developer/classes.pdf
--- a/doc/src/Developer/developer.tex
+++ b/doc/src/Developer/developer.tex
@ -1,699 +0,0 @@
-\documentclass{article}
-\usepackage{graphicx}
-
-\begin{document}
-
-\centerline{\Large \bf LAMMPS Developer Guide}
-\centerline{\bf 23 Aug 2011}
-
-\vspace{0.5in}
-
-This document is a developer guide to the LAMMPS molecular dynamics
-package, whose WWW site is at lammps.sandia.gov.  It describes the
-internal structure and algorithms of the code.  Sections will be added
-as we have time, and in response to requests from developers and
-users.
-
-\tableofcontents
-
-\pagebreak
-\section{LAMMPS source files}
-
-LAMMPS source files are in two directories of the distribution
-tarball.  The src directory has the majority of them, all of which are
-C++ files (*.cpp and *.h).  Many of these files are in the src
-directory itself.  There are also dozens of ``packages'', which can be
-included or excluded when LAMMPS is built.  See the
-doc/Section\_build.html section of the manual for more information
-about packages, or type ``make'' from within the src directory, which
-lists package-related commands, such as ``make package-status''.  The
-source files for each package are in an all-uppercase sub-directory of
-src, like src/MOLECULE or src/USER-CUDA.  If the package is currently
-installed, copies of the package source files will also exist in the
-src directory itself.  The src/STUBS sub-directory is not a package
-but contains a dummy version of the MPI library, used when building a
-serial version of the code.
-
-The lib directory also contains source code for external libraries,
-used by a few of the packages.  Each sub-directory, like meam or gpu,
-contains the source files, some of which are in different languages
-such as Fortran.  The files are compiled into libraries from within
-each sub-directory, e.g. performing a ``make'' in the lib/meam directory
-creates a libmeam.a file.  These libraries are linked to during a
-LAMMPS build, if the corresponding package is installed.
-
-LAMMPS C++ source files almost always come in pairs, such as run.cpp
-and run.h.  The pair of files defines a C++ class, the Run class in
-this case, which contains the code invoked by the ``run'' command in a
-LAMMPS input script.  As this example illustrates, source file and
-class names often have a one-to-one correspondence with a command used
-in a LAMMPS input script.  Some source files and classes do not have a
-corresponding input script command, e.g. ``force.cpp'' and the Force
-class.  They are discussed in the next section.
-
-\pagebreak
-\section{Class hierarchy of LAMMPS}
-
-Though LAMMPS has a lot of source files and classes, its class
-hierarchy is quite simple, as outlined in Fig \ref{fig:classes}.  Each
-boxed name refers to a class and has a pair of associated source files
-in lammps/src, e.g. ``memory.cpp'' and ``memory.h''.  More details on the
-class and its methods and data structures can be found by examining
-its *.h file.
-
-LAMMPS (lammps.cpp/h) is the top-level class for the entire code.  It
-holds an ``instance'' of LAMMPS and can be instantiated one or more
-times by a calling code.  For example, the file src/main.cpp simply
-instantiates one instance of LAMMPS and passes it the input script.
-
-The file src/library.cpp contains a C-style library interface to the
-LAMMPS class.  See the lammps/couple and lammps/python directories for
-examples of simple programs that use LAMMPS through its library
-interface.  A driver program can instantiate the LAMMPS class multiple
-times, e.g. to embed several atomistic simulation regions within a
-mesoscale or continuum simulation domain.
-
-There are a dozen or so top-level classes within the LAMMPS class that
-are visible everywhere in the code.  They are shaded blue in Fig
-\ref{fig:classes}.  Thus any class can refer to the y-coordinate of
-local atom $i$ as atom$\rightarrow$x[i][1].  This visibility is
-enabled by a bit of cleverness in the Pointers class (see
-src/pointers.h) which every class inherits from.
-
-There are a handful of virtual parent classes in LAMMPS that define
-what LAMMPS calls ``styles''.  They are shaded red in Fig
-\ref{fig:classes}.  Each of these is the parent of a number of child
-classes that implement the interface defined by the parent class.  For
-example, the fix style has around 100 child classes.  They are the
-possible fixes that can be specified by the fix command in an input
-script, e.g. fix nve, fix shake, fix ave/time, etc.  The corresponding
-classes are Fix (for the parent class), FixNVE, FixShake, FixAveTime,
-etc.  The source files for these classes are easy to identify in the
-src directory, since they begin with the word ``fix'', e.g.
-fix\_nve.cpp, fix\_shake.cpp, fix\_ave\_time.cpp, etc.
-
-The one exception is child class files for the ``command'' style.  These
-implement specific commands in the input script that can be invoked
-before/after/between runs or which launch a simulation.  Examples are
-the create\_box, minimize, run, and velocity commands which encode the
-CreateBox, Minimize, Run, and Velocity classes.  The corresponding
-files are create\_box.cpp, minimize.cpp, run.cpp, and velocity.cpp.
-The list of command style files can be found by typing ``grep
-COMMAND\_CLASS *.h'' from within the src directory, since that word in
-the header file identifies the class as an input script command.
-Similar words can be grepped to list files for the other LAMMPS
-styles.  E.g. ATOM\_CLASS, PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS,
-FIX\_CLASS, COMPUTE\_CLASS, DUMP\_CLASS, etc.
-
-\begin{figure}[htb]
- \begin{center}
- \includegraphics[height=4in]{classes.pdf}
- \end{center}
- \caption{Class hierarchy within LAMMPS source code.}
-\label{fig:classes}
-\end{figure}
-
-More details on individual classes in Fig \ref{fig:classes} are as
-follows:
-
-\begin{itemize}
-
-\item The Memory class handles allocation of all large vectors and
-  arrays.
-
-\item The Error class prints all error and warning messages.
-
-\item The Universe class sets up partitions of processors so that
-  multiple simulations can be run, each on a subset of the processors
-  allocated for a run, e.g. by the mpirun command.
-
-\item The Input class reads an input script, stores variables, and
-  invokes stand-alone commands that are child classes of the Command
-  class.
-
-\item As discussed above, the Command class is a parent class for
-  certain input script commands that perform a one-time operation
-  before/after/between simulations or which invoke a simulation.  They
-  are instantiated from within the Input class, invoked, then
-  immediately destructed.
-
-\item The Finish class is instantiated to print statistics to the
-  screen after a simulation is performed, by commands like run and
-  minimize.
-
-\item The Special class walks the bond topology of a molecular system
-  to find first, second, third neighbors of each atom.  It is invoked by
-  several commands, like read\_data, read\_restart, and replicate.
-
-\item The Atom class stores all per-atom arrays.  More precisely, they
-  are allocated and stored by the AtomVec class, and the Atom class
-  simply stores a pointer to them.  The AtomVec class is a parent
-  class for atom styles, defined by the atom\_style command.
-
-\item The Update class holds an integrator and a minimizer.  The
-  Integrate class is a parent style for the Verlet and rRESPA time
-  integrators, as defined by the run\_style input command.  The Min
-  class is a parent style for various energy minimizers.
-
-\item The Neighbor class builds and stores neighbor lists.  The
-  NeighList class stores a single list (for all atoms).  The
-  NeighRequest class is called by pair, fix, or compute styles when
-  they need a particular kind of neighbor list.
-
-\item The Comm class performs interprocessor communication, typically
-  of ghost atom information.  This usually involves MPI message
-  exchanges with 6 neighboring processors in the 3d logical grid of
-  processors mapped to the simulation box.  Sometimes the Irregular
-  class is used, when atoms may migrate to arbitrary processors.
-
-\item The Domain class stores the simulation box geometry, as well as
-  geometric Regions and any user definition of a Lattice.  The latter
-  are defined by region and lattice commands in an input script.
-
-\item The Force class computes various forces between atoms.  The Pair
-  parent class is for non-bonded or pair-wise forces, which in LAMMPS
-  lingo includes many-body forces such as the Tersoff 3-body
-  potential.  The Bond, Angle, Dihedral, Improper parent classes are
-  styles for bonded interactions within a static molecular topology.
-  The KSpace parent class is for computing long-range Coulombic
-  interactions.  One of its child classes, PPPM, uses the FFT3D and
-  Remap classes to communicate grid-based information with neighboring
-  processors.
-
-\item The Modify class stores lists of Fix and Compute classes, both
-  of which are parent styles.
-
-\item The Group class manipulates groups that atoms are assigned to
-  via the group command.  It also computes various attributes of
-  groups of atoms.
-
-\item The Output class is used to generate 3 kinds of output from a
-  LAMMPS simulation: thermodynamic information printed to the screen
-  and log file, dump file snapshots, and restart files.  These
-  correspond to the Thermo, Dump, and WriteRestart classes
-  respectively.  The Dump class is a parent style.
-
-\item The Timer class logs MPI timing information, output at the end
-  of a run.
-
-\end{itemize}
-
-%%\pagebreak
-%%\section{Spatial decomposition and parallel operations}
-%%distributed memory
-%%Ref to JCP paper
-%%diagram of 3d grid of procs and spatial decomp
-%%6-way comm
-%%ghost atoms, PBC added when comm (in atom class)
-
-%%\pagebreak
-%%\section{Fixes, computes, variables}
-%%fixes intercolate in timestep, store per-atom info
-%%computes based on current snapshot
-%%equal- and atom-style variables
-%%output they produce - see write-up in HowTo
-
-\pagebreak
-\section{How a timestep works}
-
-The first and most fundamental operation within LAMMPS to understand
-is how a timestep is structured.  Timestepping is performed by the
-Integrate class within the Update class.  Since Integrate is a parent
-class, corresponding to the run\_style input script command, it has
-child classes.  In this section, the timestep implemented by the
-Verlet child class is described.  A similar timestep is implemented by
-the Respa child class, for the rRESPA hierarchical timestepping
-method.  The Min parent class performs energy minimization, so does
-not perform a literal timestep.  But it has logic similar to what is
-described here, to compute forces and invoke fixes at each iteration
-of a minimization.  Differences between time integration and
-minimization are highlighted at the end of this section.
-
-The Verlet class is encoded in the src/verlet.cpp and verlet.h files.
-It implements the velocity-Verlet timestepping algorithm.  The
-workhorse method is Verlet::run(), but first we highlight several
-other methods in the class.
-
-\begin{itemize}
-
-\item The init() method is called at the beginning of each dynamics
-  run.  It simply sets some internal flags, based on user settings in
-  other parts of the code.
-
-\item The setup() or setup\_minimal() methods are also called before
-  each run.  The velocity-Verlet method requires current forces be
-  calculated before the first timestep, so these routines compute
-  forces due to all atomic interactions, using the same logic that
-  appears in the timestepping described next.  A few fixes are also
-  invoked, using the mechanism described in the next section.  Various
-  counters are also initialized before the run begins.  The
-  setup\_minimal() method is a variant that has a flag for performing
-  less setup.  This is used when runs are continued and information
-  from the previous run is still valid.  For example, if repeated
-  short LAMMPS runs are being invoked, interleaved by other commands,
-  via the ``pre no'' and ``every'' options of the run command, the
-  setup\_minimal() method is used.
-
-\item The force\_clear() method initializes force and other arrays to
-  zero before each timestep, so that forces (torques, etc) can be
-  accumulated.
-
-\end{itemize}
-
-Now for the Verlet::run() method.  Its structure in high-level pseudo
-code is shown in Fig \ref{fig:verlet}.  In the actual code in
-src/verlet.cpp some of these operations are conditionally invoked.
-
-\begin{figure}[htb]
- \begin{center}
- \begin{verbatim}
-loop over N timesteps:
-  ev_set()
-
-  fix->initial_integrate()
-  fix->post_integrate()
-
-  nflag = neighbor->decide()
-  if nflag:
-    fix->pre_exchange()
-    domain->pbc()
-    domain->reset_box()
-    comm->setup()
-    neighbor->setup_bins()
-    comm->exchange()
-    comm->borders()
-    fix->pre_neighbor()
-    neighbor->build()
-  else
-    comm->forward_comm()
-
-  force_clear()
-  fix->pre_force()
-
-  pair->compute()
-  bond->compute()
-  angle->compute()
-  dihedral->compute()
-  improper->compute()
-  kspace->compute()
-
-  comm->reverse_comm()
-
-  fix->post_force()
-  fix->final_integrate()
-  fix->end_of_step()
-
-  if any output on this step: output->write()
-  \end{verbatim}
- \end{center}
- \caption{Pseudo-code for the Verlet::run() method.}
-\label{fig:verlet}
-\end{figure}
-
-The ev\_set() method (in the parent Integrate class), sets two flags
-({\em eflag} and {\em vflag}) for energy and virial computation.  Each
-flag encodes whether global and/or per-atom energy and virial should
-be calculated on this timestep, because some fix or variable or output
-will need it.  These flags are passed to the various methods that
-compute particle interactions, so that they can skip the extra
-calculations if the energy and virial are not needed.  See the
-comments with the Integrate::ev\_set() method which document the flag
-values.
-
-At various points of the timestep, fixes are invoked,
-e.g. fix$\rightarrow$initial\_integrate().  In the code, this is
-actually done via the Modify class which stores all the Fix objects
-and lists of which should be invoked at what point in the timestep.
-Fixes are the LAMMPS mechanism for tailoring the operations of a
-timestep for a particular simulation.  As described elsewhere
-(unwritten section), each fix has one or more methods, each of which
-is invoked at a specific stage of the timestep, as in Fig
-\ref{fig:verlet}.  All the fixes defined in an input script with an
-initial\_integrate() method are invoked at the beginning of each
-timestep.  Fix nve, nvt, npt are examples, since they perform the
-start-of-timestep velocity-Verlet integration to update velocities by
-a half-step, and coordinates by a full step.  The post\_integrate()
-method is next.  Only a few fixes use this, e.g. to reflect particles
-off box boundaries in the FixWallReflect class.
-
-The decide() method in the Neighbor class determines whether neighbor
-lists need to be rebuilt on the current timestep.  If not, coordinates
-of ghost atoms are acquired by each processor via the forward\_comm()
-method of the Comm class.  If neighbor lists need to be built, several
-operations within the inner if clause of Fig \ref{fig:verlet} are
-first invoked.  The pre\_exchange() method of any defined fixes is
-invoked first.  Typically this inserts or deletes particles from the
-system.
-
-Periodic boundary conditions are then applied by the Domain class via
-its pbc() method to remap particles that have moved outside the
-simulation box back into the box.  Note that this is not done every
-timestep, but only when neighbor lists are rebuilt.  This is so that
-each processor's sub-domain will have consistent (nearby) atom
-coordinates for its owned and ghost atoms.  It is also why dumped atom
-coordinates can be slightly outside the simulation box.
-
-The box boundaries are then reset (if needed) via the reset\_box()
-method of the Domain class, e.g. if box boundaries are shrink-wrapped
-to current particle coordinates.  A change in the box size or shape
-requires internal information for communicating ghost atoms (Comm
-class) and neighbor list bins (Neighbor class) be updated.  The
-setup() method of the Comm class and setup\_bins() method of the
-Neighbor class perform the update.
-
-The code is now ready to migrate atoms that have left a processor's
-geometric sub-domain to new processors.  The exchange() method of the
-Comm class performs this operation.  The borders() method of the Comm
-class then identifies ghost atoms surrounding each processor's
-sub-domain and communicates ghost atom information to neighboring
-processors.  It does this by looping over all the atoms owned by a
-processor to make lists of those to send to each neighbor processor.
-On subsequent timesteps, the lists are used by the
-Comm::forward\_comm() method.
-
-Fixes with a pre\_neighbor() method are then called.  These typically
-re-build some data structure stored by the fix that depends on the
-current atoms owned by each processor.
-
-Now that each processor has a current list of its owned and ghost
-atoms, LAMMPS is ready to rebuild neighbor lists via the build()
-method of the Neighbor class.  This is typically done by binning all
-owned and ghost atoms, and scanning a stencil of bins around each
-owned atom's bin to make a Verlet list of neighboring atoms within the
-force cutoff plus neighbor skin distance.
-
-In the next portion of the timestep, all interaction forces between
-particles are computed, after zeroing the per-atom force vector via
-the force\_clear() method.  If the newton flag is set to ``on'' by the
-newton command, forces on both owned and ghost atoms are calculated.
-
-Pairwise forces are calculated first, which enables the global virial
-(if requested) to be calculated cheaply (at the end of the
-Pair::compute() method), by a dot product of atom coordinates and
-forces.  By including owned and ghost atoms in the dot product, the
-effect of periodic boundary conditions is correctly accounted for.
-Molecular topology interactions (bonds, angles, dihedrals, impropers)
-are calculated next.  The final contribution is from long-range
-Coulombic interactions, invoked by the KSpace class.
-
-If the newton flag is on, forces on ghost atoms are communicated and
-summed back to their corresponding owned atoms.  The reverse\_comm()
-method of the Comm class performs this operation, which is essentially
-the inverse operation of sending copies of owned atom coordinates to
-other processors' ghost atoms.
-
-At this point in the timestep, the total force on each atom is known.
-Additional force constraints (external forces, SHAKE, etc) are applied
-by Fixes that have a post\_force() method.  The second half of the
-velocity-Verlet integration is then performed (another half-step
-update of the velocities) via fixes like nve, nvt, npt.
-
-At the end of the timestep, fixes that define an end\_of\_step()
-method are invoked.  These typically perform a diagnostic calculation,
-e.g. the ave/time and ave/spatial fixes.  The final operation of the
-timestep is to perform any requested output, via the write() method of
-the Output class.  There are 3 kinds of LAMMPS output: thermodynamic
-output to the screen and log file, snapshots of atom data to a dump
-file, and restart files.  See the thermo\_style, dump, and restart
-commands for more details.
-
-The iteration performed by an energy minimization is similar to the
-dynamics timestep of Fig \ref{fig:verlet}.  Forces are computed,
-neighbor lists are built as needed, atoms migrate to new processors,
-and atom coordinates and forces are communicated to neighboring
-processors.  The only difference is what Fix class operations are
-invoked when.  Only a subset of LAMMPS fixes are useful during energy
-minimization, as explained in their individual doc pages.  The
-relevant Fix class methods are min\_pre\_exchange(),
-min\_pre\_force(), and min\_post\_force().  Each is invoked at the
-appropriate place within the minimization iteration.  For example, the
-min\_post\_force() method is analogous to the post\_force() method for
-dynamics; it is used to alter or constrain forces on each atom, which
-affects the minimization procedure.
-
-\pagebreak
-\section{Extending LAMMPS}
-
-The Section\_modify.html file in the doc directory of
-the LAMMPS distribution gives an overview of how LAMMPS can
-be extended by writing new classes that derive from existing
-parent classes in LAMMPS.  Here, some specific coding
-details are provided for writing a new fix.
-
-\subsection{New fixes}
-
-(this section provided by Kirill Lykov)
-\vspace{0.25cm}
-
-Writing fixes is a flexible way of extending LAMMPS.  Users can
-implement many things using fixes:
-
-\begin{itemize}
-\item changing particles attributes (positions, velocities, forces, etc.).
-Example: FixFreeze.
-\item reading/writing data. Example: FixRestart.
-\item implementing boundary conditions. Example: FixWall.
-\item saving information about particles for future use (previous positions,
-for instance). Example: FixStoreState.
-\end{itemize}
-
-All fixes are derived from class Fix and must have a constructor with the
-signature: FixMine(class LAMMPS *, int, char **).
-
-Every fix must be registered in LAMMPS by writing the following lines
-of code in the header before include guards:
-
- \begin{center}
- \begin{verbatim}
-#ifdef FIX_CLASS
-FixStyle(your/fix/name,FixMine)
-#else
-  \end{verbatim}
- \end{center}
-
-Where ``your/fix/name'' is a name of your fix in the script and FixMine
-is the name of the class. This code allows LAMMPS to find your fix
-when it parses input script. In addition, your fix header must be
-included in the file ``style\_fix.h''. If you use LAMMPS make,
-this file is generated automatically - all files starting with prefix
-fix\_ are included, so call your header the same way. Otherwise, don't
-forget to add your include into ``style\_fix.h''.
-
-Let's write a simple fix which will print average velocity at the end
-of each timestep. First of all, implement a constructor:
-
- \begin{center}
- \begin{verbatim}
-FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
-: Fix(lmp, narg, arg)
-{
-  if (narg < 4)
-      error->all(FLERR,"Illegal fix print command");
-
-  nevery = atoi(arg[3]);
-  if (nevery <= 0)
-      error->all(FLERR,"Illegal fix print command");
-}
-  \end{verbatim}
- \end{center}
-
-In the constructor you should parse your fix arguments which are
-specified in the script. All fixes have pretty much the same syntax: fix
-[fix\_identifier] [group\_name] [fix\_name] [fix\_arguments]. The
-first 3 parameters are parsed by Fix class constructor, while
-[fix\_arguments] should be parsed by you. In our case, we need to
-specify how often we want to print an average velocity. For instance,
-once in 50 timesteps: fix 1 all print/vel 50. There is a special variable
-in Fix class called nevery which specifies how often method
-end\_of\_step() is called. Thus all we need to do is just set it up.
-
-The next method we need to implement is setmask():
-\begin{center}
-\begin{verbatim}
-int FixPrintVel::setmask()
-{
-  int mask = 0;
-  mask |= FixConst::END_OF_STEP;
-  return mask;
-}
-\end{verbatim}
-\end{center}
-
-Here you specify which methods of your fix should be called during
-execution. For instance, END\_OF\_STEP corresponds to the
-end\_of\_step() method. Overall, there are 8 most important methods,
-which are called in a predefined order during the execution of the
-Verlet algorithm, as was mentioned in Section 3:
-
-\begin{itemize}
-\item initial\_integrate()
-\item post\_integrate()
-\item pre\_exchange()
-\item pre\_neighbor()
-\item pre\_force()
-\item post\_force()
-\item final\_integrate()
-\item end\_of\_step()
-\end{itemize}
-
-Fix developer must understand when he wants to execute his code.  In
-case if we want to write FixPrintVel, we need only end\_of\_step():
-
-\begin{center}
-\begin{verbatim}
-void FixPrintVel::end_of_step()
-{
-  // for add3, scale3
-  using namespace MathExtra;
-
-  double** v = atom->v;
-  int nlocal = atom->nlocal;
-  double localAvgVel[4]; // 4th element for particles count
-  memset(localAvgVel, 0, 4 * sizeof(double));
-  for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
-    add3(localAvgVel, v[particleInd], localAvgVel);
-  }
-  localAvgVel[3] = nlocal;
-  double globalAvgVel[4];
-  memset(globalAvgVel, 0, 4 * sizeof(double));
-  MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
-  scale3(1.0 / globalAvgVel[3], globalAvgVel);
-  if (comm->me == 0) {
-    printf("%e, %e, %e\n",
-      globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
-  }
-}
-\end{verbatim}
-\end{center}
-
-In the code above, we use MathExtra routines defined in
-``math\_extra.h''.  There are bunch of math functions to work with
-arrays of doubles as with math vectors.
-
-In this code we use an instance of Atom class. This object is stored
-in the Pointers class (see ``pointers.h''). This object contains all
-global information about the simulation system. Data from Pointers
-class available to all classes inherited from it using protected
-inheritance. Hence when you write you own class, which is going to use
-LAMMPS data, don't forget to inherit from Pointers.  When writing
-fixes we inherit from class Fix which is inherited from Pointers so
-there is no need to inherit from it directly.
-
-The code above computes average velocity for all particles in the
-simulation.  Yet you have one unused parameter in fix call from the
-script - [group\_name].  This parameter specifies the group of atoms
-used in the fix. So we should compute average for all particles in the
-simulation if group\_name == all, but it can be any group. The group
-information is specified by groupbit which is defined in class Fix:
-
-\begin{center}
-\begin{verbatim}
-for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
-  if (atom->mask[particleInd] & groupbit) {
-  //Do all job here
-  }
-}
-\end{verbatim}
-\end{center}
-
-Class Atom encapsulates atoms positions, velocities, forces, etc. User
-can access them using particle index. Note, that particle indexes are
-usually changed every timestep because of sorting.
-
-Let's consider another Fix example. We want to have a fix which stores
-atoms position from previous time step in your fix. The local atoms
-indexes will not be valid on the next iteration. In order to handle
-this situation there are several methods which should be implemented:
-
-\begin{itemize}
-\item \verb|double memory_usage| - return how much memory fix uses
-\item \verb|void grow_arrays(int)| - do reallocation of the per particle arrays
-  in your fix
-\item \verb|void copy_arrays(int i, int j, int delflag)| - copy i-th per-particle
-  information to j-th. Used when atoms sorting is performed. if delflag is set
-  and atom j owns a body, move the body information to atom i.
-\item \verb|void set_arrays(int i)| - sets i-th particle related information to zero
-\end{itemize}
-
-Note that if your class implements these methods, it must add calls to
-add\_callback and delete\_callback to its constructor and destructor:
-
-\begin{center}
-\begin{verbatim}
-FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg)  {
-  //...
-  atom->add_callback(0);
-}
-
-FixSavePos::~FixSavePos() {
-  atom->delete_callback(id, 0);
-}
-\end{verbatim}
-\end{center}
-
-Since we want to store positions of atoms from previous timestep, we
-need to add double** x to the header file. Then add allocation code to
-constructor:
-
-\verb|memory->create(this->x, atom->nmax, 3, "FixSavePos:x");|. Free memory
-at destructor: \verb|memory->destroy(x);|
-
-Finally, implement mentioned methods:
-
-\begin{center}
-\begin{verbatim}
-double FixSavePos::memory_usage()
-{
-  int nmax = atom->nmax;
-  double bytes = 0.0;
-  bytes += nmax * 3 * sizeof(double);
-  return bytes;
-}
-
-void FixSavePos::grow_arrays(int nmax)
-{
-    memory->grow(this->x, nmax, 3, "FixSavePos:x");
-}
-
-void FixSavePos::copy_arrays(int i, int j, int delflag)
-{
-    memcpy(this->x[j], this->x[i], sizeof(double) * 3);
-}
-
-void FixSavePos::set_arrays(int i)
-{
-    memset(this->x[i], 0, sizeof(double) * 3);
-}
-
-int FixSavePos::pack_exchange(int i, double *buf)
-{
-  int m = 0;
-  buf[m++] = x[i][0];
-  buf[m++] = x[i][1];
-  buf[m++] = x[i][2];
-
-  return m;
-}
-
-int FixSavePos::unpack_exchange(int nlocal, double *buf)
-{
-  int m = 0;
-  x[nlocal][0] = buf[m++];
-  x[nlocal][1] = buf[m++];
-  x[nlocal][2] = buf[m++];
-
-  return m;
-}
-\end{verbatim}
-\end{center}
-
-Now, a little bit about memory allocation. We used Memory class which
-is just a bunch of template functions for allocating 1D and 2D
-arrays. So you need to add include ``memory.h'' to have access to them.
-
-Finally, if you need to write/read some global information used in
-your fix to the restart file, you might do it by setting flag
-restart\_global = 1 in the constructor and implementing methods void
-write\_restart(FILE *fp) and void restart(char *buf).
-
-\end{document}
--- a/doc/src/Errors_messages.rst
+++ b/doc/src/Errors_messages.rst
@ -502,7 +502,7 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
 *Bond/react: Unknown section in map file*
   Please ensure reaction map files are properly formatted.

-*Bond/react: Atom affected by reaction too close to template edge*
+*Bond/react: Atom/Bond type affected by reaction too close to template edge*
   This means an atom which changes type or connectivity during the
   reaction is too close to an 'edge' atom defined in the map
   file.  This could cause incorrect assignment of bonds, angle, etc.
--- a/doc/src/Howto_cmake.rst
+++ b/doc/src/Howto_cmake.rst
@ -191,19 +191,19 @@ You start the command ``ccmake ../cmake`` in the ``build`` folder.
 .. list-table::

   * - .. figure:: JPG/ccmake-initial.png
-          :target: JPG/ccmake-initial.png
+          :scale: 33%
          :align: center

          Initial ``ccmake`` screen

     - .. figure:: JPG/ccmake-config.png
-          :target: JPG/ccmake-config.png
+          :scale: 33%
          :align: center

          Configure output of ``ccmake``

     - .. figure:: JPG/ccmake-options.png
-          :target: JPG/ccmake-options.png
+          :scale: 33%
          :align: center

          Options screen of ``ccmake``
@ -236,19 +236,19 @@ not required, it can also be entered from the GUI.
 .. list-table::

   * - .. figure:: JPG/cmake-gui-initial.png
-          :target: JPG/cmake-gui-initial.png
+          :scale: 40%
          :align: center

          Initial ``cmake-gui`` screen

     - .. figure:: JPG/cmake-gui-popup.png
-          :target: JPG/cmake-gui-popup.png
+          :scale: 60%
          :align: center

          Generator selection in ``cmake-gui``

     - .. figure:: JPG/cmake-gui-options.png
-          :target: JPG/cmake-gui-options.png
+          :scale: 40%
          :align: center

          Options screen of ``cmake-gui``
--- a/doc/src/Howto_couple.rst
+++ b/doc/src/Howto_couple.rst
@ -12,96 +12,52 @@ LAMMPS can be coupled to other codes in at least 4 ways.  Each has
 advantages and disadvantages, which you will have to think about in the
 context of your application.

----------
+1. Define a new :doc:`fix <fix>` command that calls the other code.  In
+   this scenario, LAMMPS is the driver code.  During timestepping,
+   the fix is invoked, and can make library calls to the other code,
+   which has been linked to LAMMPS as a library.  This is how the
+   :ref:`LATTE <PKG-LATTE>` package, which performs density-functional
+   tight-binding calculations using the `LATTE software <https://github.com/lanl/LATTE>`_
+   to compute forces, is hooked to LAMMPS.
+   See the :doc:`fix latte <fix_latte>` command for more details.
+   Also see the :doc:`Modify <Modify>` doc pages for info on how to
+   add a new fix to LAMMPS.

-(1) Define a new :doc:`fix <fix>` command that calls the other code.  In
-this scenario, LAMMPS is the driver code.  During its timestepping,
-the fix is invoked, and can make library calls to the other code,
-which has been linked to LAMMPS as a library.  This is the way the
-`POEMS <poems_>`_ package that performs constrained rigid-body motion on
-groups of atoms is hooked to LAMMPS.  See the :doc:`fix poems <fix_poems>` command for more details.  See the
-:doc:`Modify <Modify>` doc pages for info on how to add a new fix to
-LAMMPS.
+.. spacer

-.. _poems: http://www.rpi.edu/~anderk5/lab
+2. Define a new LAMMPS command that calls the other code.  This is
+   conceptually similar to method (1), but in this case LAMMPS and the
+   other code are on a more equal footing.  Note that now the other code
+   is not called during the timestepping of a LAMMPS run, but between
+   runs.  The LAMMPS input script can be used to alternate LAMMPS runs
+   with calls to the other code, invoked via the new command.  The
+   :doc:`run <run>` command facilitates this with its *every* option,
+   which makes it easy to run a few steps, invoke the command, run a few
+   steps, invoke the command, etc.

----------
+   In this scenario, the other code can be called as a library, as in
+   1., or it could be a stand-alone code, invoked by a system() call
+   made by the command (assuming your parallel machine allows one or
+   more processors to start up another program).  In the latter case the
+   stand-alone code could communicate with LAMMPS through files that the
+   command writes and reads.

-(2) Define a new LAMMPS command that calls the other code.  This is
-conceptually similar to method (1), but in this case LAMMPS and the
-other code are on a more equal footing.  Note that now the other code
-is not called during the timestepping of a LAMMPS run, but between
-runs.  The LAMMPS input script can be used to alternate LAMMPS runs
-with calls to the other code, invoked via the new command.  The
-:doc:`run <run>` command facilitates this with its *every* option, which
-makes it easy to run a few steps, invoke the command, run a few steps,
-invoke the command, etc.
+   See the :doc:`Modify command <Modify_command>` doc page for info on how
+   to add a new command to LAMMPS.

-In this scenario, the other code can be called as a library, as in
-(1), or it could be a stand-alone code, invoked by a system() call
-made by the command (assuming your parallel machine allows one or more
-processors to start up another program).  In the latter case the
-stand-alone code could communicate with LAMMPS through files that the
-command writes and reads.
+.. spacer

-See the :doc:`Modify command <Modify_command>` doc page for info on how
-to add a new command to LAMMPS.
+3. Use LAMMPS as a library called by another code.  In this case the
+   other code is the driver and calls LAMMPS as needed.  Or a wrapper
+   code could link and call both LAMMPS and another code as libraries.
+   Again, the :doc:`run <run>` command has options that allow it to be
+   invoked with minimal overhead (no setup or clean-up) if you wish to
+   do multiple short runs, driven by another program.  Details about
+   using the library interface are given in the :doc:`library API
+   <pg_library>` documentation.

----------
+.. spacer

-(3) Use LAMMPS as a library called by another code.  In this case the
-other code is the driver and calls LAMMPS as needed.  Or a wrapper
-code could link and call both LAMMPS and another code as libraries.
-Again, the :doc:`run <run>` command has options that allow it to be
-invoked with minimal overhead (no setup or clean-up) if you wish to do
-multiple short runs, driven by another program.
-
-Examples of driver codes that call LAMMPS as a library are included in
-the examples/COUPLE directory of the LAMMPS distribution; see
-examples/COUPLE/README for more details:
-
-* simple: simple driver programs in C++ and C which invoke LAMMPS as a
-  library
-* plugin: simple driver program in C which invokes LAMMPS as a plugin
-  from a shared library.
-* lammps_quest: coupling of LAMMPS and `Quest <quest_>`_, to run classical
-  MD with quantum forces calculated by a density functional code
-* lammps_spparks: coupling of LAMMPS and `SPPARKS <spparks_>`_, to couple
-  a kinetic Monte Carlo model for grain growth using MD to calculate
-  strain induced across grain boundaries
-
-.. _quest: http://dft.sandia.gov/Quest
-
-.. _spparks: http://www.sandia.gov/~sjplimp/spparks.html
-
-The :doc:`Build basics <Build_basics>` doc page describes how to build
-LAMMPS as a library.  Once this is done, you can interface with LAMMPS
-either via C++, C, Fortran, or Python (or any other language that
-supports a vanilla C-like interface).  For example, from C++ you could
-create one (or more) "instances" of LAMMPS, pass it an input script to
-process, or execute individual commands, all by invoking the correct
-class methods in LAMMPS.  From C or Fortran you can make function
-calls to do the same things.  See the :doc:`Python <Python_head>` doc
-pages for a description of the Python wrapper provided with LAMMPS
-that operates through the LAMMPS library interface.
-
-The files src/library.cpp and library.h contain the C-style interface
-to LAMMPS.  See the :doc:`Howto library <Howto_library>` doc page for a
-description of the interface and how to extend it for your needs.
-
-Note that the lammps_open() function that creates an instance of
-LAMMPS takes an MPI communicator as an argument.  This means that
-instance of LAMMPS will run on the set of processors in the
-communicator.  Thus the calling code can run LAMMPS on all or a subset
-of processors.  For example, a wrapper script might decide to
-alternate between LAMMPS and another code, allowing them both to run
-on all the processors.  Or it might allocate half the processors to
-LAMMPS and half to the other code and run both codes simultaneously
-before syncing them up periodically.  Or it might instantiate multiple
-instances of LAMMPS to perform different calculations.
-
----------
-
-(4) Couple LAMMPS with another code in a client/server mode.  This is
-described on the :doc:`Howto client/server <Howto_client_server>` doc
-page.
+4. Couple LAMMPS with another code in a client/server mode.  This is
+   described on the :doc:`Howto client/server <Howto_client_server>` doc
+   page.
--- a/doc/src/Howto_library.rst
+++ b/doc/src/Howto_library.rst
@ -2,241 +2,36 @@ Library interface to LAMMPS
 ===========================

 As described on the :doc:`Build basics <Build_basics>` doc page, LAMMPS
-can be built as a library, so that it can be called by another code,
-used in a :doc:`coupled manner <Howto_couple>` with other codes, or
-driven through a :doc:`Python interface <Python_head>`.
+can be built as a static or shared library, so that it can be called by
+another code, used in a :doc:`coupled manner <Howto_couple>` with other
+codes, or driven through a :doc:`Python interface <Python_head>`.

-All of these methodologies use a C-style interface to LAMMPS that is
-provided in the files src/library.cpp and src/library.h.  The
-functions therein have a C-style argument list, but contain C++ code
-you could write yourself in a C++ application that was invoking LAMMPS
-directly.  The C++ code in the functions illustrates how to invoke
-internal LAMMPS operations.  Note that LAMMPS classes are defined
-within a LAMMPS namespace (LAMMPS_NS) if you use them from another C++
-application.
+At the core of LAMMPS is the ``LAMMPS`` class which encapsulates the
+state of the simulation program through the state of the various class
+instances that it is composed of.  So a calculation using LAMMPS
+requires creating an instance of the ``LAMMPS`` class and then sending it
+(text) commands, either individually or from a file, or performing other
+operations that modify the state stored inside that instance or drive
+simulations.  This is essentially what the ``src/main.cpp`` file does
+as well for the standalone LAMMPS executable, reading commands
+either from an input file or stdin.

-The examples/COUPLE and python/examples directories have example C++
-and C and Python codes which show how a driver code can link to LAMMPS
-as a library, run LAMMPS on a subset of processors, grab data from
-LAMMPS, change it, and put it back into LAMMPS.
+Creating a LAMMPS instance can be done by using C++ code directly or
+through a C-style interface library to LAMMPS that is provided in the
+files ``src/library.cpp`` and ``src/library.h``.  This
+:ref:`C language API <lammps_c_api>` can be used from C and C++,
+and is also the basis for the :doc:`Python <pg_python>` and
+:doc:`Fortran <pg_fortran>` interfaces or wrappers included in the
+LAMMPS source code.

-Thread-safety
-------------
+The ``examples/COUPLE`` and ``python/examples`` directories contain some
+example programs written in C++, C, Fortran, and Python, which show how
+a driver code can link to LAMMPS as a library, run LAMMPS on a subset of
+processors (so the others are available to run some other code
+concurrently), grab data from LAMMPS, change it, and send it back into
+LAMMPS.

-LAMMPS has not initially been conceived as a thread-safe program, but
-over the years changes have been applied to replace operations that
-collide with creating multiple LAMMPS instances from multiple-threads
-of the same process with thread-safe alternatives.  This primarily
-applies to the core LAMMPS code and less so on add-on packages, especially
-when those packages require additional code in the *lib* folder,
-interface LAMMPS to Fortran libraries, or the code uses static variables
-(like the USER-COLVARS package).
+Detailed documentation of the available APIs and examples of how to
+use them can be found in the :doc:`Programmer Documentation
+<pg_library>` section of this manual.

-Another major issue to deal with is to correctly handle MPI.  Creating
-a LAMMPS instance requires passing an MPI communicator, or it assumes
-the MPI_COMM_WORLD communicator, which spans all MPI processor ranks.
-When creating multiple LAMMPS object instances from different threads,
-this communicator has to be different for each thread or else collisions
-can happen, or it has to be guaranteed, that only one thread at a time
-is active.  MPI communicators, however, are not a problem, if LAMMPS is
-compiled with the MPI STUBS library, which implies that there is no MPI
-communication and only 1 MPI rank.
-
-Provided APIs
-------------
-
-The file src/library.cpp contains the following functions for creating
-and destroying an instance of LAMMPS and sending it commands to
-execute.  See the documentation in the src/library.cpp file for
-details.
-
-.. note::
-
-   You can write code for additional functions as needed to define
-   how your code talks to LAMMPS and add them to src/library.cpp and
-   src/library.h, as well as to the :doc:`Python interface <Python_head>`.
-   The added functions can access or change any internal LAMMPS data you
-   wish.
-
-.. code-block:: c
-
-   void lammps_open(int, char **, MPI_Comm, void **)
-   void lammps_open_no_mpi(int, char **, void **)
-   void lammps_close(void *)
-   int lammps_version(void *)
-   void lammps_file(void *, char *)
-   char *lammps_command(void *, char *)
-   void lammps_commands_list(void *, int, char **)
-   void lammps_commands_string(void *, char *)
-   void lammps_free(void *)
-
-The lammps_open() function is used to initialize LAMMPS, passing in a
-list of strings as if they were :doc:`command-line arguments <Run_options>` when LAMMPS is run in stand-alone mode
-from the command line, and a MPI communicator for LAMMPS to run under.
-It returns a ptr to the LAMMPS object that is created, and which is
-used in subsequent library calls.  The lammps_open() function can be
-called multiple times, to create multiple instances of LAMMPS.
-
-LAMMPS will run on the set of processors in the communicator.  This
-means the calling code can run LAMMPS on all or a subset of
-processors.  For example, a wrapper script might decide to alternate
-between LAMMPS and another code, allowing them both to run on all the
-processors.  Or it might allocate half the processors to LAMMPS and
-half to the other code and run both codes simultaneously before
-syncing them up periodically.  Or it might instantiate multiple
-instances of LAMMPS to perform different calculations.
-
-The lammps_open_no_mpi() function is similar except that no MPI
-communicator is passed from the caller.  Instead, MPI_COMM_WORLD is
-used to instantiate LAMMPS, and MPI is initialized if necessary.
-
-The lammps_close() function is used to shut down an instance of LAMMPS
-and free all its memory.
-
-The lammps_version() function can be used to determine the specific
-version of the underlying LAMMPS code. This is particularly useful
-when loading LAMMPS as a shared library via dlopen(). The code using
-the library interface can then use this information to adapt to
-changes to the LAMMPS command syntax between versions. The returned
-LAMMPS version code is an integer (e.g. 2 Sep 2015 results in
-20150902) that grows with every new LAMMPS version.
-
-The lammps_file(), lammps_command(), lammps_commands_list(), and
-lammps_commands_string() functions are used to pass one or more
-commands to LAMMPS to execute, the same as if they were coming from an
-input script.
-
-Via these functions, the calling code can read or generate a series of
-LAMMPS commands one or multiple at a time and pass it through the library
-interface to setup a problem and then run it in stages.  The caller
-can interleave the command function calls with operations it performs,
-calls to extract information from or set information within LAMMPS, or
-calls to another code's library.
-
-The lammps_file() function passes the filename of an input script.
-The lammps_command() function passes a single command as a string.
-The lammps_commands_list() function passes multiple commands in a
-char\*\* list.  In both lammps_command() and lammps_commands_list(),
-individual commands may or may not have a trailing newline.  The
-lammps_commands_string() function passes multiple commands
-concatenated into one long string, separated by newline characters.
-In both lammps_commands_list() and lammps_commands_string(), a single
-command can be spread across multiple lines, if the last printable
-character of all but the last line is "&", the same as if the lines
-appeared in an input script.
-
-The lammps_free() function is a clean-up function to free memory that
-the library allocated previously via other function calls.  See
-comments in src/library.cpp file for which other functions need this
-clean-up.
-
-The file src/library.cpp also contains these functions for extracting
-information from LAMMPS and setting value within LAMMPS.  Again, see
-the documentation in the src/library.cpp file for details, including
-which quantities can be queried by name:
-
-.. code-block:: c
-
-   int lammps_extract_setting(void *, char *)
-   void *lammps_extract_global(void *, char *)
-   void lammps_extract_box(void *, double *, double *,
-                           double *, double *, double *, int *, int *)
-   void *lammps_extract_atom(void *, char *)
-   void *lammps_extract_compute(void *, char *, int, int)
-   void *lammps_extract_fix(void *, char *, int, int, int, int)
-   void *lammps_extract_variable(void *, char *, char *)
-
-The extract_setting() function returns info on the size
-of data types (e.g. 32-bit or 64-bit atom IDs) used
-by the LAMMPS executable (a compile-time choice).
-
-The other extract functions return a pointer to various global or
-per-atom quantities stored in LAMMPS or to values calculated by a
-compute, fix, or variable.  The pointer returned by the
-extract_global() function can be used as a permanent reference to a
-value which may change.  For the extract_atom() method, see the
-extract() method in the src/atom.cpp file for a list of valid per-atom
-properties.  New names could easily be added if the property you want
-is not listed.  For the other extract functions, the underlying
-storage may be reallocated as LAMMPS runs, so you need to re-call the
-function to assure a current pointer or returned value(s).
-
-.. code-block:: c
-
-   double lammps_get_thermo(void *, char *)
-   int lammps_get_natoms(void *)
-
-   int lammps_set_variable(void *, char *, char *)
-   void lammps_reset_box(void *, double *, double *, double, double, double)
-
-The lammps_get_thermo() function returns the current value of a thermo
-keyword as a double precision value.
-
-The lammps_get_natoms() function returns the total number of atoms in
-the system and can be used by the caller to allocate memory for the
-lammps_gather_atoms() and lammps_scatter_atoms() functions.
-
-The lammps_set_variable() function can set an existing string-style
-variable to a new string value, so that subsequent LAMMPS commands can
-access the variable.
-
-The lammps_reset_box() function resets the size and shape of the
-simulation box, e.g. as part of restoring a previously extracted and
-saved state of a simulation.
-
-.. code-block:: c
-
-   void lammps_gather_atoms(void *, char *, int, int, void *)
-   void lammps_gather_atoms_concat(void *, char *, int, int, void *)
-   void lammps_gather_atoms_subset(void *, char *, int, int, int, int *, void *)
-   void lammps_scatter_atoms(void *, char *, int, int, void *)
-   void lammps_scatter_atoms_subset(void *, char *, int, int, int, int *, void *)
-
-The gather functions collect peratom info of the requested type (atom
-coords, atom types, forces, etc) from all processors, and returns the
-same vector of values to each calling processor.  The scatter
-functions do the inverse.  They distribute a vector of peratom values,
-passed by all calling processors, to individual atoms, which may be
-owned by different processors.
-
-.. warning::
-
-   These functions are not compatible with the
-   -DLAMMPS_BIGBIG setting when compiling LAMMPS.  Dummy functions
-   that result in an error message and abort will be substituted
-   instead of resulting in random crashes and memory corruption.
-
-The lammps_gather_atoms() function does this for all N atoms in the
-system, ordered by atom ID, from 1 to N.  The
-lammps_gather_atoms_concat() function does it for all N atoms, but
-simply concatenates the subset of atoms owned by each processor.  The
-resulting vector is not ordered by atom ID.  Atom IDs can be requested
-by the same function if the caller needs to know the ordering.  The
-lammps_gather_atoms_subset() function allows the caller to request values
-for only a subset of atoms (identified by ID).
-For all 3 gather functions, per-atom image flags can be retrieved in 2 ways.
-If the count is specified as 1, they are returned
-in a packed format with all three image flags stored in a single integer.
-If the count is specified as 3, the values are unpacked into xyz flags
-by the library before returning them.
-
-The lammps_scatter_atoms() function takes a list of values for all N
-atoms in the system, ordered by atom ID, from 1 to N, and assigns
-those values to each atom in the system.  The
-lammps_scatter_atoms_subset() function takes a subset of IDs as an
-argument and only scatters those values to the owning atoms.
-
-.. code-block:: c
-
-   void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
-                            imageint *, int)
-
-The lammps_create_atoms() function takes a list of N atoms as input
-with atom types and coords (required), and optionally atom IDs and
-velocities and image flags.  It uses the coords of each atom to assign
-it as a new atom to the processor that owns it.  This function is
-useful to add atoms to a simulation or (in tandem with
-lammps_reset_box()) to restore a previously extracted and saved state
-of a simulation.  Additional properties for the new atoms can then be
-assigned via the lammps_scatter_atoms() or lammps_extract_atom()
-functions.
--- a/doc/src/Install_linux.rst
+++ b/doc/src/Install_linux.rst
@ -79,13 +79,13 @@ To get a copy of the current potentials files:
 which will download the potentials files to
 ``/usr/share/lammps-stable/potentials``.  The ``lmp_stable`` binary is
 hard-coded to look for potential files in this directory (it does not
-use the `LAMMPS_POTENTIALS` environment variable, as described
+use the ``LAMMPS_POTENTIALS`` environment variable, as described
 in :doc:`pair_coeff <pair_coeff>` command).

 The ``lmp_stable`` binary is built with the :ref:`KIM package <kim>` which
-results in the above command also installing the `kim-api` binaries when LAMMPS
+results in the above command also installing the ``kim-api`` binaries when LAMMPS
 is installed.  In order to use potentials from `openkim.org <openkim_>`_, you
-can install the `openkim-models` package
+can install the ``openkim-models`` package

 .. code-block:: bash

--- a/doc/src/Intro_website.rst
+++ b/doc/src/Intro_website.rst
@ -23,7 +23,6 @@ this Intr are included in this list.
 * `Mail list <https://lammps.sandia.gov/mail.html>`_
 * `Workshops <https://lammps.sandia.gov/workshops.html>`_
 * `Tutorials <https://lammps.sandia.gov/tutorials.html>`_
-* `Developer guide <https://lammps.sandia.gov/Developer.pdf>`_

 * `Pre- and post-processing tools for LAMMPS <https://lammps.sandia.gov/prepost.html>`_
 * `Other software usable with LAMMPS <https://lammps.sandia.gov/offsite.html>`_
--- a/doc/src/JPG/lammps-classes.png
+++ b/doc/src/JPG/lammps-classes.png
--- a/doc/src/Manual.rst
+++ b/doc/src/Manual.rst
@ -27,8 +27,7 @@ all LAMMPS development is coordinated.
 The content for this manual is part of the LAMMPS distribution.  You
 can build a local copy of the Manual as HTML pages or a PDF file, by
 following the steps on the :doc:`Manual build <Manual_build>` doc page.
-There is also a `Developer.pdf <Developer.pdf>`_ document which gives
-a brief description of the basic code structure of LAMMPS.
+The manual is split into two parts: 1) User documentation and 2) Programmer documentation.

 ----------

@ -55,11 +54,24 @@ every LAMMPS command.
   Howto
   Examples
   Tools
-   Modify
   Python_head
   Errors
   Manual_build

+.. _programmer_documentation:
+.. toctree::
+   :maxdepth: 2
+   :numbered: 3
+   :caption: Programmer Documentation
+   :name: progdoc
+   :includehidden:
+
+   pg_library
+   Modify
+   pg_developer
+..   pg_modify
+..   pg_base
+
 .. toctree::
   :caption: Index
   :name: index
--- a/doc/src/Manual_build.rst
+++ b/doc/src/Manual_build.rst
@ -14,7 +14,6 @@ files. Here is a list with descriptions:
   lammps.1         # man page for the lammps command
   msi2lmp.1        # man page for the msi2lmp command
   Manual.pdf       # large PDF version of entire manual
-   Developer.pdf    # small PDF with info about how LAMMPS is structured
   LAMMPS.epub      # Manual in ePUB e-book format
   LAMMPS.mobi      # Manual in MOBI e-book format
   docenv           # virtualenv folder for processing the manual sources
@ -35,7 +34,7 @@ of two ways:

 a. You can "fetch" the current HTML and PDF files from the LAMMPS web
   site.  Just type ``make fetch``.  This should download a html_www
-   directory and Manual_www.pdf/Developer_www.pdf files.  Note that if
+   directory and a Manual_www.pdf file.  Note that if
   new LAMMPS features have been added more recently than the date of
   your LAMMPS version, the fetched documentation will include those
   changes (but your source code will not, unless you update your local
@ -49,6 +48,11 @@ b. You can build the HTML or PDF files yourself, by typing ``make html``
   only once, unless you type ``make clean-all``.  After that, viewing and
   processing of the documentation can be done without internet access.

+A current version of the manual (latest patch release, aka unstable branch)
+is available online at: `https://lammps.sandia.gov/doc/Manual.html <https://lammps.sandia.gov/doc/Manual.html>`_.
+A version of the manual corresponding to the ongoing development
+(aka master branch) is available online at: `https://docs.lammps.org/ <https://docs.lammps.org/>`_.
+
 ----------

 The generation of all documentation is managed by the Makefile in the
@ -58,10 +62,9 @@ available:
 .. code-block:: bash

   make html          # generate HTML in html dir using Sphinx
-   make pdf           # generate 2 PDF files (Manual.pdf,Developer.pdf)
-                      #   in doc dir via htmldoc and pdflatex
-   make fetch         # fetch HTML doc pages and 2 PDF files from web site
-                      #   as a tarball and unpack into html dir and 2 PDFs
+   make pdf           # generate PDF as Manual.pdf using Sphinx and pdflatex
+   make fetch         # fetch HTML doc pages and PDF file from web site
+                      #   as a tarball and unpack into html dir and PDF
   make epub          # generate LAMMPS.epub in ePUB format using Sphinx
   make mobi          # generate LAMMPS.mobi in MOBI format using ebook-convert

--- a/doc/src/Modify.rst
+++ b/doc/src/Modify.rst
@ -1,5 +1,5 @@
-Modify & extend LAMMPS
-**********************
+Modifying & extending LAMMPS
+****************************

 LAMMPS is designed in a modular fashion so as to be easy to modify and
 extend with new functionality.  In fact, about 95% of its source code
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@ -1692,7 +1692,7 @@ USER-MEAMC package
 **Contents:**

 A pair style for the modified embedded atom (MEAM) potential
-translated from the Fortran version in the (obsolete) "MEAM" package
+translated from the Fortran version in the (obsolete) MEAM package
 to plain C++. The USER-MEAMC fully replaces the MEAM package, which
 has been removed from LAMMPS after the 12 December 2018 version.

--- a/doc/src/Packages_user.rst
+++ b/doc/src/Packages_user.rst
@ -6,7 +6,7 @@ name gives more details.

 User packages have been contributed by users, and begin with the
 "user" prefix.  If a contribution is a single command (single file),
-it is typically in the user-misc package.  User packages don't
+it is typically in the USER-MISC package.  User packages don't
 necessarily meet the requirements of the :doc:`standard packages <Packages_standard>`. This means the developers will try
 to keep things working and usually can answer technical questions
 about compiling the package. If you have problems using a specific
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@ -89,7 +89,6 @@ Miscellaneous tools
   :columns: 6

   * :ref:`CMake <cmake>`
-   * :ref:`doxygen <doxygen>`
   * :ref:`emacs <emacs>`
   * :ref:`i-pi <ipi>`
   * :ref:`kate <kate>`
@ -254,21 +253,6 @@ The tool is authored by Xiaowang Zhou (Sandia), xzhou at sandia.gov.

 ----------

-.. _doxygen:
-
-doxygen tool
--------------------------
-
-The tools/doxygen directory contains a shell script called
-doxygen.sh which can generate a call graph and API lists using
-the `Doxygen software <http://doxygen.org>`_.
-
-See the included README file for details.
-
-The tool is authored by Nandor Tamaskovics, numericalfreedom at googlemail.com.
-
----------
-
 .. _drude:

 drude tool
--- a/doc/src/fix_bond_react.rst
+++ b/doc/src/fix_bond_react.rst
@ -14,19 +14,22 @@ Syntax
     react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ...
     ...

-* ID, group-ID are documented in :doc:`fix <fix>` command. Group-ID is ignored.
+* ID, group-ID are documented in :doc:`fix <fix>` command.
 * bond/react = style name of this fix command
 * the common keyword/values may be appended directly after 'bond/react'
 * this applies to all reaction specifications (below)
-* common_keyword = *stabilization*
+* common_keyword = *stabilization* or *reset_mol_ids*

  .. parsed-literal::

       *stabilization* values = *no* or *yes* *group-ID* *xmax*
-         *no* = no reaction site stabilization
+         *no* = no reaction site stabilization (default)
         *yes* = perform reaction site stabilization
           *group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction
           *xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator
+       *reset_mol_ids* values = *yes* or *no*
+         *yes* = update molecule IDs based on new global topology (default)
+         *no* = do not update molecule IDs

 * react = mandatory argument indicating new reaction specification
 * react-ID = user-assigned name for the reaction
@ -50,9 +53,9 @@ Syntax
         *stabilize_steps* value = timesteps
           timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms
         *update_edges* value = *none* or *charges* or *custom*
-           none = do not update topology near the edges of reaction templates
-           charges = update atomic charges of all atoms in reaction templates
-           custom = force the update of user-specified atomic charges
+           *none* = do not update topology near the edges of reaction templates
+           *charges* = update atomic charges of all atoms in reaction templates
+           *custom* = force the update of user-specified atomic charges

 Examples
 """"""""
@ -154,6 +157,13 @@ due to the internal dynamic grouping performed by fix bond/react.
   If the group-ID is an existing static group, react-group-IDs
   should also be specified as this static group, or a subset.

+The *reset_mol_ids* keyword invokes the :doc:`reset_mol_ids <reset_mol_ids>`
+command after a reaction occurs, to ensure that molecule IDs are
+consistent with the new bond topology. The group-ID used for
+:doc:`reset_mol_ids <reset_mol_ids>` is the group-ID for this fix.
+Resetting molecule IDs is necessarily a global operation, and so can
+be slow for very large systems.
+
 The following comments pertain to each *react* argument (in other
 words, can be customized for each reaction, or reaction step):

@ -203,9 +213,10 @@ surrounding topology. As described below, the bonding atom pairs of
 the pre-reacted template are specified by atom ID in the map file. The
 pre-reacted molecule template should contain as few atoms as possible
 while still completely describing the topology of all atoms affected
-by the reaction. For example, if the force field contains dihedrals,
-the pre-reacted template should contain any atom within three bonds of
-reacting atoms.
+by the reaction (which includes all atoms that change atom type or
+connectivity, and all bonds that change bond type). For example, if
+the force field contains dihedrals, the pre-reacted template should
+contain any atom within three bonds of reacting atoms.

 Some atoms in the pre-reacted template that are not reacting may have
 missing topology with respect to the simulation. For example, the
@ -300,8 +311,8 @@ either 'none' or 'charges.' Further details are provided in the
 discussion of the 'update_edges' keyword. The fifth optional section
 begins with the keyword 'Constraints' and lists additional criteria
 that must be satisfied in order for the reaction to occur. Currently,
-there are four types of constraints available, as discussed below:
-'distance', 'angle', 'dihedral', and 'arrhenius'.
+there are five types of constraints available, as discussed below:
+'distance', 'angle', 'dihedral', 'arrhenius', and 'rmsd'.

 A sample map file is given below:

@ -421,6 +432,25 @@ temperature calculations. A uniform random number between 0 and 1 is
 generated using *seed*\ ; if this number is less than the result of the
 Arrhenius equation above, the reaction is permitted to occur.

+The constraint of type 'rmsd' has the following syntax:
+
+.. parsed-literal::
+
+   rmsd *RMSDmax* *molfragment*
+
+where 'rmsd' is the required keyword, and *RMSDmax* is the maximum
+root-mean-square deviation between atom positions of the pre-reaction
+template and the local reaction site (distance units), after optimal
+translation and rotation of the pre-reaction template. Optionally, the
+name of a molecule fragment (of the pre-reaction template) can be
+specified by *molfragment*\ . If a molecule fragment is specified,
+only atoms that are part of this molecule fragment are used to
+determine the RMSD. A molecule fragment must have been defined in the
+:doc:`molecule <molecule>` command for the pre-reaction template. For
+example, the molecule fragment could consist of only the backbone
+atoms of a polymer chain. This constraint can be used to enforce a
+specific relative position and orientation between reacting molecules.
+
 Once a reaction site has been successfully identified, data structures
 within LAMMPS that store bond topology are updated to reflect the
 post-reacted molecule template. All force fields with fixed bonds,
@ -554,7 +584,7 @@ Default
 """""""

 The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60,
-update_edges = none
+reset_mol_ids = yes, update_edges = none

 ----------

--- a/doc/src/fix_restrain.rst
+++ b/doc/src/fix_restrain.rst
@ -13,7 +13,7 @@ Syntax
 * ID, group-ID are documented in :doc:`fix <fix>` command
 * restrain = style name of this fix command
 * one or more keyword/arg pairs may be appended
-* keyword = *bond* or *angle* or *dihedral*
+* keyword = *bond* or *lbound* or *angle* or *dihedral*

  .. parsed-literal::

@ -23,7 +23,7 @@ Syntax
         r0start = equilibrium bond distance at start of run (distance units)
         r0stop = equilibrium bond distance at end of run (optional) (distance units). If not
           specified it is assumed to be equal to r0start
-       *lbond* args = atom1 atom2 Kstart Kstop r0start (r0stop)
+       *lbound* args = atom1 atom2 Kstart Kstop r0start (r0stop)
         atom1,atom2 = IDs of 2 atoms in bond
         Kstart,Kstop = restraint coefficients at start/end of run (energy units)
         r0start = equilibrium bond distance at start of run (distance units)
@ -46,7 +46,7 @@ Examples
 .. code-block:: LAMMPS

   fix holdem all restrain bond 45 48 2000.0 2000.0 2.75
-   fix holdem all restrain lbond 45 48 2000.0 2000.0 2.75
+   fix holdem all restrain lbound 45 48 2000.0 2000.0 2.75
   fix holdem all restrain dihedral 1 2 3 4 2000.0 2000.0 120.0
   fix holdem all restrain bond 45 48 2000.0 2000.0 2.75 dihedral 1 2 3 4 2000.0 2000.0 120.0
   fix texas_holdem all restrain dihedral 1 2 3 4 0.0 2000.0 120.0 dihedral 1 2 3 5 0.0 2000.0 -120.0 dihedral 1 2 3 6 0.0 2000.0 0.0
@ -150,7 +150,7 @@ is included in :math:`K`.

 ----------

-The *lbond* keyword applies a lower bound bond restraint to the specified atoms
+The *lbound* keyword applies a lower bound bond restraint to the specified atoms
 using the same functional form used by the :doc:`bond_style harmonic <bond_harmonic>` command if the distance between
 the atoms is smaller than the equilibrium bond distance and 0 otherwise. The potential associated with
 the restraint is
--- a/doc/src/pair_coeff.rst
+++ b/doc/src/pair_coeff.rst
@ -110,8 +110,8 @@ location specified.  E.g. if the file is specified as "niu3.eam", it
 is looked for in the current working directory.  If it is specified as
 "../potentials/niu3.eam", then it is looked for in the potentials
 directory, assuming it is a sister directory of the current working
-directory.  If the file is not found, it is then looked for in the
-directory specified by the LAMMPS_POTENTIALS environment variable.
+directory.  If the file is not found, it is then looked for in one of
+the directories specified by the ``LAMMPS_POTENTIALS`` environment variable.
 Thus if this is set to the potentials directory in the LAMMPS distribution,
 then you can use those files from anywhere on your system, without
 copying them into your working directory.  Environment variables are
@ -136,6 +136,11 @@ Windows:

   % set LAMMPS_POTENTIALS="C:\\Path to LAMMPS\\Potentials"

+The ``LAMMPS_POTENTIALS`` environment variable may contain paths
+to multiple folders, if they are separated by ";" on Windows and
+":" on all other operating systems, just like the ``PATH`` and
+similar environment variables.
+
 ----------

 The alphabetic list of pair styles defined in LAMMPS is given on the
--- a/doc/src/pair_comb.rst
+++ b/doc/src/pair_comb.rst
@ -129,10 +129,10 @@ For style *comb3*\ , in addition to ffield.comb3, a special parameter
 file, *lib.comb3*\ , that is exclusively used for C/O/H systems, will be
 automatically loaded if carbon atom is detected in LAMMPS input
 structure.  This file must be in your working directory or in the
-directory pointed to by the environment variable LAMMPS_POTENTIALS, as
+directories listed in the environment variable ``LAMMPS_POTENTIALS``, as
 described on the :doc:`pair_coeff <pair_coeff>` command doc page.

-Keyword *polar* indicates whether the force field includes
+The keyword *polar* indicates whether the force field includes
 the atomic polarization.  Since the equilibration of the polarization
 has not yet been implemented, it can only set polar_off at present.

--- a/doc/src/pair_cosine_squared.rst
+++ b/doc/src/pair_cosine_squared.rst
@ -107,7 +107,7 @@ These pair styles can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""

-The *cosine/squared* style is part of the "USER-MISC" package. It is only
+The *cosine/squared* style is part of the USER-MISC package. It is only
 enabled if LAMMPS is build with that package.  See the :doc:`Build package <Build_package>` doc page for more info.

 Related commands
--- a/doc/src/pair_coul_diel.rst
+++ b/doc/src/pair_coul_diel.rst
@ -95,7 +95,7 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""

-This style is part of the "USER-MISC" package.  It is only enabled if
+This style is part of the USER-MISC package.  It is only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` doc page for more info.

 Related commands
--- a/doc/src/pair_coul_slater.rst
+++ b/doc/src/pair_coul_slater.rst
@ -95,7 +95,7 @@ Restrictions

 The  *coul/slater/long* style requires the long-range solvers included in the KSPACE package.

-These styles are part of the "USER-MISC" package.  They are only enabled if
+These styles are part of the USER-MISC package.  They are only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` doc page for more info.

 Related commands
--- a/doc/src/pair_gauss.rst
+++ b/doc/src/pair_gauss.rst
@ -164,8 +164,18 @@ heading) the following commands could be included in an input script:
 Restrictions
 """"""""""""

-The *gauss/cut* style is part of the "user-misc" package. It is only
-enabled if LAMMPS is build with that package.  See the :doc:`Build package <Build_package>` doc page for more info.
+The *gauss/cut* style is part of the USER-MISC package. It is only
+enabled if LAMMPS is built with that package.  See the :doc:`Build
+package <Build_package>` doc page for more info.
+
+The *gauss* style does not apply :doc:`special_bonds <special_bonds>`
+factors. When using this pair style on a system that has bonds, the
+special_bonds factors, if using the default setting of 0.0, may need to
+be adjusted to some very small number (e.g. 1.0e-100), so that those
+special pairs are not completely excluded from the neighbor lists, but
+won't contribute forces or energies from styles (e.g. when used in
+combination with a :doc:`hybrid pair style <pair_hybrid>`) that do
+apply those factors.

 Related commands
 """"""""""""""""
--- a/doc/src/pair_granular.rst
+++ b/doc/src/pair_granular.rst
@ -93,7 +93,7 @@ on particle *i* due to contact with particle *j* is given by:

 .. math::

-   \mathbf{F}_{ne, Hooke} = k_N \delta_{ij} \mathbf{n}
+   \mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}

 Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle
 overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two
@ -106,7 +106,7 @@ For the *hertz* model, the normal component of force is given by:

 .. math::

-   \mathbf{F}_{ne, Hertz} = k_N R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
+   \mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}

 Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective
 radius, denoted for simplicity as *R* from here on.  For *hertz*\ , the
@ -123,7 +123,7 @@ Here, :math:`E_{eff} = E = \left(\frac{1-\nu_i^2}{E_i} + \frac{1-\nu_j^2}{E_j}\r
 modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of
 types *i* and *j*\ . Note that if the elastic modulus and the shear
 modulus of the two particles are the same, the *hertz/material* model
-is equivalent to the *hertz* model with :math:`k_N = 4/3 E_{eff}`
+is equivalent to the *hertz* model with :math:`k_n = 4/3 E_{eff}`

 The *dmt* model corresponds to the
 :ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force
@ -140,7 +140,7 @@ where the force is computed as:

   \mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n}

-Here, *a* is the radius of the contact zone, related to the overlap
+Here, :math:`a` is the radius of the contact zone, related to the overlap
 :math:`\delta` according to:

 .. math::
@ -167,7 +167,7 @@ following general form:

   \mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}

-Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n} \mathbf{n}` is the component of relative velocity along
+Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n}\ \mathbf{n}` is the component of relative velocity along
 :math:`\mathbf{n}`.

 The optional *damping* keyword to the *pair_coeff* command followed by
@ -259,7 +259,9 @@ tangential model choices and their expected parameters are as follows:
 1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s`
 2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
 3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
-4. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
+4. *mindlin/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
+5. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
+6. *mindlin_rescale/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`

 Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
 damping :math:`\eta_n` that determines the magnitude of the tangential
@ -268,11 +270,11 @@ coefficient, and :math:`k_t` is the tangential stiffness coefficient.

 For *tangential linear_nohistory*, a simple velocity-dependent Coulomb
 friction criterion is used, which mimics the behavior of the *pair
-gran/hooke* style. The tangential force (\mathbf{F}_t\) is given by:
+gran/hooke* style. The tangential force :math:`\mathbf{F}_t` is given by:

 .. math::

-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}

 The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:

@ -294,8 +296,8 @@ keyword also affects the tangential damping.  The parameter
 literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
 :ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`).  The relative
 tangential velocity at the point of contact is given by
-:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\Omega_i + R_j\Omega_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}{n}`,
-:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i`.
+:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\mathbf{\Omega}_i + R_j\mathbf{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
+:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
 The direction of the applied force is :math:`\mathbf{t} = \mathbf{v_{t,rel}}/\|\mathbf{v_{t,rel}}\|` .

 The normal force value :math:`F_{n0}` used to compute the critical force
@ -314,21 +316,24 @@ form:

 .. math::

-   F_{n0} = \|\mathbf{F}_ne + 2 F_{pulloff}\|
+   F_{n0} = \|\mathbf{F}_{ne} + 2 F_{pulloff}\|

 Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and
 :math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ .

 The remaining tangential options all use accumulated tangential
-displacement (i.e. contact history). This is discussed below in the
-context of the *linear_history* option, but the same treatment of the
-accumulated displacement applies to the other options as well.
+displacement (i.e. contact history), except for the options
+*mindlin/force* and *mindlin_rescale/force*, which use accumulated
+tangential force instead and are discussed further below.
+The accumulated tangential displacement is discussed in detail below
+in the context of the *linear_history* option. The same treatment of
+the accumulated displacement applies to the other options as well.

 For *tangential linear_history*, the tangential force is given by:

 .. math::

-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}

 Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
 during the entire duration of the contact:
@ -356,7 +361,7 @@ work:

 .. math::

-   \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'}\| - \mathbf{n}\cdot\mathbf{\xi'}}
+   \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}

 Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
 current time step and :math:`\mathbf{\xi}` is the corrected
@ -372,7 +377,7 @@ discussion):

 .. math::

-   \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}\right)
+   \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)

 The tangential force is added to the total normal force (elastic plus
 damping) to produce the total force on the particle. The tangential
@ -387,27 +392,68 @@ overlap region) to induce a torque on each particle according to:

   \mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t

-For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution is used, which differs from the *linear_history*
-option by an additional factor of *a*\ , the radius of the contact region. The tangential force is given by:
+For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution
+is used, which differs from the *linear_history* option by an additional factor
+of :math:`a`, the radius of the contact region. The tangential force is given by:

 .. math::

-   \mathbf{F}_t =  -min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}

-Here, *a* is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
+
+Here, :math:`a` is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
 for all normal contact models, except for *jkr*\ , where it is given
 implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see
-discussion above. To match the Mindlin solution, one should set :math:`k_t = 4G/(2-\nu)`, where :math:`G` is the shear modulus, related to Young's modulus
-:math:`E` by :math:`G = E/(2(1+\nu))`, where :math:`\nu` is Poisson's ratio. This
-can also be achieved by specifying *NULL* for :math:`k_t`, in which case a
+discussion above. To match the Mindlin solution, one should set
+:math:`k_t = 8G_{eff}`, where :math:`G_{eff}` is the effective shear modulus given by:
+
+.. math::
+
+   G_{eff} = \left(\frac{2-\nu_i}{G_i} + \frac{2-\nu_j}{G_j}\right)^{-1}
+
+where :math:`G` is the shear modulus, related to Young's modulus :math:`E`
+and Poisson's ratio :math:`\nu` by :math:`G = E/(2(1+\nu))`. This can also be
+achieved by specifying *NULL* for :math:`k_t`, in which case a
 normal contact model that specifies material parameters :math:`E` and
 :math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this
 case, mixing of the shear modulus for different particle types *i* and
-*j* is done according to:
+*j* is done according to the formula above.
+
+.. note::
+
+   The radius of the contact region :math:`a` depends on the normal overlap.
+   As a result, the tangential force for *mindlin* can change due to
+   a variation in normal overlap, even with no change in tangential displacement.
+
+For *tangential mindlin/force*, the accumulated elastic tangential force
+characterizes the contact history, instead of the accumulated tangential
+displacement. This prevents the dependence of the tangential force on the
+normal overlap as noted above. The tangential force is given by:

 .. math::

-   1/G = 2(2-\nu_i)(1+\nu_i)/E_i + 2(2-\nu_j)(1+\nu_j)/E_j
+   \mathbf{F}_t =  -\min(\mu_t F_{n0}, \|\mathbf{F}_{te} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
+
+The increment of the elastic component of the tangential force
+:math:`\mathbf{F}_{te}` is given by:
+
+.. math::
+
+   \mathrm{d}\mathbf{F}_{te} = -k_t a \mathbf{v}_{t,rel} \mathrm{d}\tau
+
+The changes in frame of reference of the contacting pair of particles during
+contact are accounted for by the same formula as above, replacing the
+accumulated tangential displacement :math:`\mathbf{\xi}` by the accumulated tangential
+elastic force :math:`\mathbf{F}_{te}`. When the tangential force exceeds the critical
+force, the tangential force is directly re-scaled to match the value for
+the critical force:
+
+.. math::
+
+   \mathbf{F}_{te} = - \mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}
+
+The same rules as those described for *mindlin* apply regarding the tangential
+stiffness and mixing of the shear modulus for different particle types.

 The *mindlin_rescale* option uses the same form as *mindlin*\ , but the
 magnitude of the tangential displacement is re-scaled as the contact
@ -421,9 +467,32 @@ Here, :math:`t_{n-1}` indicates the value at the previous time
 step. This rescaling accounts for the fact that a decrease in the
 contact area upon unloading leads to the contact being unable to
 support the previous tangential loading, and spurious energy is
-created without the rescaling above (:ref:`Walton <WaltonPC>` ). See also
-discussion in :ref:`Thornton et al, 2013 <Thornton2013>` , particularly
-equation 18(b) of that work and associated discussion.
+created without the rescaling above (:ref:`Walton <WaltonPC>` ).
+
+.. note::
+
+   For *mindlin*, a decrease in the tangential force already occurs as the
+   contact unloads, due to the dependence of the tangential force on the normal
+   force described above. By re-scaling :math:`\xi`, *mindlin_rescale*
+   effectively re-scales the tangential force twice, i.e., proportionally to
+   :math:`a^2`. This peculiar behavior results from use of the accumulated
+   tangential displacement to characterize the contact history. Although
+   *mindlin_rescale* remains available for historic reasons and backward
+   compatibility purposes, it should be avoided in favor of *mindlin_rescale/force*.
+
+The *mindlin_rescale/force* option uses the same form as *mindlin/force*,
+but the magnitude of the tangential elastic force is re-scaled as the contact
+unloads, i.e. if :math:`a < a_{t_{n-1}}`:
+
+.. math::
+
+   \mathbf{F}_{te} = \mathbf{F}_{te, t_{n-1}} \frac{a}{a_{t_{n-1}}}
+
+This approach provides a better approximation of the :ref:`Mindlin-Deresiewicz <Mindlin1953>`
+laws and is more consistent than *mindlin_rescale*. See discussions in
+:ref:`Thornton et al, 2013 <Thornton2013>`, particularly equation 18(b) of that
+work and associated discussion, and :ref:`Agnolin and Roux, 2007 <AgnolinRoux2007>`,
+particularly Appendix A.

 ----------

@ -460,7 +529,7 @@ exceeds a critical value:

 .. math::

-   \mathbf{F}_{roll} =  min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
+   \mathbf{F}_{roll} =  \min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}

 Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of
 the pseudo-force.  As with tangential displacement, the rolling
@ -512,7 +581,7 @@ is then truncated according to:

 .. math::

-   \tau_{twist} = min(\mu_{twist} F_{n,0}, \tau_{twist,0})
+   \tau_{twist} = \min(\mu_{twist} F_{n,0}, \tau_{twist,0})

 Similar to the sliding and rolling displacement, the angular
 displacement is rescaled so that it corresponds to the critical value
@ -763,3 +832,15 @@ Technology, 233, 30-46.
 .. _WaltonPC:

 **(Otis R. Walton)** Walton, O.R., Personal Communication
+
+.. _Mindlin1953:
+
+**(Mindlin and Deresiewicz, 1953)** Mindlin, R.D., & Deresiewicz, H. (1953).
+Elastic Spheres in Contact under Varying Oblique Force.
+J. Appl. Mech., ASME 20, 327-344.
+
+.. _AgnolinRoux2007:
+
+**(Agnolin and Roux, 2007)** Agnolin, I. & Roux, J-N. (2007).
+Internal states of model isotropic granular packings.
+I. Assembling process, geometry, and contact networks. Phys. Rev. E, 76, 061302.
--- a/doc/src/pair_mesodpd.rst
+++ b/doc/src/pair_mesodpd.rst
@ -250,8 +250,12 @@ from :ref:`(Li2013_POF) <Li2013_POF>`.  The short mDPD run (about 2 minutes
 on a single core) generates a particle trajectory which can
 be visualized as follows.

+.. only:: html
+
+   .. image:: JPG/examples_mdpd.gif
+      :align: center
+
 .. image:: JPG/examples_mdpd_first.jpg
-   :target: JPG/examples_mdpd.gif
   :align: center

 .. image:: JPG/examples_mdpd_last.jpg
--- a/doc/src/pair_peri.rst
+++ b/doc/src/pair_peri.rst
@ -128,7 +128,7 @@ viscoelastic relaxation parameter and time constant,
 respectively. m_lambdai varies within zero to one. For very small
 values of m_lambdai the viscoelastic model responds very similar to a
 linear elastic model. For details please see the description in
-"(Mtchell2011)".
+"(Mitchell2011)".

 For the *peri/eps* style:

@ -142,7 +142,7 @@ For the *peri/eps* style:
 K is the bulk modulus and G is the shear modulus. The horizon is a
 cutoff distance and s00 and :math:`\alpha` are used as a bond breaking
 criteria.  m_yield_stress is the yield stress of the material. For
-details please see the description in "(Mtchell2011a)".
+details please see the description in "(Mitchell2011a)".

 ----------

--- a/doc/src/pair_python.rst
+++ b/doc/src/pair_python.rst
@ -38,12 +38,12 @@ corresponding compiled code. This penalty can be significantly reduced
 through generating tabulations from the python code through the
 :doc:`pair_write <pair_write>` command, which is supported by this style.

-Only a single pair_coeff command is used with the *python* pair style
-which specifies a python class inside a python module or file that
-LAMMPS will look up in the current directory, the folder pointed to by
-the LAMMPS_POTENTIALS environment variable or somewhere in your python
-path.  A single python module can hold multiple python pair class
-definitions. The class definitions itself have to follow specific
+Only a single :doc:`pair_coeff <pair_coeff>` command is used with the
+*python* pair style which specifies a python class inside a python module
+or a file that LAMMPS will look up in the current directory, a folder
+pointed to by the ``LAMMPS_POTENTIALS`` environment variable or somewhere
+in your python path.  A single python module can hold multiple python pair
+class definitions.  The class definitions themselves have to follow specific
 rules that are explained below.

 Atom types in the python class are specified through symbolic
--- a/doc/src/pg_cplusplus.rst
+++ b/doc/src/pg_cplusplus.rst
@ -0,0 +1,91 @@
+Using the C++ API directly
+**************************
+
+Using the C++ classes of the LAMMPS library is lacking some of the
+convenience of the C library API, but it allows a more direct access to
+simulation data and thus more low-level manipulations and tighter
+integration of LAMMPS into another code.  While the complete C
+library API is provided in the ``library.h`` header file, for using
+the C++ API it is required to include the individual header files
+defining the individual classes in use.  Typically the name of the
+class and the name of the header follow some simple rule.  Examples
+are given below.
+
+
+Creating or deleting a LAMMPS object
+*************************************
+
+When using the LAMMPS library interfaces, the core task is to create an
+instance of the :cpp:class:`LAMMPS_NS::LAMMPS` class.  In C++ this can
+be done directly through the ``new`` operator.  All further operations
+are then initiated through calling member functions of some of the
+components of the LAMMPS class or accessing their data members.  The
+destruction of the LAMMPS instance is correspondingly initiated by using
+the ``delete`` operator.  Here is a simple example:
+
+.. code-block:: c++
+
+   #include "lammps.h"
+   #include "universe.h"
+
+   #include <mpi.h>
+   #include <iostream>
+
+   int main(int argc, char **argv)
+   {
+       LAMMPS_NS::LAMMPS *lmp;
+       // custom argument vector for LAMMPS library
+       const char *lmpargv[] {"liblammps", "-log", "none"};
+       int lmpargc = sizeof(lmpargv)/sizeof(const char *);
+
+       // explicitly initialize MPI
+       MPI_Init(&argc, &argv);
+
+       // create LAMMPS instance
+       lmp = new LAMMPS_NS::LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
+       // output numerical version string
+       std::cout << "LAMMPS version: " << lmp->universe->num_ver << std::endl;
+       // delete LAMMPS instance
+       delete lmp;
+
+       // stop MPI environment
+       MPI_Finalize();
+       return 0;
+   }
+
+Please note that this requires including the ``lammps.h`` header for accessing
+the members of the LAMMPS class and then the ``universe.h`` header for accessing the ``num_ver`` member of the :cpp:class:`Universe` class.
+
+
+Executing LAMMPS commands
+*************************
+
+Once a LAMMPS instance is created by your C++ code, you need to set up a
+simulation and that is most conveniently done by "driving" it through
+issuing commands like you would do when running a LAMMPS simulation from
+an input script. Processing of input in LAMMPS is handled by the
+:cpp:class:`Input <LAMMPS_NS::Input>` class, an instance of which is a
+member of the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class.  You have
+two options: reading commands from a file, or executing a single
+command from a string. See below for a small example:
+
+.. code-block:: c++
+
+   #include "lammps.h"
+   #include "input.h"
+   #include <mpi.h>
+
+   using namespace LAMMPS_NS;
+
+   int main(int argc, char **argv)
+   {
+       const char *lmpargv[] {"liblammps", "-log", "none"};
+       int lmpargc = sizeof(lmpargv)/sizeof(const char *);
+
+       MPI_Init(&argc, &argv);
+       LAMMPS *lmp = new LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
+       lmp->input->file("in.melt");
+       lmp->input->one("run 100 post no");
+       delete lmp;
+       return 0;
+   }
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
--- a/doc/src/pg_fortran.rst
+++ b/doc/src/pg_fortran.rst
@ -0,0 +1,202 @@
+The ``LIBLAMMPS`` Fortran Module
+********************************
+
+The ``LIBLAMMPS`` module provides an interface to call LAMMPS from a
+Fortran code.  It is based on the LAMMPS C-library interface and
+requires a Fortran 2003 compatible compiler to be compiled.
+
+While C libraries have a defined binary interface (ABI) and can thus be
+used from multiple compiler versions from different vendors for as long
+as they are compatible with the hosting operating system, the same is
+not true for Fortran codes.  Thus the LAMMPS Fortran module needs to be
+compiled alongside the code using it from the source code in
+``fortran/lammps.f90``.  When linking, you also need to
+:doc:`link to the LAMMPS library <Build_link>`.  A typical command line
+for a simple program using the Fortran interface would be:
+
+.. code-block:: bash
+
+   mpifort -o testlib.x  lammps.f90 testlib.f90 -L. -llammps
+
+Please note that the MPI compiler wrapper is only required when
+calling the library from an MPI parallel code.  Please also note the order
+of the source files: the lammps.f90 file needs to be compiled first,
+since it provides the ``LIBLAMMPS`` module that is imported by the
+Fortran code using the interface.
+
+.. versionadded:: 30Sep2020
+
+.. admonition:: Work in Progress
+
+   This Fortran module is work in progress and only the documented
+   functionality is currently available. The final implementation should
+   cover the entire range of functionality available in the C and
+   Python library interfaces.
+
+----------
+
+Creating or deleting a LAMMPS object
+************************************
+
+With the Fortran interface the creation of a :cpp:class:`LAMMPS
+<LAMMPS_NS::LAMMPS>` instance is included in the constructor for
+creating the :f:func:`lammps` derived type.  To import the definition of
+that type and its type bound procedures you need to add a ``USE
+LIBLAMMPS`` statement.  Internally it will call either
+:cpp:func:`lammps_open_fortran` or :cpp:func:`lammps_open_no_mpi` from
+the C library API to create the class instance.  All arguments are
+optional and :cpp:func:`lammps_mpi_init` will be called automatically,
+if it is needed.  Similarly, a possible call to :cpp:func:`lammps_mpi_finalize`
+is integrated into the :f:func:`close` function and triggered with
+the optional logical argument set to ``.true.``. Here is a simple example:
+
+.. code-block:: fortran
+
+   PROGRAM testlib
+     USE LIBLAMMPS                 ! include the LAMMPS library interface
+     TYPE(lammps)     :: lmp       ! derived type to hold LAMMPS instance
+     CHARACTER(len=*), DIMENSION(*), PARAMETER :: args = &
+         [ CHARACTER(len=12) :: 'liblammps', '-log', 'none' ]
+
+     ! create a LAMMPS instance (and initialize MPI)
+     lmp = lammps(args)
+     ! get and print numerical version code
+     PRINT*, 'LAMMPS Version: ', lmp%version()
+     ! delete LAMMPS instance (and shut down MPI)
+     CALL lmp%close(.true.)
+
+   END PROGRAM testlib
+
+--------------------
+
+Executing LAMMPS commands
+*************************
+
+Once a LAMMPS instance is created, it is possible to "drive" the LAMMPS
+simulation by telling LAMMPS to read commands from a file, or pass
+individual or multiple commands from strings or lists of strings.  This
+is done similar to how it is implemented in the :doc:`C-library
+<pg_lib_execute>` interface. Before handing off the calls to the
+C-library interface, the corresponding Fortran versions of the calls
+(:f:func:`file`, :f:func:`command`, :f:func:`commands_list`, and
+:f:func:`commands_string`) have to make a copy of the strings passed as
+arguments so that they can be modified to be compatible with the
+requirements of strings in C without affecting the original strings.
+Those copies are automatically deleted after the functions return.
+Below is a small demonstration of the uses of the different functions:
+
+.. code-block:: fortran
+
+   PROGRAM testcmd
+     USE LIBLAMMPS
+     TYPE(lammps)     :: lmp
+     CHARACTER(len=512) :: cmds
+     CHARACTER(len=40),ALLOCATABLE :: cmdlist(:)
+     CHARACTER(len=10) :: trimmed
+     INTEGER :: i
+
+     lmp = lammps()
+     CALL lmp%file('in.melt')
+     CALL lmp%command('variable zpos index 1.0')
+     ! define 10 groups of 10 atoms each
+     ALLOCATE(cmdlist(10))
+     DO i=1,10
+         WRITE(trimmed,'(I10)') 10*i
+         WRITE(cmdlist(i),'(A,I1,A,I10,A,A)')       &
+             'group g',i-1,' id ',10*(i-1)+1,':',ADJUSTL(trimmed)
+     END DO
+     CALL lmp%commands_list(cmdlist)
+     ! run multiple commands from multi-line string
+     cmds = 'clear' // NEW_LINE('A') //                       &
+         'region  box block 0 2 0 2 0 2' // NEW_LINE('A') //  &
+         'create_box 1 box' // NEW_LINE('A') //               &
+         'create_atoms 1 single 1.0 1.0 ${zpos}'
+     CALL lmp%commands_string(cmds)
+     CALL lmp%close()
+
+   END PROGRAM testcmd
+
+---------------
+
+The ``LIBLAMMPS`` module API
+****************************
+
+Below are the detailed descriptions of definitions and interfaces
+of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS.
+
+.. f:type:: lammps
+
+   Derived type that is the general class of the Fortran interface.
+   It holds a reference to the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance
+   that any of the included calls are forwarded to.
+
+   :f c_ptr handle: reference to the LAMMPS class
+   :f close: :f:func:`close`
+   :f version: :f:func:`version`
+   :f file: :f:func:`file`
+   :f command: :f:func:`command`
+   :f commands_list: :f:func:`commands_list`
+   :f commands_string: :f:func:`commands_string`
+
+.. f:function:: lammps(args[,comm])
+
+   This is the constructor for the Fortran class and will forward
+   the arguments to a call to either :cpp:func:`lammps_open_fortran`
+   or :cpp:func:`lammps_open_no_mpi`. If the LAMMPS library has been
+   compiled with MPI support, it will also initialize MPI, if it has
+   not already been initialized before.
+
+   The *args* argument with the list of command line parameters is
+   optional and so is the *comm* argument with the MPI communicator.
+   If *comm* is not provided, ``MPI_COMM_WORLD`` is assumed. For
+   more details please see the documentation of :cpp:func:`lammps_open`.
+
+   :p character(len=*) args(*) [optional]: arguments as list of strings
+   :o integer comm [optional]: MPI communicator
+   :r lammps: an instance of the :f:type:`lammps` derived type
+
+.. f:subroutine:: close([finalize])
+
+   This method will close down the LAMMPS instance through calling
+   :cpp:func:`lammps_close`.  If the *finalize* argument is present and
+   has a value of ``.true.``, then this subroutine also calls
+   :cpp:func:`lammps_mpi_finalize`.
+
+   :o logical finalize [optional]: shut down the MPI environment of the LAMMPS library if true.
+
+.. f:function:: version()
+
+   This method returns the numeric LAMMPS version like :cpp:func:`lammps_version`.
+
+   :r integer: LAMMPS version
+
+--------
+
+.. f:subroutine:: file(filename)
+
+   This method will call :cpp:func:`lammps_file` to have LAMMPS read
+   and process commands from a file.
+
+   :p character(len=*) filename: name of file with LAMMPS commands
+
+.. f:subroutine:: command(cmd)
+
+   This method will call :cpp:func:`lammps_command` to have LAMMPS
+   execute a single command.
+
+   :p character(len=*) cmd: single LAMMPS command
+
+.. f:subroutine:: commands_list(cmds)
+
+   This method will call :cpp:func:`lammps_commands_list` to have LAMMPS
+   execute a list of input lines.
+
+   :p character(len=*) cmds(*): list of LAMMPS input lines
+
+.. f:subroutine:: commands_string(str)
+
+   This method will call :cpp:func:`lammps_commands_string` to have LAMMPS
+   execute a block of commands from a string.
+
+   :p character(len=*) str: LAMMPS input in string
+
--- a/doc/src/pg_lib_add.rst
+++ b/doc/src/pg_lib_add.rst
@ -0,0 +1,33 @@
+Adding code to the Library interface
+====================================
+
+The functionality of the LAMMPS library interface has historically
+always been motivated by the needs of its users and functions were
+added or expanded as they were needed and used.  Contributions to
+the interface are always welcome.  However with a refactoring of
+the library interface and its documentation that started in 2020,
+there are now a few requirements for inclusion of changes.
+
+  - New functions should be orthogonal to existing ones and not
+    implement functionality that can already be achieved with the
+    existing APIs.
+  - All changes and additions should be documented with
+    `Doxygen <https://www.doxygen.nl>`_ style comments and references
+    to those functions added to the corresponding files in the
+    ``doc/src`` folder.
+  - If possible, new unit tests to test those new features should
+    be added.
+  - The new feature should also be implemented and documented for
+    the Python and Fortran modules.
+  - All additions should work and be compatible with ``-DLAMMPS_BIGBIG``,
+    ``-DLAMMPS_SMALLBIG``, ``-DLAMMPS_SMALLSMALL`` and compiling
+    with and without MPI support.
+  - The ``library.h`` file should be kept compatible to C code at
+    a level similar to C89. Its interfaces may not reference any
+    custom data types (e.g. ``bigint``, ``tagint``, and so on) only
+    known inside of LAMMPS.
+  - Use only C style comments, not C++ style.
+
+Please note, that these are *not* *strict* requirements, but the
+LAMMPS developers appreciate if they are followed closely and will
+assist with implementing what is missing.
--- a/doc/src/pg_lib_config.rst
+++ b/doc/src/pg_lib_config.rst
@ -0,0 +1,67 @@
+Retrieving LAMMPS configuration information
+===========================================
+
+The following library functions can be used to query the
+LAMMPS library about compile time settings and included
+packages and styles.
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_mpi_support
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_gzip_support
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_png_support
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_jpeg_support
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_ffmpeg_support
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_exceptions
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_has_package
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_package_count
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_config_package_name
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_has_style
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_style_count
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_style_name
+   :project: progguide
+
--- a/doc/src/pg_lib_create.rst
+++ b/doc/src/pg_lib_create.rst
@ -0,0 +1,104 @@
+Creating or deleting a LAMMPS object
+====================================
+
+The :cpp:func:`lammps_open` and :cpp:func:`lammps_open_no_mpi`
+functions are used to create and initialize a
+:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` instance.  The calling program has to
+provide a handle where a reference to this instance can be stored and
+which has to be used in all subsequent function calls until that
+instance is destroyed by calling :cpp:func:`lammps_close`.
+Here is a simple example demonstrating its use:
+
+.. code-block:: C
+
+   #include "library.h"
+   #include <stdio.h>
+
+   int main(int argc, char **argv)
+   {
+     void *handle;
+     int version;
+     const char *lmpargv[] = { "liblammps", "-log", "none"};
+     int lmpargc = sizeof(lmpargv)/sizeof(const char *);
+
+     /* create LAMMPS instance */
+     handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
+     if (handle == NULL) {
+       printf("LAMMPS initialization failed");
+       lammps_mpi_finalize();
+       return 1;
+     }
+
+     /* get and print numerical version code */
+     version = lammps_version(handle);
+     printf("LAMMPS Version: %d\n",version);
+
+     /* delete LAMMPS instance and shut down MPI */
+     lammps_close(handle);
+     lammps_mpi_finalize();
+     return 0;
+   }
+
+The LAMMPS library will be using the MPI library it was compiled with
+and will either run on all processors in the ``MPI_COMM_WORLD``
+communicator or on the set of processors in the communicator given in
+the ``comm`` argument of :cpp:func:`lammps_open`.  This means
+the calling code can run LAMMPS on all or a subset of processors.  For
+example, a wrapper code might decide to alternate between LAMMPS and
+another code, allowing them both to run on all the processors.  Or it
+might allocate part of the processors to LAMMPS and the rest to the
+other code by creating a custom communicator with ``MPI_Comm_split()``
+and running both codes concurrently before syncing them up periodically.
+Or it might instantiate multiple instances of LAMMPS to perform
+different calculations and either alternate between them, run them
+concurrently on split communicators, or run them one after the other.
+The :cpp:func:`lammps_open` function may be called multiple
+times for this latter purpose.
+
+The :cpp:func:`lammps_close` function is used to shut down
+the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class pointed to by the handle
+passed as an argument and free all its memory. This has to be called for
+every instance created with any of the :cpp:func:`lammps_open` functions.  It will, however, **not** call
+``MPI_Finalize()``, since that may only be called once.  See
+:cpp:func:`lammps_mpi_finalize` for an alternative to calling
+``MPI_Finalize()`` explicitly in the calling program.
+
+The :cpp:func:`lammps_free` function is a clean-up
+function to free memory that the library allocated previously
+via other function calls.  See below for notes in the descriptions
+of the individual commands where such memory buffers were allocated.
+
+-----------------------
+
+.. doxygenfunction:: lammps_open
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_open_no_mpi
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_open_fortran
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_close
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_mpi_init
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_mpi_finalize
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_free
+   :project: progguide
--- a/doc/src/pg_lib_execute.rst
+++ b/doc/src/pg_lib_execute.rst
@ -0,0 +1,69 @@
+Executing LAMMPS commands
+=========================
+
+Once a LAMMPS instance is created, there are multiple ways to "drive" a
+simulation.  In most cases it is easiest to process single or multiple
+LAMMPS commands like in an input file.  This can be done through reading
+a file or passing single commands or lists of commands or blocks of
+commands with the following functions.
+
+Via these functions, the calling code can have the LAMMPS instance act
+on a series of :doc:`input file commands <Commands_all>` that are either
+read from a file or passed as strings.  This, for example, allows
+setting up a problem from a template file and then running it in stages while
+performing other operations in between or concurrently.  The caller can
+interleave the LAMMPS function calls with operations it performs, calls
+to extract information from or set information within LAMMPS, or calls
+to another code's library.
+
+Also equivalent to regular :doc:`input script parsing <Commands_parse>`
+is the handling of comments and expansion of variables with ``${name}``
+or ``$(expression)`` syntax before the commands are parsed and
+executed. Below is a short example using some of these functions.
+
+.. code-block:: C
+
+   #include "library.h"
+   #include <mpi.h>
+   #include <stdio.h>
+
+   int main(int argc, char **argv)
+   {
+     void *handle;
+     int i;
+
+     MPI_Init(&argc, &argv);
+     handle = lammps_open(0, NULL, MPI_COMM_WORLD, NULL);
+     lammps_file(handle,"in.sysinit");
+     lammps_command(handle,"run 1000 post no");
+
+     for (i=0; i < 100; ++i) {
+       lammps_commands_string(handle,"run 100 pre no post no\n"
+                                     "print 'PE = $(pe)'\n"
+                                     "print 'KE = $(ke)'\n");
+     }
+     lammps_close(handle);
+     MPI_Finalize();
+     return 0;
+   }
+
+-----------------------
+
+.. doxygenfunction:: lammps_file
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_command
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_commands_list
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_commands_string
+   :project: progguide
+
--- a/doc/src/pg_lib_neighbor.rst
+++ b/doc/src/pg_lib_neighbor.rst
@ -0,0 +1,30 @@
+Accessing LAMMPS Neighbor lists
+===============================
+
+The following functions allow access to neighbor lists
+generated by LAMMPS or query their properties.
+
+-----------------------
+
+.. doxygenfunction:: lammps_find_compute_neighlist
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_find_fix_neighlist
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_find_pair_neighlist
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_neighlist_num_elements
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_neighlist_element_neighbors
+   :project: progguide
--- a/doc/src/pg_lib_objects.rst
+++ b/doc/src/pg_lib_objects.rst
@ -0,0 +1,31 @@
+Retrieving or setting properties of LAMMPS objects
+==================================================
+
+This section documents accessing or modifying data from objects like
+computes, fixes, or variables in LAMMPS.
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_compute
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_fix
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_variable
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_set_variable
+   :project: progguide
+
+-----------------------
+
+.. doxygenenum:: _LMP_STYLE_CONST
+
+.. doxygenenum:: _LMP_TYPE_CONST
--- a/doc/src/pg_lib_properties.rst
+++ b/doc/src/pg_lib_properties.rst
@ -0,0 +1,62 @@
+Retrieving or setting LAMMPS system properties
+==============================================
+
+The library interface allows extracting different kinds of information
+about the active simulation instance and also modifying some of them.
+This allows combining MD simulation steps with other processing and
+simulation methods computed in the calling code or another code that is
+coupled to LAMMPS via the library interface.  In some cases the data
+returned is a direct reference to the original data inside LAMMPS cast
+to a void pointer.  In that case the data needs to be cast to a suitable
+pointer to be able to access it, and you need to know the correct dimensions
+and lengths.  When accessing per-atom data, please note that this data
+is the per-processor **local** data and indexed accordingly. These arrays
+can change sizes and order at every neighbor list rebuild and atom sort
+event as atoms are migrating between sub-domains.
+
+-----------------------
+
+.. doxygenfunction:: lammps_version
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_get_natoms
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_get_thermo
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_box
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_reset_box
+   :project: progguide
+
+-------------------
+
+.. doxygenfunction:: lammps_extract_setting
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_global
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_extract_atom
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_create_atoms(void *handle, int n, int *id, int *type, double *x, double *v, int *image, int bexpand)
+   :project: progguide
+
+
--- a/doc/src/pg_lib_scatter.rst
+++ b/doc/src/pg_lib_scatter.rst
@ -0,0 +1,29 @@
+Library functions for scatter/gather operations
+================================================
+
+.. TODO add description
+
+-----------------------
+
+.. doxygenfunction:: lammps_gather_atoms
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_gather_atoms_concat
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_gather_atoms_subset
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_scatter_atoms
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_scatter_atoms_subset
+   :project: progguide
--- a/doc/src/pg_lib_utility.rst
+++ b/doc/src/pg_lib_utility.rst
@ -0,0 +1,30 @@
+Library interface utility functions
+===================================
+
+To simplify some of the tasks, the library interface contains
+some utility functions that are not directly calling LAMMPS.
+
+-----------------------
+
+.. doxygenfunction:: lammps_encode_image_flags
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_decode_image_flags(int image, int *flags)
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_set_fix_external_callback(void *, char *, FixExternalFnPtr, void*)
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_has_error
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_get_last_error_message
+   :project: progguide
--- a/doc/src/pg_library.rst
+++ b/doc/src/pg_library.rst
@ -0,0 +1,158 @@
+LAMMPS Library Interfaces
+*************************
+
+As described on the :doc:`library interface to LAMMPS <Howto_library>`
+doc page, LAMMPS can be built as a library (static or shared), so that
+it can be called by another code, used in a :doc:`coupled manner
+<Howto_couple>` with other codes, or driven through a :doc:`Python
+script <Python_head>`.  Even the LAMMPS standalone executable is
+essentially a thin wrapper on top of the LAMMPS library, creating a
+LAMMPS instance, processing input and then exiting.
+
+Several of these approaches are based on C language wrapper functions
+in the files ``src/library.h`` and ``src/library.cpp``, but it is also
+possible to use C++ directly.  The basic procedure is always the same:
+you create one or more instances of the
+:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class and then pass commands as
+strings or from files to that LAMMPS instance to execute calculations,
+or read, manipulate, and update data from the active class instances
+inside LAMMPS to do analysis or perform operations that are not
+possible with existing commands.
+
+.. _thread-safety:
+
+.. admonition:: Thread-safety
+   :class: note
+
+   LAMMPS was initially not conceived as a thread-safe program, but over
+   the years changes have been applied to replace operations that
+   collide with creating multiple LAMMPS instances from multiple-threads
+   of the same process with thread-safe alternatives.  This primarily
+   applies to the core LAMMPS code and less so on add-on packages,
+   especially when those packages require additional code in the *lib*
+   folder, interface LAMMPS to Fortran libraries, or the code uses
+   static variables (like the USER-COLVARS package).
+
+   Another major issue to deal with is to correctly handle MPI.
+   Creating a LAMMPS instance requires passing an MPI communicator, or
+   it assumes the ``MPI_COMM_WORLD`` communicator, which spans all MPI
+   processor ranks.  When creating multiple LAMMPS object instances from
+   different threads, this communicator has to be different for each
+   thread or else collisions can happen.  Alternatively, it has to be
+   guaranteed that only one thread at a time is active.  MPI communicators,
+   however, are not a problem, if LAMMPS is compiled with the MPI STUBS
+   library, which implies that there is no MPI communication and only 1
+   MPI rank.
+
+----------
+
+.. _lammps_c_api:
+
+LAMMPS C Library API
+====================
+
+The C library interface is the most commonly used path to manage LAMMPS
+instances from a compiled code and it is the basis for the :doc:`Python
+<pg_python>` and :doc:`Fortran <pg_fortran>` modules.  Almost all
+functions of the C language API require an argument containing a
+"handle" in the form of a ``void *`` type variable, which points to the
+location of a LAMMPS class instance.
+
+The ``library.h`` header file by default includes the ``mpi.h`` header
+for an MPI library, so it must be present when compiling code using the
+library interface.  This usually must be the header from the same MPI
+library as the LAMMPS library was compiled with.  The exception is when
+LAMMPS was compiled in serial mode using the ``STUBS`` MPI library.  In
+that case the calling code may be compiled with a different MPI library
+for as long as :cpp:func:`lammps_open_no_mpi` is called to create a
+LAMMPS instance. Then you may set the define ``-DLAMMPS_LIB_NO_MPI``
+when compiling your code and the inclusion of ``mpi.h`` will be skipped
+and consequently the function :cpp:func:`lammps_open` may not be used.
+
+.. admonition:: Errors versus exceptions
+   :class: note
+
+   If any of the function calls in the LAMMPS library API will trigger
+   an error inside LAMMPS, this will result in an abort of the entire
+   program.  This is not always desirable.  Instead, LAMMPS can be
+   compiled to :ref:`throw a C++ exception <exceptions>`.
+
+.. warning::
+
+   No checks are made on the arguments of the function calls of the C
+   library interface.  *All* function arguments must be non-NULL unless
+   *explicitly* allowed and point to consistent and valid data.  Buffers
+   for storing returned data must be allocated to a suitable size.
+   Passing invalid or unsuitable information will likely cause crashes
+   or corrupt data.
+
+------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   pg_lib_create
+   pg_lib_execute
+   pg_lib_properties
+   pg_lib_objects
+   pg_lib_scatter
+   pg_lib_neighbor
+   pg_lib_config
+   pg_lib_utility
+   pg_lib_add
+
+--------------------
+
+.. _lammps_python_api:
+
+LAMMPS Python APIs
+==================
+
+The LAMMPS Python module enables calling the LAMMPS C library API from
+Python by dynamically loading functions in the LAMMPS shared library through
+the `Python ctypes module <https://docs.python.org/3/library/ctypes.html>`_.
+Because of the dynamic loading, it is **required** that LAMMPS is compiled
+in :ref:`"shared" mode <exe>`.  The Python interface is object oriented, but
+otherwise tries to be very similar to the C library API.  Three different
+Python classes to run LAMMPS are available and they build on each other.
+
+.. toctree::
+   :maxdepth: 1
+
+   pg_python
+
+-------------------
+
+.. _lammps_fortran_api:
+
+LAMMPS Fortran API
+==================
+
+The LAMMPS Fortran module is a wrapper around calling functions from the
+LAMMPS C library API from Fortran through the ISO_C_BINDING feature in
+Fortran 2003.  The interface is object oriented but otherwise tries to
+be very similar to the C library API and the basic Python module.
+
+.. toctree::
+   :maxdepth: 1
+
+   pg_fortran
+
+-------------------
+
+.. _lammps_cplusplus_api:
+
+LAMMPS C++ API
+==============
+
+It is also possible to invoke the LAMMPS C++ API directly in your code.
+It is lacking some of the convenience of the C library API, but it allows
+a more direct access to simulation data and thus more low-level manipulations.
+The following links provide some examples and references to the C++ API.
+
+.. toctree::
+   :maxdepth: 1
+
+   pg_cplusplus
+
+
--- a/doc/src/pg_python.rst
+++ b/doc/src/pg_python.rst
@ -0,0 +1,188 @@
+The ``lammps`` Python module
+****************************
+
+.. py:module:: lammps
+
+The LAMMPS Python interface is implemented as a module called
+:py:mod:`lammps` in the ``lammps.py`` file in the ``python`` folder of
+the LAMMPS source code distribution.  After compilation of LAMMPS, the
+module can be installed into a Python system folder or a user folder
+with ``make install-python``.  Components of the module can then be loaded
+into a Python session with the ``import`` command.
+
+There are multiple Python interface classes in the :py:mod:`lammps` module:
+
+- the :py:class:`lammps <lammps.lammps>` class. This is a wrapper around
+  the C-library interface and its member functions try to replicate the
+  :doc:`C-library API <pg_library>` closely.  This is the most
+  feature-complete Python API.
+- the :py:class:`PyLammps <lammps.PyLammps>` class. This is a more high-level
+  and more Python style class implemented on top of the
+  :py:class:`lammps <lammps.lammps>` class.
+- the :py:class:`IPyLammps <lammps.IPyLammps>` class is derived from
+  :py:class:`PyLammps <lammps.PyLammps>` and adds embedded graphics
+  features to conveniently include LAMMPS into `Jupyter
+  <https://jupyter.org/>`_ notebooks.
+
+.. _mpi4py_url: https://mpi4py.readthedocs.io
+
+----------
+
+Creating or deleting a LAMMPS object
+************************************
+
+With the Python interface the creation of a :cpp:class:`LAMMPS
+<LAMMPS_NS::LAMMPS>` instance is included in the constructor for the
+:py:class:`lammps <lammps.lammps>` class.  Internally it will call either
+:cpp:func:`lammps_open` or :cpp:func:`lammps_open_no_mpi` from the C
+library API to create the class instance.
+
+All arguments are optional.  The *name* argument is to allow loading a
+LAMMPS shared library that is named ``liblammps_machine.so`` instead of
+the default name of ``liblammps.so``.  In most cases the latter will be
+installed or used.  The *ptr* argument is for use of the
+:py:mod:`lammps` module from inside a LAMMPS instance, e.g. with the
+:doc:`python <python>` command, where a pointer to the already existing
+:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance can be passed
+to the Python class and used instead of creating a new instance.  The
+*comm* argument may be used in combination with the `mpi4py <mpi4py_url_>`_
+module to pass an MPI communicator to LAMMPS and thus it is possible
+to run the Python module like the library interface on a subset of the
+MPI ranks after splitting the communicator. Here is a simple example:
+
+.. code-block:: python
+
+   from lammps import lammps
+
+   # NOTE: argv[0] is set by the Python module
+   args = ["-log", "none"]
+   # create LAMMPS instance
+   lmp = lammps(cmdargs=args)
+   # get and print numerical version code
+   print("LAMMPS Version: ", lmp.version())
+   # explicitly close and delete LAMMPS instance (optional)
+   lmp.close()
+
+Same as with the :doc:`C library API <pg_lib_create>` this will use the
+``MPI_COMM_WORLD`` communicator for the MPI library that LAMMPS was
+compiled with.  The :py:func:`lmp.close() <lammps.lammps.close>` call is
+optional since the LAMMPS class instance will also be deleted
+automatically during the :py:class:`lammps <lammps.lammps>` class
+destructor.
+
+Executing LAMMPS commands
+*************************
+
+Once an instance of the :py:class:`lammps <lammps.lammps>` class is
+created, there are multiple ways to "feed" it commands.  In a way, this is
+not very different from running a LAMMPS input script, except that
+Python has many more facilities for structured programming than the
+LAMMPS input script syntax.  Furthermore it is possible to "compute"
+what the next LAMMPS command should be.  Same as in the equivalent
+:doc:`C library functions <pg_lib_execute>`, commands can be read from a file, a
+single string, a list of strings and a block of commands in a single
+multi-line string. They are processed under the same boundary conditions
+as the C library counterparts.  The example below demonstrates the use
+of :py:func:`lammps.file`, :py:func:`lammps.command`,
+:py:func:`lammps.commands_list`, and :py:func:`lammps.commands_string`:
+
+.. code-block:: python
+
+   from lammps import lammps
+
+   lmp = lammps()
+   # read commands from file 'in.melt'
+   lmp.file('in.melt')
+   # issue a single command
+   lmp.command('variable zpos index 1.0')
+   # create 10 groups with 10 atoms each
+   cmds = ["group g{} id {}:{}".format(i,10*i+1,10*(i+1)) for i in range(10)]
+   lmp.commands_list(cmds)
+   # run commands from a multi-line string
+   block = """
+   clear
+   region  box block 0 2 0 2 0 2
+   create_box 1 box
+   create_atoms 1 single 1.0 1.0 ${zpos}
+   """
+   lmp.commands_string(block)
+
+----------
+
+The ``lammps`` class API
+************************
+
+The :py:class:`lammps <lammps.lammps>` class is the core of the LAMMPS
+Python interfaces.  It is a wrapper around the :doc:`LAMMPS C library
+API <pg_library>` using the `Python ctypes module
+<https://docs.python.org/3/library/ctypes.html>`_ and a shared library
+compiled from the LAMMPS source code.  The individual methods in this
+class try to closely follow the corresponding C functions.  The handle
+argument that needs to be passed to the C functions is stored internally
+in the class and automatically added when calling the C library
+functions. Below is a detailed documentation of the API.
+
+.. autoclass:: lammps.lammps
+   :members:
+
+----------
+
+The ``PyLammps`` class API
+**************************
+
+.. autoclass:: lammps.PyLammps
+   :members:
+
+----------
+
+The ``IPyLammps`` class API
+***************************
+
+.. autoclass:: lammps.IPyLammps
+   :members:
+
+----------
+
+Additional components of the ``lammps`` module
+**********************************************
+
+The :py:mod:`lammps` module additionally contains several constants
+and the :py:class:`NeighList <lammps.NeighList>` class:
+
+.. _py_data_constants:
+.. py:data:: LAMMPS_INT, LAMMPS_DOUBLE, LAMMPS_BIGINT, LAMMPS_TAGINT, LAMMPS_STRING
+   :type: int
+
+   Constants in the :py:mod:`lammps` module to indicate how to
+   cast data when the C library function returns a void pointer.
+   Used in :py:func:`lammps.extract_global`.
+
+.. _py_style_constants:
+.. py:data:: LMP_STYLE_GLOBAL, LMP_STYLE_ATOM, LMP_STYLE_LOCAL
+   :type: int
+
+   Constants in the :py:mod:`lammps` module to select what style of data
+   to request from computes or fixes. See :cpp:enum:`_LMP_STYLE_CONST`
+   for the equivalent constants in the C library interface. Used in
+   :py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
+
+.. _py_type_constants:
+.. py:data:: LMP_TYPE_SCALAR, LMP_TYPE_VECTOR, LMP_TYPE_ARRAY, LMP_SIZE_VECTOR, LMP_SIZE_ROWS, LMP_SIZE_COLS
+   :type: int
+
+   Constants in the :py:mod:`lammps` module to select what type of data
+   to request  from computes  or fixes.  See :cpp:enum:`_LMP_TYPE_CONST`
+   for the equivalent constants in the C library interface. Used in
+   :py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
+
+.. _py_var_constants:
+.. py:data:: LMP_VAR_EQUAL, LMP_VAR_ATOM
+   :type: int
+
+   Constants in the :py:mod:`lammps` module to select what style of
+   variable to query when calling :py:func:`lammps.extract_variable`.
+
+.. autoclass:: lammps.NeighList
+   :members:
+   :no-undoc-members:
+
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1 +1,5 @@
 Sphinx
+sphinxcontrib-spelling
+sphinx-fortran
+breathe
+Pygments
--- a/doc/utils/sphinx-config/_static/css/lammps.css
+++ b/doc/utils/sphinx-config/_static/css/lammps.css
@ -7,3 +7,10 @@
    display: block;
    margin-bottom: 0.809em;
 }
+
+.lammps_release {
+    text-align: center;
+    font-size: 11px;
+    display: block;
+    margin-bottom: 0.405em;
+}
--- a/doc/utils/sphinx-config/_static/lammps-logo.png
+++ b/doc/utils/sphinx-config/_static/lammps-logo.png
--- a/doc/utils/sphinx-config/_themes/lammps_theme/layout.html
+++ b/doc/utils/sphinx-config/_themes/lammps_theme/layout.html
@ -103,6 +103,12 @@
    {%- endif %}
  {%- endblock %}
  {%- block extrahead %} {% endblock %}
+
+  {# Keep modernizr in head - http://modernizr.com/docs/#installing #}
+  <script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
+
+  {# for improved browser compatibility #}
+  <script src="{{ pathto('_static/polyfill.js', 1) }}"></script>
 </head>

 <body class="wy-body-for-nav">
@ -135,9 +141,8 @@
              {%- set nav_version = current_version %}
            {% endif %}
            {% if nav_version %}
-              <div class="version">
-                {{ nav_version }}
-              </div>
+              <div class="lammps_version">Version: <b>{{ nav_version }}</b></div>
+              <div class="lammps_release">git info: {{ release }}</div>
            {% endif %}
          {% endif %}

--- a/doc/utils/sphinx-config/conf.py.in
+++ b/doc/utils/sphinx-config/conf.py.in
@ -23,11 +23,16 @@ try:
 except:
    pass

+LAMMPS_DOC_DIR = '@LAMMPS_DOC_DIR@'
+LAMMPS_SOURCE_DIR = '@LAMMPS_SOURCE_DIR@'
+LAMMPS_PYTHON_DIR = '@LAMMPS_PYTHON_DIR@'
+LAMMPS_DOXYGEN_XML_DIR = '@DOXYGEN_XML_DIR@'
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, os.path.abspath('.'))
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../src/_ext'))
+sys.path.append(os.path.join(LAMMPS_DOC_DIR, 'src', '_ext'))

 # -- General configuration ------------------------------------------------

@ -41,7 +46,9 @@ extensions = [
    'sphinx.ext.mathjax',
    'sphinx.ext.imgmath',
    'sphinx.ext.autodoc',
+    'sphinxfortran.fortran_domain',
    'table_from_list',
+    'breathe',
 ]
 # 2017-12-07: commented out, since this package is broken with Sphinx 16.x
 #             yet we can no longer use Sphinx 15.x, since that breaks with
@ -72,12 +79,24 @@ copyright = '2003-2020 Sandia Corporation'
 def get_lammps_version():
    import os
    script_dir = os.path.dirname(os.path.realpath(__file__))
-    with open(os.path.join(script_dir, '../../../src/version.h'), 'r') as f:
+    with open(os.path.join(LAMMPS_SOURCE_DIR, 'version.h'), 'r') as f:
        line = f.readline()
        start_pos = line.find('"')+1
        end_pos = line.find('"', start_pos)
        return line[start_pos:end_pos]

+def get_git_info():
+    """Return the output of ``git describe`` for use as the ``release`` string.
+
+    ``git describe`` is run in the current working directory.  Underscores in
+    its output are replaced by blanks and surrounding whitespace (including
+    the trailing newline printed by git) is removed.  An empty string is
+    returned when git cannot be executed or exits with a non-zero status.
+    """
+    import subprocess
+
+    git_n_date = ''
+    try:
+        gitinfo = subprocess.run(['git','describe'],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+        if gitinfo.returncode == 0:
+            git_n_date = gitinfo.stdout.decode().replace('_',' ').strip()
+    except Exception:
+        # best effort only: the documentation must still build without git,
+        # but KeyboardInterrupt/SystemExit are no longer swallowed
+        pass
+    return git_n_date
+
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
@ -85,7 +104,7 @@ def get_lammps_version():
 # The short X.Y version.
 version = get_lammps_version()
 # The full version, including alpha/beta/rc tags.
-release = ''
+release = get_git_info()

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@ -153,7 +172,7 @@ html_title = "LAMMPS documentation"

 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-html_logo = 'lammps-logo.png'
+html_logo = '_static/lammps-logo.png'

 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@ -314,7 +333,7 @@ texinfo_documents = [

 epub_title = 'LAMMPS Documentation - ' + get_lammps_version()

-epub_cover = ('lammps-logo.png', '')
+epub_cover = ('_static/lammps-logo.png', '')

 epub_description = """
 This is the Manual for the LAMMPS software package.
@ -342,13 +361,29 @@ if spelling_spec and has_enchant:
    spelling_lang='en_US'
    spelling_word_list_filename='false_positives.txt'

-sys.path.append(os.path.join(os.path.dirname(__file__), '.'))
+conf_script_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(conf_script_dir, '.'))
 import LAMMPSLexer
 from sphinx.highlighting import lexers

 lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True)

-sys.path.append(os.path.join(os.path.dirname(__file__), '../../../python'))
+sys.path.append(LAMMPS_PYTHON_DIR)

 # avoid syntax highlighting in blocks that don't specify language
 highlight_language = 'none'
+
+# autodoc configuration
+
+autodoc_member_order = 'bysource'
+#autoclass_content = 'both'
+
+# breathe configuration
+
+breathe_projects = { 'progguide' : LAMMPS_DOXYGEN_XML_DIR }
+breathe_default_project = 'progguide'
+breathe_show_define_initializer = True
+breathe_domain_by_extension = { 'h'   : 'cpp',
+                                'cpp' : 'cpp',
+                                'c'   : 'c',
+                                }
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -43,6 +43,7 @@ Afshar
 agilio
 Agilio
 agni
+Agnolin
 Ai
 Aidan
 aij
@ -114,6 +115,7 @@ Archlinux
 arcsin
 arg
 args
+argv
 arrhenius
 Arun
 arXiv
@ -137,6 +139,8 @@ atc
 AtC
 ATC
 athermal
+atime
+atimestep
 athomps
 atm
 atomeye
@ -206,7 +210,6 @@ bcolor
 bdiam
 bdw
 Beckman
-behaviour
 Belak
 Bellott
 benchmarking
@ -247,6 +250,7 @@ bispectrum
 Bispectrum
 bitbucket
 bitmapped
+bitmask
 bitrate
 bitrates
 Bitzek
@ -265,6 +269,7 @@ bodystyle
 Bogaerts
 Bogusz
 Bohrs
+boltz
 Boltzman
 BondAngle
 BondBond
@ -283,6 +288,14 @@ Botu
 Bouguet
 Bourne
 boxcolor
+boxlo
+boxhi
+boxxlo
+boxxhi
+boxylo
+boxyhi
+boxzlo
+boxzhi
 bp
 bpclermont
 bpls
@ -301,6 +314,7 @@ Bryantsev
 Btarget
 btype
 buckPlusAttr
+buf
 builtin
 Bulatov
 Bureekaew
@ -369,6 +383,7 @@ charmm
 CHARMM
 charmmfsh
 charmmfsw
+charptr
 Chaudhuri
 checkbox
 checkmark
@ -407,6 +422,7 @@ cmap
 Cmax
 cmd
 cmdlist
+cmds
 Cmin
 cmm
 CMM
@ -436,6 +452,7 @@ Colvars
 COLVARS
 comID
 Commun
+compositing
 compressibility
 compressive
 Comput
@ -584,6 +601,7 @@ del
 delaystep
 DeleteIDs
 deleteIDs
+delflag
 Dellago
 delocalization
 delocalized
@ -599,6 +617,7 @@ Dequidt
 der
 dereference
 derekt
+Deresiewicz
 Derjagin
 Derjaguin
 Derlet
@ -668,6 +687,8 @@ Donadio
 dotc
 Doty
 doxygen
+doxygenclass
+doxygenfunction
 downarrow
 Doye
 dpd
@ -721,6 +742,7 @@ Eaat
 Eacn
 eam
 eangle
+earg
 eatom
 Eb
 Eba
@ -841,6 +863,7 @@ Erhart
 erorate
 erose
 erotate
+errno
 Ertas
 ervel
 Espanol
@ -899,6 +922,7 @@ Fc
 fcc
 fcm
 Fd
+fd
 fdotr
 fdt
 Fehlberg
@ -923,6 +947,7 @@ ffplay
 fft
 fftbench
 fftw
+fgets
 fhg
 Fi
 Fichthorn
@ -958,6 +983,7 @@ fmackay
 fmag
 fmass
 fmm
+fmt
 fmx
 fmy
 fmz
@ -971,6 +997,7 @@ Fock
 Fogarty
 Foiles
 fopenmp
+forceclear
 forestgreen
 formatarg
 formulae
@ -987,6 +1014,7 @@ Fraige
 framerate
 Frauenheim
 Fraunhofer
+fread
 Freitas
 Frenkel
 Friedrichs
@ -994,6 +1022,7 @@ fs
 fsh
 fstyle
 fsw
+ftm
 ftol
 fugacity
 Fumi
@ -1101,6 +1130,7 @@ gromos
 Gronbech
 Groot
 groupbig
+groupbit
 grp
 Grueneisen
 gsmooth
@ -1163,6 +1193,7 @@ hexorder
 Heyes
 HfO
 hgrid
+hhmrr
 Hibbs
 Higdon
 Hijazi
@ -1172,6 +1203,7 @@ histogrammed
 histogramming
 hma
 hmaktulga
+hplanck
 hoc
 Hochbruck
 Hofling
@ -1214,6 +1246,7 @@ hyperspherical
 hysteretic
 hz
 Ibanez
+iatom
 ibar
 ibm
 icc
@ -1256,6 +1289,7 @@ indices
 inertiax
 inertiay
 inertiaz
+infile
 infty
 inhomogeneities
 inhomogeneous
@ -1296,6 +1330,7 @@ ipp
 Ippolito
 IPv
 IPython
+ipython
 Isele
 isenthalpic
 ish
@ -1444,6 +1479,7 @@ Kloza
 kmax
 Kmax
 KMP
+kmu
 Knizhnik
 knl
 Kofke
@ -1931,6 +1967,7 @@ muz
 mv
 mV
 Mvapich
+mvh
 mvv
 MxN
 myCompute
@ -1943,11 +1980,13 @@ na
 nabla
 Nagaosa
 Nakano
+nall
 namespace
 namespaces
 nan
 NaN
 Nandor
+nangles
 Nangletype
 nangletypes
 Nangletypes
@ -1976,6 +2015,7 @@ Nbin
 Nbins
 nbody
 Nbody
+nbonds
 nbondtype
 Nbondtype
 nbondtypes
@ -1988,9 +2028,11 @@ Nc
 nchunk
 Nchunk
 ncoeff
+ncol
 ncorr
 ncount
 nd
+ndihedrals
 Ndihedraltype
 Ndirango
 ndof
@ -2032,10 +2074,12 @@ Ngyuen
 nh
 nharmonic
 nhc
+nhi
 NiAlH
 Nicklas
 Niklasson
 Nikolskiy
+nimpropers
 Nimpropertype
 Ninteger
 Nissila
@ -2044,9 +2088,11 @@ nitride
 nitrides
 niu
 Nk
+nktv
 nl
 nlen
 Nlines
+nlo
 nlocal
 Nlocal
 Nlog
@ -2054,7 +2100,9 @@ nlp
 nm
 Nm
 Nmax
+nmax
 Nmin
+nmin
 Nmols
 nn
 Nocedal
@ -2107,6 +2155,7 @@ Nrepeat
 nreset
 Nrho
 Nroff
+nrow
 nrun
 Ns
 Nsample
@ -2125,6 +2174,7 @@ Nt
 Ntable
 ntheta
 nthreads
+ntimestep
 Ntptask
 Ntriples
 Ntype
@ -2220,6 +2270,7 @@ oxdna
 oxrna
 oxDNA
 oxRNA
+packings
 padua
 Padua
 pafi
@ -2252,6 +2303,8 @@ Particuology
 pastewka
 Pastewka
 pathangle
+pathname
+pathnames
 Patomtrans
 Pattnaik
 Pavese
@ -2352,6 +2405,7 @@ polydisperse
 polydispersity
 polyelectrolyte
 polyhedra
+polymorphism
 popen
 Popov
 popstore
@ -2385,6 +2439,7 @@ proc
 Proc
 procs
 Prony
+progguide
 ps
 Ps
 pscreen
@ -2431,7 +2486,9 @@ qbmsst
 qcore
 qdist
 qE
+qe
 qeff
+qelectron
 qeq
 QeQ
 QEq
@ -2449,6 +2506,8 @@ qmol
 qoffload
 qopenmp
 qoverride
+qqr
+qqrd
 qtb
 quadratically
 quadrupolar
@ -2504,6 +2563,7 @@ rebo
 recursing
 Ree
 refactored
+refactoring
 reflectionstyle
 regoin
 Reinders
@ -2589,6 +2649,7 @@ Rkouter
 RkouterN
 rmask
 Rmask
+rmass
 rmax
 Rmax
 rmdir
@ -2723,6 +2784,7 @@ shlib
 SHM
 shm
 shockvel
+shrinkexceed
 Shugaev
 si
 SiC
@ -2851,11 +2913,16 @@ strcmp
 streitz
 Streitz
 Streiz
+strerror
 strided
 strietz
+strmatch
+strncmp
+strstr
 Stukowski
 Su
 subbox
+Subclassed
 subcutoff
 subcycle
 subcycling
@ -2996,6 +3063,7 @@ Tmin
 tmp
 tN
 Tobias
+tokenizer
 tokyo
 tol
 toolchain
@ -3226,6 +3294,7 @@ vv
 vx
 Vx
 vxcm
+vxmu
 vy
 Vy
 vycm
@ -3258,8 +3327,9 @@ Widom
 widom
 Wijk
 Wikipedia
-wildcard
 Wildcard
+wildcard
+wildcards
 Wirnsberger
 wirtes
 witin
@ -3301,6 +3371,7 @@ Xmax
 xmgrace
 xMIC
 xmin
+xml
 xmovie
 Xmovie
 xmu
@ -3315,6 +3386,7 @@ xsu
 xtc
 xu
 Xu
+xxt
 xxxxx
 xy
 xyz
--- a/doc/utils/sphinx-config/lammps-logo.png
+++ b/doc/utils/sphinx-config/lammps-logo.png
@ -1 +0,0 @@
-../../src/JPG/lammps-logo.png
--- a/fortran/README
+++ b/fortran/README
@ -0,0 +1,11 @@
+This directory contains Fortran code that interfaces with LAMMPS as a library
+and allows the LAMMPS library interface to be invoked from Fortran codes.
+It requires a Fortran compiler that supports the Fortran 2003 standard.
+
+This interface is based on and supersedes the previous Fortran interfaces
+in the examples/COUPLE/fortran* folders.  It is fully supported by the
+LAMMPS developers and included in the documentation and unit testing.
+
+Details on this Fortran interface and how to build programs using it
+are in the manual in the doc/html/pg_fortran.html file.
+
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
@ -0,0 +1,281 @@
+! -------------------------------------------------------------------------
+!   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+!   http://lammps.sandia.gov, Sandia National Laboratories
+!   Steve Plimpton, sjplimp@sandia.gov
+!
+!   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+!   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+!   certain rights in this software.  This software is distributed under
+!   the GNU General Public License.
+!
+!   See the README file in the top-level LAMMPS directory.
+! -------------------------------------------------------------------------
+!
+! Fortran interface to the LAMMPS library implemented as a Fortran 2003
+! style module that wraps the C-style library interface in library.cpp
+! and library.h using the ISO_C_BINDING module of the Fortran compiler.
+!
+! Based on the LAMMPS Fortran 2003 module contributed by:
+!   Karl D. Hammond <karlh@ugcs.caltech.edu>
+!   University of Tennessee, Knoxville (USA), 2012
+!
+! The Fortran module tries to follow the API of the C-library interface
+! closely, but like the Python wrapper it employs an object oriented
+! approach.  To accommodate the object oriented approach, all exported
+! subroutine and functions have to be implemented in Fortran to then
+! call the interfaced C style functions with adapted calling conventions
+! as needed.  The C-library interfaced functions retain their names
+! starting with "lammps_" while the Fortran versions start with "lmp_".
+!
+MODULE LIBLAMMPS
+
+  USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_ptr, c_null_ptr, c_loc, &
+      c_int, c_char, c_null_char, c_double
+
+  IMPLICIT NONE
+  PRIVATE
+  PUBLIC :: lammps
+
+  TYPE lammps
+      TYPE(c_ptr) :: handle
+    CONTAINS
+      PROCEDURE :: close              => lmp_close
+      PROCEDURE :: file               => lmp_file
+      PROCEDURE :: command            => lmp_command
+      PROCEDURE :: commands_list      => lmp_commands_list
+      PROCEDURE :: commands_string    => lmp_commands_string
+      PROCEDURE :: version            => lmp_version
+      PROCEDURE :: get_natoms         => lmp_get_natoms
+  END TYPE lammps
+
+  INTERFACE lammps
+      MODULE PROCEDURE lmp_open
+  END INTERFACE lammps
+
+  ! interface definitions for calling functions in library.cpp
+  INTERFACE
+      FUNCTION lammps_open(argc,argv,comm,handle) &
+          BIND(C, name='lammps_open_fortran')
+        IMPORT :: c_ptr, c_int
+        INTEGER(c_int), VALUE, INTENT(in)     :: argc, comm
+        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
+        TYPE(c_ptr), INTENT(out)              :: handle
+        TYPE(c_ptr)                           :: lammps_open
+      END FUNCTION lammps_open
+
+      FUNCTION lammps_open_no_mpi(argc,argv,handle) &
+          BIND(C, name='lammps_open_no_mpi')
+        IMPORT :: c_ptr, c_int
+        INTEGER(c_int), VALUE, INTENT(in)     :: argc
+        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
+        TYPE(c_ptr), INTENT(out)              :: handle
+        TYPE(c_ptr)                           :: lammps_open_no_mpi
+      END FUNCTION lammps_open_no_mpi
+
+      SUBROUTINE lammps_close(handle) BIND(C, name='lammps_close')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+      END SUBROUTINE lammps_close
+
+      SUBROUTINE lammps_mpi_init(handle) BIND(C, name='lammps_mpi_init')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+      END SUBROUTINE lammps_mpi_init
+
+      SUBROUTINE lammps_mpi_finalize(handle) &
+          BIND(C, name='lammps_mpi_finalize')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+      END SUBROUTINE lammps_mpi_finalize
+
+      SUBROUTINE lammps_file(handle,filename) BIND(C, name='lammps_file')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+        TYPE(c_ptr), VALUE :: filename
+      END SUBROUTINE lammps_file
+
+      SUBROUTINE lammps_command(handle,cmd) BIND(C, name='lammps_command')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+        TYPE(c_ptr), VALUE :: cmd
+      END SUBROUTINE lammps_command
+
+      SUBROUTINE lammps_commands_list(handle,ncmd,cmds) &
+          BIND(C, name='lammps_commands_list')
+        IMPORT :: c_ptr, c_int
+        TYPE(c_ptr), VALUE :: handle
+        INTEGER(c_int), VALUE, INTENT(in)     :: ncmd
+        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: cmds
+      END SUBROUTINE lammps_commands_list
+
+      SUBROUTINE lammps_commands_string(handle,str) &
+          BIND(C, name='lammps_commands_string')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: handle
+        TYPE(c_ptr), VALUE :: str
+      END SUBROUTINE lammps_commands_string
+
+      SUBROUTINE lammps_free(ptr) BIND(C, name='lammps_free')
+        IMPORT :: c_ptr
+        TYPE(c_ptr), VALUE :: ptr
+      END SUBROUTINE lammps_free
+
+      FUNCTION lammps_version(handle) BIND(C, name='lammps_version')
+        IMPORT :: c_ptr, c_int
+        TYPE(c_ptr), VALUE :: handle
+        INTEGER(c_int) :: lammps_version
+      END FUNCTION lammps_version
+
+      FUNCTION lammps_get_natoms(handle) BIND(C, name='lammps_get_natoms')
+        IMPORT :: c_ptr, c_double
+        TYPE(c_ptr), VALUE :: handle
+        REAL(c_double) :: lammps_get_natoms
+      END FUNCTION lammps_get_natoms
+  END INTERFACE
+
+CONTAINS
+
+  ! Fortran wrappers and helper functions.
+
+  ! Constructor for the LAMMPS class.
+  ! Combined wrapper around lammps_open_fortran() and lammps_open_no_mpi()
+  !
+  ! args : optional list of command line arguments; every element is
+  !        converted to a zero terminated C string.  When absent, the
+  !        single argument "liblammps" is passed instead.
+  ! comm : optional MPI communicator as Fortran integer.  When present,
+  !        lammps_open_fortran() is called with it, otherwise
+  !        lammps_open_no_mpi() is called.
+  ! The handle of the returned object is set to the pointer returned by
+  ! the C library function.
+  TYPE(lammps) FUNCTION lmp_open(args,comm)
+    IMPLICIT NONE
+    INTEGER,INTENT(in), OPTIONAL :: comm
+    CHARACTER(len=*), INTENT(in), OPTIONAL :: args(:)
+    TYPE(c_ptr), ALLOCATABLE     :: argv(:)
+    TYPE(c_ptr)                  :: dummy
+    INTEGER :: i,argc
+
+    ! assign at run time: an initializer in the declaration would imply
+    ! the SAVE attribute and would only be applied once
+    dummy = c_null_ptr
+
+    IF (PRESENT(args)) THEN
+        ! convert argument list to c style
+        argc = SIZE(args)
+        ALLOCATE(argv(argc))
+        DO i=1,argc
+           argv(i) = f2c_string(args(i))
+        END DO
+    ELSE
+        argc = 1
+        ALLOCATE(argv(1))
+        argv(1) = f2c_string("liblammps")
+    ENDIF
+
+    IF (PRESENT(comm)) THEN
+        lmp_open%handle = lammps_open(argc,argv,comm,dummy)
+    ELSE
+        lmp_open%handle = lammps_open_no_mpi(argc,argv,dummy)
+    END IF
+
+    ! Clean up allocated memory
+    DO i=1,argc
+        CALL lammps_free(argv(i))
+    END DO
+    DEALLOCATE(argv)
+  END FUNCTION lmp_open
+
+  ! Combined Fortran wrapper around lammps_close() and lammps_mpi_finalize()
+  ! lammps_close() is always called; lammps_mpi_finalize() is called in
+  ! addition only when finalize is present and .TRUE.
+  SUBROUTINE lmp_close(self,finalize)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    LOGICAL,INTENT(in),OPTIONAL :: finalize
+
+    CALL lammps_close(self%handle)
+
+    ! test presence first: an absent optional argument must not be read
+    IF (.NOT. PRESENT(finalize)) RETURN
+    IF (finalize) CALL lammps_mpi_finalize(self%handle)
+  END SUBROUTINE lmp_close
+
+  ! equivalent function to lammps_version()
+  FUNCTION lmp_version(self) RESULT(vers)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    INTEGER :: vers
+
+    vers = lammps_version(self%handle)
+  END FUNCTION lmp_version
+
+  ! equivalent function to lammps_get_natoms()
+  ! the value is returned as a floating point number, same as in the
+  ! interfaced C function
+  FUNCTION lmp_get_natoms(self) RESULT(natoms)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    DOUBLE PRECISION :: natoms
+
+    natoms = lammps_get_natoms(self%handle)
+  END FUNCTION lmp_get_natoms
+
+  ! equivalent function to lammps_file()
+  ! filename is copied into a temporary zero terminated C string that
+  ! is released again after the call into the C library
+  SUBROUTINE lmp_file(self,filename)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    CHARACTER(len=*) :: filename
+    TYPE(c_ptr) :: c_filename
+
+    c_filename = f2c_string(filename)
+    CALL lammps_file(self%handle, c_filename)
+    CALL lammps_free(c_filename)
+  END SUBROUTINE lmp_file
+
+  ! equivalent function to lammps_command()
+  ! cmd is copied into a temporary zero terminated C string that is
+  ! released again after the call into the C library
+  SUBROUTINE lmp_command(self,cmd)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    CHARACTER(len=*) :: cmd
+    TYPE(c_ptr) :: c_cmd
+
+    c_cmd = f2c_string(cmd)
+    CALL lammps_command(self%handle, c_cmd)
+    CALL lammps_free(c_cmd)
+  END SUBROUTINE lmp_command
+
+  ! equivalent function to lammps_commands_list()
+  !
+  ! cmds : list of LAMMPS commands, one command per array element.
+  !        Each element is converted to a zero terminated C string.
+  !        The argument is declared OPTIONAL; when it is absent the
+  !        subroutine returns without calling the C library.
+  SUBROUTINE lmp_commands_list(self,cmds)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    CHARACTER(len=*), INTENT(in), OPTIONAL :: cmds(:)
+    TYPE(c_ptr), ALLOCATABLE     :: cmdv(:)
+    INTEGER :: i,ncmd
+
+    ! an absent optional argument must not be referenced, not even by SIZE()
+    IF (.NOT. PRESENT(cmds)) RETURN
+
+    ! convert command list to c style
+    ncmd = SIZE(cmds)
+    ALLOCATE(cmdv(ncmd))
+    DO i=1,ncmd
+        cmdv(i) = f2c_string(cmds(i))
+    END DO
+
+    CALL lammps_commands_list(self%handle,ncmd,cmdv)
+
+    ! Clean up allocated memory
+    DO i=1,ncmd
+        CALL lammps_free(cmdv(i))
+    END DO
+    DEALLOCATE(cmdv)
+  END SUBROUTINE lmp_commands_list
+
+  ! equivalent function to lammps_commands_string()
+  ! str is copied into a temporary zero terminated C string that is
+  ! released again after the call into the C library
+  SUBROUTINE lmp_commands_string(self,str)
+    IMPLICIT NONE
+    CLASS(lammps) :: self
+    CHARACTER(len=*) :: str
+    TYPE(c_ptr) :: c_block
+
+    c_block = f2c_string(str)
+    CALL lammps_commands_string(self%handle, c_block)
+    CALL lammps_free(c_block)
+  END SUBROUTINE lmp_commands_string
+
+  ! ----------------------------------------------------------------------
+  ! local helper functions
+  ! copy fortran string to zero terminated c string
+  !
+  ! Trailing blanks of f_string are not copied.  The storage for the copy
+  ! is requested from the C library malloc(), since all callers in this
+  ! module hand the returned pointer to the C function lammps_free() and
+  ! memory from a Fortran ALLOCATE must not be released by a C routine.
+  ! Returns a null pointer when the memory allocation fails.
+  FUNCTION f2c_string(f_string) RESULT(ptr)
+    USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_size_t, c_associated, &
+        c_f_pointer
+    CHARACTER (len=*), INTENT(in)           :: f_string
+    CHARACTER (len=1, kind=c_char), POINTER :: c_string(:)
+    TYPE(c_ptr) :: ptr
+    INTEGER :: i, n
+
+    INTERFACE
+        FUNCTION c_malloc(nbytes) BIND(C, name='malloc')
+          IMPORT :: c_ptr, c_size_t
+          INTEGER(c_size_t), VALUE :: nbytes
+          TYPE(c_ptr) :: c_malloc
+        END FUNCTION c_malloc
+    END INTERFACE
+
+    n = LEN_TRIM(f_string)
+    ! one extra character for the terminating null character
+    ptr = c_malloc(INT(n+1,c_size_t))
+    IF (.NOT. c_associated(ptr)) RETURN
+    ! view the C memory as a Fortran character array to fill it
+    CALL c_f_pointer(ptr,c_string,[n+1])
+    DO i=1,n
+        c_string(i) = f_string(i:i)
+    END DO
+    c_string(n+1) = c_null_char
+  END FUNCTION f2c_string
+END MODULE LIBLAMMPS
--- a/lib/gpu/Makefile.linux
+++ b/lib/gpu/Makefile.linux
@ -22,13 +22,13 @@ NVCC = nvcc
 #CUDA_ARCH = -arch=sm_21

 # Kepler hardware
-CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_30
 #CUDA_ARCH = -arch=sm_32
 #CUDA_ARCH = -arch=sm_35
 #CUDA_ARCH = -arch=sm_37

 # Maxwell hardware
-#CUDA_ARCH = -arch=sm_50
+CUDA_ARCH = -arch=sm_50
 #CUDA_ARCH = -arch=sm_52

 # Pascal hardware
--- a/lib/gpu/Makefile.linux.double
+++ b/lib/gpu/Makefile.linux.double
@ -7,18 +7,40 @@

 EXTRAMAKE = Makefile.lammps.standard

+ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
+endif
+
 NVCC = nvcc

-# Kepler CUDA
-#CUDA_ARCH = -arch=sm_35
-# Tesla CUDA
-CUDA_ARCH = -arch=sm_21
-# newer CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_13
-# older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE

+# Fermi hardware
+#CUDA_ARCH = -arch=sm_20
+#CUDA_ARCH = -arch=sm_21
+
+# Kepler hardware
+#CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_32
+#CUDA_ARCH = -arch=sm_35
+#CUDA_ARCH = -arch=sm_37
+
+# Maxwell hardware
+CUDA_ARCH = -arch=sm_50
+#CUDA_ARCH = -arch=sm_52
+
+# Pascal hardware
+#CUDA_ARCH = -arch=sm_60
+#CUDA_ARCH = -arch=sm_61
+
+# Volta hardware
+#CUDA_ARCH = -arch=sm_70
+
+# Turing hardware
+#CUDA_ARCH = -arch=sm_75
+
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL

@ -33,7 +55,7 @@ CUDA_PRECISION = -D_DOUBLE_DOUBLE

 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC

 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux.mixed
+++ b/lib/gpu/Makefile.linux.mixed
@ -7,18 +7,41 @@

 EXTRAMAKE = Makefile.lammps.standard

+ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
+endif
+
 NVCC = nvcc

-# Kepler CUDA
-#CUDA_ARCH = -arch=sm_35
-# Tesla CUDA
-CUDA_ARCH = -arch=sm_21
-# newer CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_13
 # older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE

+# Fermi hardware
+#CUDA_ARCH = -arch=sm_20
+#CUDA_ARCH = -arch=sm_21
+
+# Kepler hardware
+#CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_32
+#CUDA_ARCH = -arch=sm_35
+#CUDA_ARCH = -arch=sm_37
+
+# Maxwell hardware
+CUDA_ARCH = -arch=sm_50
+#CUDA_ARCH = -arch=sm_52
+
+# Pascal hardware
+#CUDA_ARCH = -arch=sm_60
+#CUDA_ARCH = -arch=sm_61
+
+# Volta hardware
+#CUDA_ARCH = -arch=sm_70
+
+# Turing hardware
+#CUDA_ARCH = -arch=sm_75
+
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL

@ -33,7 +56,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE

 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC

 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux.single
+++ b/lib/gpu/Makefile.linux.single
@ -7,18 +7,40 @@

 EXTRAMAKE = Makefile.lammps.standard

+ifeq ($(CUDA_HOME),)
 CUDA_HOME = /usr/local/cuda
+endif
+
 NVCC = nvcc

-# Kepler CUDA
-#CUDA_ARCH = -arch=sm_35
-# Tesla CUDA
-CUDA_ARCH = -arch=sm_21
-# newer CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_13
-# older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE

+# Fermi hardware
+#CUDA_ARCH = -arch=sm_20
+#CUDA_ARCH = -arch=sm_21
+
+# Kepler hardware
+#CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_32
+#CUDA_ARCH = -arch=sm_35
+#CUDA_ARCH = -arch=sm_37
+
+# Maxwell hardware
+CUDA_ARCH = -arch=sm_50
+#CUDA_ARCH = -arch=sm_52
+
+# Pascal hardware
+#CUDA_ARCH = -arch=sm_60
+#CUDA_ARCH = -arch=sm_61
+
+# Volta hardware
+#CUDA_ARCH = -arch=sm_70
+
+# Turing hardware
+#CUDA_ARCH = -arch=sm_75
+
 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL

@ -33,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_SINGLE

 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC

 CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
 CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/Makefile.linux_multi
+++ b/lib/gpu/Makefile.linux_multi
@ -13,17 +13,27 @@ endif

 NVCC = nvcc

-# Kepler CUDA
-#CUDA_ARCH = -arch=sm_35
-# newer CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_13
-# older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
-CUDA_ARCH = -arch=sm_30

-CUDA_CODE = -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
-	    -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] \
-	    -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52]
+# Fermi hardware
+#CUDA_ARCH = -arch=sm_20
+#CUDA_ARCH = -arch=sm_21
+
+# Kepler hardware
+#CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_32
+#CUDA_ARCH = -arch=sm_35
+#CUDA_ARCH = -arch=sm_37
+
+# Maxwell hardware
+CUDA_ARCH = -arch=sm_50
+#CUDA_ARCH = -arch=sm_52
+
+CUDA_CODE = -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] \
+	    -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
+	    -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_75,code=[sm_75,compute_75]

 CUDA_ARCH += $(CUDA_CODE)

--- a/lib/gpu/Makefile.serial
+++ b/lib/gpu/Makefile.serial
@ -13,13 +13,33 @@ endif

 NVCC = nvcc

-# Tesla CUDA
-CUDA_ARCH = -arch=sm_21
-# newer CUDA
+# obsolete hardware. not supported by current drivers anymore.
 #CUDA_ARCH = -arch=sm_13
-# older CUDA
 #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
-CUDA_ARCH = -arch=sm_35
+
+# Fermi hardware
+#CUDA_ARCH = -arch=sm_20
+#CUDA_ARCH = -arch=sm_21
+
+# Kepler hardware
+#CUDA_ARCH = -arch=sm_30
+#CUDA_ARCH = -arch=sm_32
+#CUDA_ARCH = -arch=sm_35
+#CUDA_ARCH = -arch=sm_37
+
+# Maxwell hardware
+CUDA_ARCH = -arch=sm_50
+#CUDA_ARCH = -arch=sm_52
+
+# Pascal hardware
+#CUDA_ARCH = -arch=sm_60
+#CUDA_ARCH = -arch=sm_61
+
+# Volta hardware
+#CUDA_ARCH = -arch=sm_70
+
+# Turing hardware
+#CUDA_ARCH = -arch=sm_75

 # this setting should match LAMMPS Makefile
 # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -35,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE

 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs -L../../src/STUBS -lmpi_stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC)
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC

 CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
 CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
--- a/lib/gpu/lal_gauss.cu
+++ b/lib/gpu/lal_gauss.cu
@ -81,7 +81,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
        numtyp r2inv = ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);
        numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
-        ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj;
+        ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;

        f.x+=delx*force;
        f.y+=dely*force;
@ -90,7 +90,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
        if (eflag>0) {
          numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
            gauss1[mtype].w);
-          energy+=factor_lj*e;
+          energy+=e; //factor_lj*e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
@ -168,7 +168,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
        numtyp r2inv = ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);
        numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
-        ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj;
+        ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;

        f.x+=delx*force;
        f.y+=dely*force;
@ -177,7 +177,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
        if (eflag>0) {
          numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
            gauss1[mtype].w);
-          energy+=factor_lj*e;
+          energy+=e; //factor_lj*e;
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
--- a/lib/gpu/lal_tersoff.cu
+++ b/lib/gpu/lal_tersoff.cu
@ -709,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];

  __syncthreads();

@ -789,14 +789,14 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }

      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];

      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/gpu/lal_tersoff_mod.cu
+++ b/lib/gpu/lal_tersoff_mod.cu
@ -719,7 +719,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];

  __syncthreads();

@ -799,14 +799,14 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }

      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];

      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
@ -957,7 +957,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];

  __syncthreads();

@ -1037,14 +1037,14 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }

      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];

      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/gpu/lal_tersoff_zbl.cu
+++ b/lib/gpu/lal_tersoff_zbl.cu
@ -729,7 +729,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

-  __local int red_acc[BLOCK_PAIR];
+  __local int ijnum_shared[BLOCK_PAIR];

  __syncthreads();

@ -809,14 +809,14 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
        k &= NEIGHMASK;
        if (k == i) {
          ijnum = nbor_k;
-          red_acc[m] = ijnum;
+          ijnum_shared[m] = ijnum;
          break;
        }
      }

      numtyp r1 = ucl_sqrt(rsq1);
      numtyp r1inv = ucl_rsqrt(rsq1);
-      if (ijnum < 0) ijnum = red_acc[m];
+      if (ijnum < 0) ijnum = ijnum_shared[m];

      // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
      int idx = ijnum;
--- a/lib/kokkos/BUILD.md
+++ b/lib/kokkos/BUILD.md
@ -10,33 +10,45 @@ for C++.  Applications heavily leveraging Kokkos are strongly encouraged to use
 You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project.
 Modern CMake is exceedingly simple at a high-level (with the devil in the details).
 Once Kokkos is installed, in your `CMakeLists.txt` simply use:
-````
+````cmake
 find_package(Kokkos REQUIRED)
 ````
 Then for every executable or library in your project:
-````
+````cmake
 target_link_libraries(myTarget Kokkos::kokkos)
 ````
 That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
 Kokkos propagates all the necessary flags to your project.
 This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
-project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`.
+project.
+When configuring your project just set:
+````bash
+> cmake ${srcdir} \
+  -DKokkos_ROOT=${kokkos_install_prefix} \
+  -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
+````
+Note: You may need the following if using some versions of CMake (e.g. 3.12):
+````cmake
+cmake_policy(SET CMP0074 NEW)
+````
+If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(myTarget Kokkos::kokkos)`.
+The examples in `examples/cmake_build_installed` and `examples/cmake_build_in_tree` can help get you started.


 ## Configuring CMake
-A very basic installation is done with:
-````
-cmake ${srcdir} \
+A very basic installation of Kokkos is done with:
+````bash
+> cmake ${srcdir} \
 -DCMAKE_CXX_COMPILER=g++ \
- -DCMAKE_INSTALL_PREFIX=${my_install_folder}
+ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder}
 ````
 which builds and installs a default Kokkos when you run `make install`.
 There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g.
-````
-cmake ${srcdir} \
+````bash
+> cmake ${srcdir} \
 -DCMAKE_CXX_COMPILER=g++ \
- -DCMAKE_INSTALL_PREFIX=${my_install_folder} \
- -DKokkos_ENABLE_OPENMP=On
+ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \
+ -DKokkos_ENABLE_OPENMP=ON
 ````
 which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.

@ -50,16 +62,16 @@ which activates the OpenMP backend. All of the options controlling device backen
 ## Spack
 An alternative to manually building with the CMake is to use the Spack package manager.
 To do so, download the `kokkos-spack` git repo and add to the package list:
-````
-spack repo add $path-to-kokkos-spack
+````bash
+> spack repo add $path-to-kokkos-spack
 ````
 A basic installation would be done as:
-````
-spack install kokkos
+````bash
+> spack install kokkos
 ````
 Spack allows options and compilers to be tuned in the install command.
-````
-spack install kokkos@3.0 %gcc@7.3.0 +openmp
+````bash
+> spack install kokkos@3.0 %gcc@7.3.0 +openmp
 ````
 This example illustrates the three most common parameters to Spack:
 * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -67,17 +79,17 @@ This example illustrates the three most common parameters to Spack:
 * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option.

 For a complete list of Kokkos options, run:
+````bash
+> spack info kokkos
 ````
-spack info kokkos
-````
-More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
+More details can be found in the [Spack README](Spack.md).

 #### Spack Development
 Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
 Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
 If you must know, you can locate Spack Kokkos installations with:
-````
-spack find -p kokkos ...
+````bash
+> spack find -p kokkos ...
 ````
 where `...` is the unique spec identifying the particular Kokkos configuration and version.

@ -104,6 +116,12 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_SERIAL
    * Whether to build serial backend
    * BOOL Default: ON
+* Kokkos_ENABLE_HIP (Experimental)
+    * Whether to build HIP backend
+    * BOOL Default: OFF
+* Kokkos_ENABLE_OPENMPTARGET (Experimental)
+    * Whether to build the OpenMP target backend
+    * BOOL Default: OFF

 ## Enable Options
 Options can be enabled by specifying `-DKokkos_ENABLE_X`.
@ -138,9 +156,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
    * Debug check on dual views
    * BOOL Default: OFF
-* Kokkos_ENABLE_DEPRECATED_CODE
-    * Whether to enable deprecated code
-    * BOOL Default: OFF
 * Kokkos_ENABLE_EXAMPLES
    * Whether to enable building examples
    * BOOL Default: OFF
@ -150,9 +165,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
 * Kokkos_ENABLE_LARGE_MEM_TESTS
    * Whether to perform extra large memory tests
    * BOOL Default: OFF
-* Kokkos_ENABLE_PROFILING
-    * Whether to create bindings for profiling tools
-    * BOOL Default: ON
 * Kokkos_ENABLE_PROFILING_LOAD_PRINT
    * Whether to print information about which profiling tools got loaded
    * BOOL Default: OFF
@ -235,8 +247,11 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
 * Kokkos_ARCH_BGQ
    * Whether to optimize for the BGQ architecture
    * BOOL Default: OFF
-* Kokkos_ARCH_EPYC
-    * Whether to optimize for the EPYC architecture
+* Kokkos_ARCH_ZEN
+    * Whether to optimize for the Zen architecture
+    * BOOL Default: OFF
+* Kokkos_ARCH_ZEN2
+    * Whether to optimize for the Zen2 architecture
    * BOOL Default: OFF
 * Kokkos_ARCH_HSW
    * Whether to optimize for the HSW architecture
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@ -1,6 +1,113 @@
 # Change Log

-## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
+## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00)
+
+**Implemented enhancements:**
+
+- HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163)
+- HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154)
+- HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137)
+- HIP:Require HIP 3.5.0 or higher [\#3099](https://github.com/kokkos/kokkos/issues/3099)
+- HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096)
+- OpenMPTarget: Significant update to the new experimental backend.  Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169)
+- Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018)
+- Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241)
+- Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234)
+- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 1: [\#3202](https://github.com/kokkos/kokkos/issues/3202)
+- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 2: [\#3203](https://github.com/kokkos/kokkos/issues/3203)
+- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 3: [\#3196](https://github.com/kokkos/kokkos/issues/3196)
+- Annotations for `DefaultExectutionSpace` and `DefaultHostExectutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189)
+- Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187)
+- Improve support for nvcc\_wrapper with exotic host compiler [\#3186](https://github.com/kokkos/kokkos/issues/3186)
+- Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185)
+- Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166)
+- Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165)
+- View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159)
+- Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151)
+- BuildSystem: Provide an explicit default CMAKE\_BUILD\_TYPE [\#3131](https://github.com/kokkos/kokkos/issues/3131)
+- Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124)
+- Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123)
+- BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122)
+- General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106)
+- TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098)
+- nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092)
+- BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091)
+- Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087)
+- Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084)
+- BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082)
+- Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076)
+- HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067)
+- Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061)
+- Extract and use get\_gpu [\#3048](https://github.com/kokkos/kokkos/issues/3048)
+- Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059)
+- Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052)
+- Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051)
+- Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034)
+- BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021)
+- HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020)
+- Autodetect number of devices [\#3013](https://github.com/kokkos/kokkos/issues/3013)
+
+
+**Fixed bugs:**
+
+- Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255)
+- Fix issue with C++ standard flags when using `nvcc\_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254)
+- Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208)
+- Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176)
+- Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170)
+- BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161)
+- ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162)
+- BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127)
+- OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101)
+- BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085)
+- HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080)
+- Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072)
+- Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069)
+- Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035)
+
+**Incompatibilities:**
+
+- Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157)
+- Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147)
+- Remove core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146)
+- Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115)
+- Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066)
+
+**Closed issues:**
+
+- Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
+- Remove KOKKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
+- Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
+- In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
+- Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)
+- DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040)
+- Add installation testing.  [\#3037](https://github.com/kokkos/kokkos/issues/3037)
+- nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017)
+- CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996)
+- Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975)
+- Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974)
+- Add ability to assign kokkos profile function from executable  [\#2973](https://github.com/kokkos/kokkos/issues/2973)
+- ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967)
+
+- Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237)
+- Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252)
+- Error configuring as subproject [\#3140](https://github.com/kokkos/kokkos/issues/3140)
+- CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207)
+- PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125)
+- Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167)
+- RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192)
+- MemorySpace::allocate needs to have memory pool counting.  [\#3064](https://github.com/kokkos/kokkos/issues/3064)
+- Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038)
+- CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026)
+- Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014)
+- nvcc\_wrapper cant handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993)
+- Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990)
+- complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
+- Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979)
+- Rank 1 custom layouts dont work as expected. [\#2840](https://github.com/kokkos/kokkos/issues/2840)
+
+## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)

 **Fixed bugs:**
--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@ -1,4 +1,9 @@

+# Disable in-source builds to prevent source tree corruption.
+if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
+  message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files." )
+endif()
+
 # We want to determine if options are given with the wrong case
 # In order to detect which arguments are given to compare against
 # the list of valid arguments, at the beginning here we need to
@ -34,6 +39,9 @@ IF(COMMAND TRIBITS_PACKAGE_DECL)
 ELSE()
  SET(KOKKOS_HAS_TRILINOS OFF)
 ENDIF()
+# Is this build a subdirectory of another project
+GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
+

 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
@ -75,16 +83,17 @@ IF(NOT KOKKOS_HAS_TRILINOS)
      SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
      SET(ENV{CXX} ${SPACK_CXX})
    ENDIF()
-  ENDif()
-  IF(NOT DEFINED ${PROJECT_NAME})
-    # WORKAROUND FOR HIPCC
-    IF(Kokkos_ENABLE_HIP)
-      SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
  ENDIF()
+  # Always call the project command to define Kokkos_ variables
+  # and to make sure that C++ is an enabled language
  PROJECT(Kokkos CXX)
-    IF(Kokkos_ENABLE_HIP)
-      SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
+  IF(NOT HAS_PARENT)
+    IF (NOT CMAKE_BUILD_TYPE)
+      SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
+      MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
+      SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
+          "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
+          FORCE)
    ENDIF()
  ENDIF()
 ENDIF()
@ -102,8 +111,8 @@ ENDIF()


 set(Kokkos_VERSION_MAJOR 3)
-set(Kokkos_VERSION_MINOR 1)
-set(Kokkos_VERSION_PATCH 1)
+set(Kokkos_VERSION_MINOR 2)
+set(Kokkos_VERSION_PATCH 0)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

@ -147,6 +156,7 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
 # Check the environment and set certain variables
 # to allow platform-specific checks
 INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
+
 # The build environment setup goes in the following steps
 # 1) Check all the enable options. This includes checking Kokkos_DEVICES
 # 2) Check the compiler ID (type and version)
@ -169,7 +179,6 @@ SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontain
 SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
 SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})

-GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
 IF (KOKKOS_HAS_TRILINOS)
  SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
  SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
@ -203,7 +212,7 @@ IF (KOKKOS_HAS_TRILINOS)
    SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
    LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
  ENDFOREACH()
-  SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
+  SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
  IF (KOKKOS_ENABLE_CUDA)
    STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS    "${KOKKOS_CUDA_OPTIONS}")
    FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
@ -246,7 +255,7 @@ KOKKOS_PACKAGE_POSTPROCESS()
 #We are ready to configure the header
 CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)

-IF (NOT KOKKOS_HAS_TRILINOS)
+IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
  ADD_LIBRARY(kokkos INTERFACE)
  #Make sure in-tree projects can reference this as Kokkos::
  #to match the installed target names
@ -262,8 +271,6 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
 # If the argument of DESTINATION is a relative path, CMake computes it
 # as relative to ${CMAKE_INSTALL_PATH}.
 INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR})
-INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
-

 #  Finally - if we are a subproject - make sure the enabled devices are visible
 IF (HAS_PARENT)
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
 endif

 KOKKOS_VERSION_MAJOR = 3
-KOKKOS_VERSION_MINOR = 1
-KOKKOS_VERSION_PATCH = 1
+KOKKOS_VERSION_MINOR = 2
+KOKKOS_VERSION_PATCH = 0
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)

 # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
@ -20,11 +20,11 @@ KOKKOS_DEVICES ?= "OpenMP"
 #KOKKOS_DEVICES ?= "Pthread"
 # Options: 
 # Intel:    KNC,KNL,SNB,HSW,BDW,SKX
-# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
+# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
 # IBM:      BGQ,Power7,Power8,Power9
 # AMD-GPUS: Vega900,Vega906
-# AMD-CPUS: AMDAVX,EPYC
+# AMD-CPUS: AMDAVX,Zen,Zen2
 KOKKOS_ARCH ?= ""
 # Options: yes,no
 KOKKOS_DEBUG ?= "no"
@ -32,10 +32,8 @@ KOKKOS_DEBUG ?= "no"
 KOKKOS_USE_TPLS ?= ""
 # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a
 KOKKOS_CXX_STANDARD ?= "c++11"
-# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align
+# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
 KOKKOS_OPTIONS ?= ""
-# Option for setting ETI path
-KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
 KOKKOS_CMAKE ?= "no"
 KOKKOS_TRIBITS ?= "no"
 KOKKOS_STANDALONE_CMAKE ?= "no"
@ -74,6 +72,7 @@ KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
 KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
 KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
 KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
+KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20)

 # Check for external libraries.
 KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
@ -83,9 +82,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
 # Check for advanced settings.
 KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
 KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
-KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
-KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
-KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code)
+KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
 KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
 KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
 KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
@ -96,7 +93,6 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
 KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
 KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
 KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
-KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)

 KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)

@ -140,6 +136,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_DEVICELIST += OPENMPTARGET
+  KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
+                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX20) \
+                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX2A))
+  ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
+    $(error OpenMPTarget backend requires C++17 or newer)
+  endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -281,7 +283,7 @@ endif
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
  KOKKOS_INTERNAL_CXX14_FLAG := --c++14
-  #KOKKOS_INTERNAL_CXX17_FLAG := --c++17
+  KOKKOS_INTERNAL_CXX17_FLAG := --c++17
 else
  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
     KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
@ -338,35 +340,27 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
 KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
 KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
 KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
+KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
 KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
+                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70)   \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72)   \
                                              + $(KOKKOS_INTERNAL_USE_ARCH_TURING75)  \
-                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
-                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
-                                              + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
+                                              + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80))

 #SEK: This seems like a bug to me
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
  KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
  KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
-  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70)   \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72)   \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_TURING75)  \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
-                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
+  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
+                                                + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
@ -394,19 +388,20 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_

 # AMD based.
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
-KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
+KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
+# "Zen" is a prefix of "Zen2": only test for plain Zen when Zen2 was not selected, so a single
+# request never sets both flags (assumes kokkos_has_string matches substrings - TODO confirm).
+KOKKOS_INTERNAL_USE_ARCH_ZEN := $(if $(filter 1,$(KOKKOS_INTERNAL_USE_ARCH_ZEN2)),0,$(call kokkos_has_string,$(KOKKOS_ARCH),Zen))
 KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
 KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)

 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
 KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
-KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
 KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
 KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))

 # Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
+KOKKOS_INTERNAL_USE_ISA_X86_64    := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
 KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -430,7 +425,7 @@ endif
 KOKKOS_CPPFLAGS =
 KOKKOS_LIBDIRS =
 ifneq ($(KOKKOS_CMAKE), yes)
-  KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
+  KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
 endif
 KOKKOS_TPL_INCLUDE_DIRS =
 KOKKOS_TPL_LIBRARY_DIRS =
@ -458,88 +453,91 @@ KOKKOS_CONFIG_HEADER=KokkosCore_config.h
 # Functions for generating config header file
 kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))

+# assign hash sign to variable for compat. with make 4.3
+H := \#
+
 # Do not append first line
 tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
 tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
 tmp := $(call kokkos_append_header,"$(shell date)")
 tmp := $(call kokkos_append_header,"----------------------------------------------*/")

-tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
-tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
-tmp := $(call kokkos_append_header,'\#else')
-tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
-tmp := $(call kokkos_append_header,'\#endif')
+tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
+tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
+tmp := $(call kokkos_append_header,'$H''else')
+tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
+tmp := $(call kokkos_append_header,'$H''endif')

 tmp := $(call kokkos_append_header,"")
-tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
+tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
 tmp := $(call kokkos_append_header,"")
 	
 tmp := $(call kokkos_append_header,"/* Execution Spaces */")

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_ROCM')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
  ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
  endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-  tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP')
+  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
-  tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
-  tmp := $(call kokkos_append_header,"\#endif")
+  tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
+  tmp := $(call kokkos_append_header,"$H""endif")
 endif

 #only add the c++ standard flags if this is not CMake
@ -548,34 +546,39 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX11")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
 ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
 endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
 endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
+endif
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
+  #I cannot make CMake add this in a good way - so add it here
+  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
 endif

 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@ -585,20 +588,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)

  KOKKOS_CXXFLAGS += -g
  KOKKOS_LDFLAGS += -g
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
  ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
  endif
 endif
 ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
 endif

 ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
 endif

+# Use $H (a variable holding '#') rather than "\#": make 4.3 keeps the backslash inside $(call ...).
+ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING")
+endif
+
+# KOKKOS_ENABLE_LIBDL is emitted unconditionally.
+tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL")
+
 ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
  ifneq ($(KOKKOS_CMAKE), yes)
    ifneq ($(HWLOC_PATH),)
@ -611,11 +620,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
    KOKKOS_LIBS += -lhwloc
    KOKKOS_TPL_LIBRARY_NAMES += hwloc
  endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
  KOKKOS_LIBS += -lrt
  KOKKOS_TPL_LIBRARY_NAMES += rt
 endif
@ -632,50 +641,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
    KOKKOS_LIBS += -lmemkind -lnuma
    KOKKOS_TPL_LIBRARY_NAMES += memkind numa
  endif
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE")
-endif
-
-ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
-  ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
 endif

 ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
 endif

 tmp := $(call kokkos_append_header,"/* Optimization Settings */")

 ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
 endif

 tmp := $(call kokkos_append_header,"/* Cuda Settings */")

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
    endif
  endif

  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
  endif

  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      KOKKOS_CXXFLAGS += -fcuda-rdc
      KOKKOS_LDFLAGS += -fcuda-rdc
@ -696,7 +691,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
-        tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
+        tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
        KOKKOS_CXXFLAGS += -expt-extended-lambda
      else
        $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
@ -704,14 +699,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    endif

    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
    endif
  endif

  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0)
-        tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
+        tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
        KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
      else
        $(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.)
@ -719,25 +714,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    endif

    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
+      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
    endif
  endif

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
  endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
  ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
  endif
 endif

 # Add Architecture flags.

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -754,7 +749,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -770,9 +765,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
  endif
 endif

-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
@ -783,9 +778,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
  endif
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
+
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+    KOKKOS_CXXFLAGS += -mavx2
+    KOKKOS_LDFLAGS += -mavx2
+  else
+    KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
+    KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -802,8 +810,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
@ -820,7 +828,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xSSE4.2
@ -842,7 +850,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx
@ -864,7 +872,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)

@ -876,7 +884,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)

@ -897,7 +905,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)

@ -918,7 +926,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -940,7 +948,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -962,7 +970,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xMIC-AVX512
@ -983,7 +991,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX512
@ -1004,7 +1012,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
  KOKKOS_CXXFLAGS += -mmic
  KOKKOS_LDFLAGS += -mmic
 endif
@ -1039,65 +1047,70 @@ endif

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
+  endif

  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
@ -1121,13 +1134,13 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  # Lets start with adding architecture defines
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
    KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
    KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
  endif

@ -1138,7 +1151,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)

  ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
-    tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
+    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
    KOKKOS_CXXFLAGS+=-fgpu-rdc
    KOKKOS_LDFLAGS+=-fgpu-rdc
  else
@ -1171,9 +1184,6 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
-endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
  ifneq ($(CUDA_PATH),)
    KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include
@ -1211,9 +1221,6 @@ endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
-endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -1228,9 +1235,6 @@ endif

 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
-endif
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
  KOKKOS_LIBS += -lpthread
  KOKKOS_TPL_LIBRARY_NAMES += pthread
@ -1279,9 +1283,6 @@ endif
 # Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
 # device to avoid a link warning.
 ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
-endif
 endif
 ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@ -26,21 +26,17 @@ Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spi
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
 Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
-Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
+Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
 Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
+Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
 Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp 
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp

-ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
-endif
-endif
-
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,9 +46,6 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
 Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
-endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
@ -75,9 +68,6 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
 Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
-endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -85,9 +75,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
-endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -95,9 +82,6 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
 Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
-ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
-  include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
-endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
--- a/lib/kokkos/README.md
+++ b/lib/kokkos/README.md
@ -151,7 +151,7 @@ Full details are given in the [build instructions](BUILD.md). Basic setups are s
 ## CMake

 The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
-````
+````bash
 cmake $srcdir \
  -DCMAKE_CXX_COMPILER=$path_to_compiler \
  -DCMAKE_INSTALL_PREFIX=$path_to_install \
@ -170,7 +170,7 @@ and run `make test` after completing the build.

 For your CMake project using Kokkos, code such as the following:

-````
+````cmake
 find_package(Kokkos)
 ...
 target_link_libraries(myTarget Kokkos::kokkos)
@ -187,17 +187,15 @@ for the install location given above.

 ## Spack
 An alternative to manually building with the CMake is to use the Spack package manager.
-To do so, download the `kokkos-spack` git repo and add to the package list:
-````
-spack repo add $path-to-kokkos-spack
+To get started, download the Spack [repo](https://github.com/spack/spack).
 ````
 A basic installation would be done as:
-````
-spack install kokkos
+````bash
+> spack install kokkos
 ````
 Spack allows options and and compilers to be tuned in the install command.
-````
-spack install kokkos@3.0 %gcc@7.3.0 +openmp
+````bash
+> spack install kokkos@3.0 %gcc@7.3.0 +openmp
 ````
 This example illustrates the three most common parameters to Spack:
 * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -205,33 +203,33 @@ This example illustrates the three most common parameters to Spack:
 * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option.

 For a complete list of Kokkos options, run:
-````
-spack info kokkos
+````bash
+> spack info kokkos
 ````
 Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
 Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
 More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
-````
-spack find -p kokkos ...
+````bash
+> spack find -p kokkos ...
 ````
 where `...` is the unique spec identifying the particular Kokkos configuration and version.
-
+More details can be found in the Kokkos Spack [documentation](Spack.md) or on the Spack [website](https://spack.readthedocs.io/en/latest).

 ## Raw Makefile
 A bash script is provided to generate raw makefiles.
 To install Kokkos as a library create a build directory and run the following
-````
-$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
+````bash
+> $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
 ````
 Once the Makefile is generated, run:
-````
-make kokkoslib
-make install
+````bash
+> make kokkoslib
+> make install
 ````
 To additionally run the unit tests:
-````
-make build-test
-make test
+````bash
+> make build-test
+> make test
 ````
 Run `generate_makefile.bash --help` for more detailed options such as
 changing the device type for which to build.
@ -274,7 +272,7 @@ more than a single GPU is used by a single process.

 If you publish work which mentions Kokkos, please cite the following paper:

-````
+````BibTeX
@article{CarterEdwards20143202,
  title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
  journal = "Journal of Parallel and Distributed Computing ",
--- a/lib/kokkos/Spack.md
+++ b/lib/kokkos/Spack.md
@ -0,0 +1,267 @@
+![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)
+
+# Kokkos Spack
+
+This document gives instructions for using Spack to install Kokkos and for developing packages that depend on Kokkos.
+
+## Getting Started
+
+Make sure you have downloaded [Spack](https://github.com/spack/spack).
+The easiest way to configure the Spack environment is:
+````bash
+> source spack/share/spack/setup-env.sh
+````
+with other scripts available for other shells.
+You can display information about how to install packages with:
+````bash
+> spack info kokkos
+````
+This will print all the information about how to install Kokkos with Spack.
+For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io).
+
+## Setting Up Spack: Avoiding the Package Cascade
+By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA.
+The resulting cascade of package builds can be limited by adding a `packages.yaml` file to your `$HOME/.spack` folder that lists CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be:
+````yaml
+packages:
+ cuda:
+  modules:
+   cuda@10.1.243: [cuda/10.1.243]
+  paths:
+   cuda@10.1.243:
+    /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
+  buildable: false
+ cmake:
+  modules:
+   cmake: [cmake/3.16.8]
+  paths:
+   cmake:
+    /opt/local/ppc64le/cmake/3.16.8
+  buildable: false
+````
+The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
+The `buildable` flag is useful to make sure Spack crashes if there is a path error,
+rather than a typo causing Spack to rebuild everything because `cmake` isn't found.
+You can verify your environment is set up correctly by running `spack graph` or `spack spec`.
+For example:
+````bash
+> spack graph kokkos +cuda
+o  kokkos
+|\
+o |  cuda
+ /
+o  cmake
+````
+Without the existing CUDA and CMake being identified in `packages.yaml`, a (subset!) of the output would be:
+````bash
+o  kokkos
+|\
+| o  cmake
+| |\
+| | | |\
+| | | | | |\
+| | | | | | | |\
+| | | | | | | | | |\
+| | | | | | | o | | |  libarchive
+| | | | | | | |\ \ \ \
+| | | | | | | | | |\ \ \ \
+| | | | | | | | | | | | |_|/
+| | | | | | | | | | | |/| |
+| | | | | | | | | | | | | o  curl
+| | |_|_|_|_|_|_|_|_|_|_|/|
+| |/| | | |_|_|_|_|_|_|_|/
+| | | | |/| | | | | | | |
+| | | | o | | | | | | | |  openssl
+| |/| | | | | | | | | | |
+| | | | | | | | | | o | |  libxml2
+| | |_|_|_|_|_|_|_|/| | |
+| | | | | | | | | | |\ \ \
+| o | | | | | | | | | | | |  zlib
+|  / / / / / / / / / / / /
+| o | | | | | | | | | | |  xz
+|  / / / / / / / / / / /
+| o | | | | | | | | | |  rhash
+|  / / / / / / / / / /
+| | | | o | | | | | |  nettle
+| | | | |\ \ \ \ \ \ \
+| | | o | | | | | | | |  libuv
+| | | | o | | | | | | |  autoconf
+| | |_|/| | | | | | | |
+| | | | |/ / / / / / /
+| o | | | | | | | | |  perl
+| o | | | | | | | | |  gdbm
+| o | | | | | | | | |  readline
+````
+
+## Configuring Kokkos as a Project Dependency
+Say you have a project "SuperScience" which needs to use Kokkos.
+In your `package.py` file, you would generally include something like:
+````python
+class SuperScience(CMakePackage):
+  ...
+  depends_on("kokkos")
+````
+Often projects want to tweak behavior when using certain features, e.g.
+````python
+  depends_on("kokkos+cuda", when="+cuda")
+````
+if your project needs CUDA-specific logic to configure and build.
+This illustrates the general principle in Spack of "flowing-up".
+A user requests a feature in the final app:
+````bash
+> spack install superscience+cuda
+````
+This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build.
+The downstream app (SuperScience) tells the upstream app (Kokkos) how to build.
+
+Because Kokkos is a performance portability library, it somewhat inverts this principle.
+Kokkos "flows-down", telling your application how best to configure for performance.
+Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build,
+a pre-built Kokkos should be telling the downstream app SuperScience what variants to use.
+Kokkos works best when there is an "expert" configuration installed on your system.
+Your build should simply request `-DKokkos_ROOT=<BEST_KOKKOS_FOR_MY_SYSTEM>` and configure appropriately based on the Kokkos it finds.
+
+Kokkos has many, many build variants.
+Where possible, projects should only depend on a general Kokkos, not specific variants.
+Instead, for each system you build on, we recommend adding a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users).
+For a Xeon + Volta system, this could look like:
+````yaml
+ kokkos:
+  variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70
+  compiler: [gcc@7.2.0]
+````
+which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1.
+It also enables support for CUDA Lambdas.
+The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below).
+Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture.
+For a Haswell system, we use
+````yaml
+ kokkos:
+  variants: +openmp std=14 target=haswell
+  compiler: [intel@18]
+````
+which uses the built-in microarchitecture variants of Spack.
+Consult the Spack documentation for more details of Spack microarchitectures
+and CUDA architectures.
+Spack does not currently provide an AMD GPU microarchitecture option.
+If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`.
+````yaml
+ kokkos:
+  variants: +hip amd_gpu_arch=vega900
+````
+
+Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want.
+For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems).
+If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project:
+````bash
+> spack install superscience
+````
+you may end up just getting the default Kokkos (i.e. Serial).
+Some examples are included in the `config/yaml` folder for common platforms.
+Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
+For example, with Kokkos Kernels:
+````bash
+kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512
+    ^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512
+        ^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
+                ^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512
+                        ^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512
+                                    ^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512
+````
+The output can be very verbose, but we can verify the expected `kokkos`:
+````bash
+kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
+````
+We see that we do have, e.g., `+volta70` and `+wrapper`.
+
+### Spack Environments
+The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)).
+Rather than installing packages one-at-a-time, you add packages to an environment.
+After adding all packages, you concretize and install them all.
+Using environments, one can explicitly add a desired Kokkos for the environment, e.g.
+````bash
+> spack add kokkos +cuda +cuda_lambda +volta70
+> spack add my_project +my_variant
+> ...
+> spack install
+````
+All packages within the environment will build against the CUDA-enabled Kokkos,
+even if they only request a default Kokkos.
+
+## NVCC Wrapper
+Kokkos is a C++ project, but often builds for the CUDA backend.
+This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler.
+Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler.
+`nvcc` itself also uses an underlying host compiler, e.g. GCC.
+
+In Spack, the underlying host compiler is specified as below, e.g.:
+````bash
+> spack install package %gcc@8.0.0
+````
+This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant.
+````bash
+> spack install kokkos +cuda +wrapper %gcc@7.2.0
+````
+Downstream projects depending on Kokkos need to override their compiler.
+Kokkos provides the compiler in a `kokkos_cxx` variable,
+which points to either `nvcc_wrapper` when needed or the regular compiler otherwise.
+Spack projects already do this to use MPI compiler wrappers.
+````python
+def cmake_args(self):
+  options = []
+  ...
+  options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx)
+  ...
+  return options
+````
+Note: `nvcc_wrapper` works with the MPI compiler wrappers.
+If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`.
+Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood.
+````python
+def cmake_args(self):
+  options = []
+  ...
+  options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx)
+  ...
+  return options
+````
+To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI).
+This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway.
+This behavior is necessary for now, but will hopefully be removed later.
+When using environments, if MPI is not needed, you can remove the MPI dependency with:
+````bash
+> spack add kokkos-nvcc-wrapper ~mpi
+````
+
+## Developing With Spack
+
+Spack has historically been much more suited to *deployment* of mature packages than active testing or developing.
+However, recent features have improved support for development.
+Future releases are likely to make this even easier and incorporate Git integration.
+The most common commands will do a full build and install of the packages.
+If doing development, you may wish to merely set up a build environment.
+This allows you to modify the source and re-build.
+In this case, you can stop after configuring.
+Suppose you have a Kokkos checkout in the folder `kokkos-src`:
+````bash
+> spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp
+````
+This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown):
+Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases.
+You are usually developing a feature branch that will merge into `develop`,
+hence you are making a new `develop` branch.
+
+````bash
+> cd kokkos-src
+> source spack-build-env.txt
+> cd spack-build
+> make
+````
+Before sourcing the Spack development environment, you may wish to save your current environment:
+````bash
+> declare -px > myenv.sh
+````
+When done with Spack, you can then restore your original environment:
+````bash
+> source myenv.sh
+````
--- a/lib/kokkos/algorithms/CMakeLists.txt
+++ b/lib/kokkos/algorithms/CMakeLists.txt
@ -2,7 +2,9 @@

 KOKKOS_SUBPACKAGE(Algorithms)

-ADD_SUBDIRECTORY(src)
+IF (NOT Kokkos_INSTALL_TESTING)
+  ADD_SUBDIRECTORY(src)
+ENDIF()

 KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)

--- a/lib/kokkos/algorithms/src/CMakeLists.txt
+++ b/lib/kokkos/algorithms/src/CMakeLists.txt
@ -7,9 +7,15 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

 #-----------------------------------------------------------------------------

-FILE(GLOB HEADERS *.hpp)
-FILE(GLOB SOURCES *.cpp)
-LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
+FILE(GLOB ALGO_HEADERS *.hpp)
+FILE(GLOB ALGO_SOURCES *.cpp)
+LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
+
+INSTALL (
+  DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
+  DESTINATION ${KOKKOS_HEADER_DIR}
+  FILES_MATCHING PATTERN "*.hpp"
+)

 #-----------------------------------------------------------------------------

@ -17,8 +23,8 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
 # These will get ignored for standalone CMake and a true interface library made
 KOKKOS_ADD_INTERFACE_LIBRARY(
  kokkosalgorithms
-  HEADERS ${HEADERS}
-  SOURCES ${SOURCES}
+  HEADERS ${ALGO_HEADERS}
+  SOURCES ${ALGO_SOURCES}
 )
 KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
  ${KOKKOS_TOP_BUILD_DIR}
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@ -94,9 +94,9 @@ namespace Kokkos {
    class Pool {
     public:
      //The Kokkos device type
-      typedef Device device_type;
+      using device_type = Device;
      //The actual generator type
-      typedef Generator<Device> generator_type;
+      using generator_type = Generator<Device>;

      //Default constructor: does not initialize a pool
      Pool();
@ -124,7 +124,7 @@ namespace Kokkos {
    class Generator {
     public:
     //The Kokkos device type
-    typedef DeviceType device_type;
+    using device_type = DeviceType;

    //Max return values of respective [X]rand[S]() functions
    enum {MAX_URAND = 0xffffffffU};
@ -138,75 +138,75 @@ namespace Kokkos {
    KOKKOS_INLINE_FUNCTION
    Generator (STATE_ARGUMENTS, int state_idx = 0);

-    //Draw a equidistributed uint32_t in the range (0,MAX_URAND]
+    //Draw an equidistributed uint32_t in the range [0,MAX_URAND)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand();

-    //Draw a equidistributed uint64_t in the range (0,MAX_URAND64]
+    //Draw an equidistributed uint64_t in the range [0,MAX_URAND64)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64();

-    //Draw a equidistributed uint32_t in the range (0,range]
+    //Draw an equidistributed uint32_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand(const uint32_t& range);

-    //Draw a equidistributed uint32_t in the range (start,end]
+    //Draw an equidistributed uint32_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    uint32_t urand(const uint32_t& start, const uint32_t& end );

-    //Draw a equidistributed uint64_t in the range (0,range]
+    //Draw an equidistributed uint64_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64(const uint64_t& range);

-    //Draw a equidistributed uint64_t in the range (start,end]
+    //Draw an equidistributed uint64_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    uint64_t urand64(const uint64_t& start, const uint64_t& end );

-    //Draw a equidistributed int in the range (0,MAX_RAND]
+    //Draw an equidistributed int in the range [0,MAX_RAND)
    KOKKOS_INLINE_FUNCTION
    int rand();

-    //Draw a equidistributed int in the range (0,range]
+    //Draw an equidistributed int in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    int rand(const int& range);

-    //Draw a equidistributed int in the range (start,end]
+    //Draw an equidistributed int in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    int rand(const int& start, const int& end );

-    //Draw a equidistributed int64_t in the range (0,MAX_RAND64]
+    //Draw an equidistributed int64_t in the range [0,MAX_RAND64)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64();

-    //Draw a equidistributed int64_t in the range (0,range]
+    //Draw an equidistributed int64_t in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64(const int64_t& range);

-    //Draw a equidistributed int64_t in the range (start,end]
+    //Draw an equidistributed int64_t in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    int64_t rand64(const int64_t& start, const int64_t& end );

-    //Draw a equidistributed float in the range (0,1.0]
+    //Draw an equidistributed float in the range [0,1.0)
    KOKKOS_INLINE_FUNCTION
    float frand();

-    //Draw a equidistributed float in the range (0,range]
+    //Draw an equidistributed float in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    float frand(const float& range);

-    //Draw a equidistributed float in the range (start,end]
+    //Draw an equidistributed float in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    float frand(const float& start, const float& end );

-    //Draw a equidistributed double in the range (0,1.0]
+    //Draw an equidistributed double in the range [0,1.0)
    KOKKOS_INLINE_FUNCTION
    double drand();

-    //Draw a equidistributed double in the range (0,range]
+    //Draw an equidistributed double in the range [0,range)
    KOKKOS_INLINE_FUNCTION
    double drand(const double& range);

-    //Draw a equidistributed double in the range (start,end]
+    //Draw an equidistributed double in the range [start,end)
    KOKKOS_INLINE_FUNCTION
    double drand(const double& start, const double& end );

@ -221,11 +221,11 @@ namespace Kokkos {

    //Additional Functions:

-    //Fills view with random numbers in the range (0,range]
+    //Fills view with random numbers in the range [0,range)
    template<class ViewType, class PoolType>
    void fill_random(ViewType view, PoolType pool, ViewType::value_type range);

-    //Fills view with random numbers in the range (start,end]
+    //Fills view with random numbers in the range [start,end)
    template<class ViewType, class PoolType>
    void fill_random(ViewType view, PoolType pool,
                     ViewType::value_type start, ViewType::value_type end);
@ -381,7 +381,7 @@ struct rand<Generator, unsigned long> {
 // NOTE (mfh 26 oct 2014) This is a partial specialization for long
 // long, a C99 / C++11 signed type which is guaranteed to be at
 // least 64 bits.  Do NOT write a partial specialization for
-// int64_t!!!  This is just a typedef!  It could be either long or
+// int64_t!!!  This is just an alias!  It could be either long or
 // long long.  We don't know which a priori, and I've seen both.
 // The types long and long long are guaranteed to differ, so it's
 // always safe to specialize for both.
@ -413,7 +413,7 @@ struct rand<Generator, long long> {
 // NOTE (mfh 26 oct 2014) This is a partial specialization for
 // unsigned long long, a C99 / C++11 unsigned type which is
 // guaranteed to be at least 64 bits.  Do NOT write a partial
-// specialization for uint64_t!!!  This is just a typedef!  It could
+// specialization for uint64_t!!!  This is just an alias!  It could
 // be either unsigned long or unsigned long long.  We don't know
 // which a priori, and I've seen both.  The types unsigned long and
 // unsigned long long are guaranteed to differ, so it's always safe
@ -604,11 +604,7 @@ struct Random_UniqueIndex {
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type) {
 #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-    const int i = ExecutionSpace::hardware_thread_id();
-#else
    const int i = ExecutionSpace::impl_hardware_thread_id();
-#endif
    return i;
 #else
    return 0;
@ -652,15 +648,13 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
  static int get_state_idx(const locks_view_type& locks_) {
 #ifdef __HIP_DEVICE_COMPILE__
    const int i_offset =
-        (hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
-        hipThreadIdx_z;
-    int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
-              hipBlockIdx_z) *
-                 hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
+        (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
+    int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
+                 blockDim.x * blockDim.y * blockDim.z +
             i_offset) %
            locks_.extent(0);
    while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
-      i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
+      i += blockDim.x * blockDim.y * blockDim.z;
      if (i >= static_cast<int>(locks_.extent(0))) {
        i = i_offset;
      }
@ -687,7 +681,7 @@ class Random_XorShift64 {
  friend class Random_XorShift64_Pool<DeviceType>;

 public:
-  typedef DeviceType device_type;
+  using device_type = DeviceType;

  constexpr static uint32_t MAX_URAND   = std::numeric_limits<uint32_t>::max();
  constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -805,11 +799,6 @@ class Random_XorShift64 {
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
-#ifndef __HIP_DEVICE_COMPILE__  // FIXME_HIP
-    using std::sqrt;
-#else
-    using ::sqrt;
-#endif
    double S = 2.0;
    double U;
    while (S >= 1.0) {
@ -817,7 +806,7 @@ class Random_XorShift64 {
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
-    return U * sqrt(-2.0 * log(S) / S);
+    return U * std::sqrt(-2.0 * log(S) / S);
  }

  KOKKOS_INLINE_FUNCTION
@ -830,15 +819,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift64_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  typedef View<int*, execution_space> locks_type;
-  typedef View<uint64_t*, DeviceType> state_data_type;
+  using locks_type      = View<int*, execution_space>;
+  using state_data_type = View<uint64_t*, DeviceType>;
  locks_type locks_;
  state_data_type state_;
  int num_states_;

 public:
-  typedef Random_XorShift64<DeviceType> generator_type;
-  typedef DeviceType device_type;
+  using generator_type = Random_XorShift64<DeviceType>;
+  using device_type    = DeviceType;

  KOKKOS_INLINE_FUNCTION
  Random_XorShift64_Pool() { num_states_ = 0; }
@ -923,8 +912,8 @@ class Random_XorShift1024 {
  friend class Random_XorShift1024_Pool<DeviceType>;

 public:
-  typedef Random_XorShift1024_Pool<DeviceType> pool_type;
-  typedef DeviceType device_type;
+  using pool_type   = Random_XorShift1024_Pool<DeviceType>;
+  using device_type = DeviceType;

  constexpr static uint32_t MAX_URAND   = std::numeric_limits<uint32_t>::max();
  constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -1046,11 +1035,6 @@ class Random_XorShift1024 {
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
-#ifndef KOKKOS_ENABLE_HIP  // FIXME_HIP
-    using std::sqrt;
-#else
-    using ::sqrt;
-#endif
    double S = 2.0;
    double U;
    while (S >= 1.0) {
@ -1058,7 +1042,7 @@ class Random_XorShift1024 {
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
-    return U * sqrt(-2.0 * log(S) / S);
+    return U * std::sqrt(-2.0 * log(S) / S);
  }

  KOKKOS_INLINE_FUNCTION
@ -1071,9 +1055,9 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
 class Random_XorShift1024_Pool {
 private:
  using execution_space = typename DeviceType::execution_space;
-  typedef View<int*, execution_space> locks_type;
-  typedef View<int*, DeviceType> int_view_type;
-  typedef View<uint64_t * [16], DeviceType> state_data_type;
+  using locks_type      = View<int*, execution_space>;
+  using int_view_type   = View<int*, DeviceType>;
+  using state_data_type = View<uint64_t * [16], DeviceType>;

  locks_type locks_;
  state_data_type state_;
@ -1082,9 +1066,9 @@ class Random_XorShift1024_Pool {
  friend class Random_XorShift1024<DeviceType>;

 public:
-  typedef Random_XorShift1024<DeviceType> generator_type;
+  using generator_type = Random_XorShift1024<DeviceType>;

-  typedef DeviceType device_type;
+  using device_type = DeviceType;

  KOKKOS_INLINE_FUNCTION
  Random_XorShift1024_Pool() { num_states_ = 0; }
@ -1176,14 +1160,13 @@ struct fill_random_functor_begin_end;

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1203,14 +1186,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1232,14 +1214,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1262,14 +1243,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1293,14 +1273,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1326,14 +1305,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1361,14 +1339,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1398,14 +1375,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {

 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type range;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
                            typename ViewType::const_value_type range_)
@ -1437,14 +1413,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1466,14 +1441,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1497,14 +1471,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1529,14 +1502,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1562,14 +1534,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1597,14 +1568,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1634,14 +1604,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
@ -1673,14 +1642,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
 template <class ViewType, class RandomPool, int loops, class IndexType>
 struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
                                     IndexType> {
-  typedef typename ViewType::execution_space execution_space;
+  using execution_space = typename ViewType::execution_space;
  ViewType a;
  RandomPool rand_pool;
  typename ViewType::const_value_type begin, end;

-  typedef rand<typename RandomPool::generator_type,
-               typename ViewType::non_const_value_type>
-      Rand;
+  using Rand = rand<typename RandomPool::generator_type,
+                    typename ViewType::non_const_value_type>;

  fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
                                typename ViewType::const_value_type begin_,
--- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
@ -95,9 +95,9 @@ class BinSort {
 public:
  template <class DstViewType, class SrcViewType>
  struct copy_functor {
-    typedef typename SrcViewType::const_type src_view_type;
+    using src_view_type = typename SrcViewType::const_type;

-    typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
+    using copy_op = Impl::CopyOp<DstViewType, src_view_type>;

    DstViewType dst_values;
    src_view_type src_values;
@ -120,17 +120,17 @@ class BinSort {
    // If a Kokkos::View then can generate constant random access
    // otherwise can only use the constant type.

-    typedef typename std::conditional<
+    using src_view_type = typename std::conditional<
        Kokkos::is_view<SrcViewType>::value,
        Kokkos::View<typename SrcViewType::const_data_type,
                     typename SrcViewType::array_layout,
                     typename SrcViewType::device_type,
                     Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
-        typename SrcViewType::const_type>::type src_view_type;
+        typename SrcViewType::const_type>::type;

-    typedef typename PermuteViewType::const_type perm_view_type;
+    using perm_view_type = typename PermuteViewType::const_type;

-    typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
+    using copy_op = Impl::CopyOp<DstViewType, src_view_type>;

    DstViewType dst_values;
    perm_view_type sort_order;
@ -151,8 +151,8 @@ class BinSort {
    }
  };

-  typedef typename Space::execution_space execution_space;
-  typedef BinSortOp bin_op_type;
+  using execution_space = typename Space::execution_space;
+  using bin_op_type     = BinSortOp;

  struct bin_count_tag {};
  struct bin_offset_tag {};
@ -160,30 +160,30 @@ class BinSort {
  struct bin_sort_bins_tag {};

 public:
-  typedef SizeType size_type;
-  typedef size_type value_type;
+  using size_type  = SizeType;
+  using value_type = size_type;

-  typedef Kokkos::View<size_type*, Space> offset_type;
-  typedef Kokkos::View<const int*, Space> bin_count_type;
+  using offset_type    = Kokkos::View<size_type*, Space>;
+  using bin_count_type = Kokkos::View<const int*, Space>;

-  typedef typename KeyViewType::const_type const_key_view_type;
+  using const_key_view_type = typename KeyViewType::const_type;

  // If a Kokkos::View then can generate constant random access
  // otherwise can only use the constant type.

-  typedef typename std::conditional<
+  using const_rnd_key_view_type = typename std::conditional<
      Kokkos::is_view<KeyViewType>::value,
      Kokkos::View<typename KeyViewType::const_data_type,
                   typename KeyViewType::array_layout,
                   typename KeyViewType::device_type,
                   Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
-      const_key_view_type>::type const_rnd_key_view_type;
+      const_key_view_type>::type;

-  typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
-  typedef typename KeyViewType::const_value_type const_key_scalar;
+  using non_const_key_scalar = typename KeyViewType::non_const_value_type;
+  using const_key_scalar     = typename KeyViewType::const_value_type;

-  typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >
-      bin_count_atomic_type;
+  using bin_count_atomic_type =
+      Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;

 private:
  const_key_view_type keys;
@ -266,10 +266,10 @@ class BinSort {
  template <class ValuesViewType>
  void sort(ValuesViewType const& values, int values_range_begin,
            int values_range_end) const {
-    typedef Kokkos::View<typename ValuesViewType::data_type,
+    using scratch_view_type =
+        Kokkos::View<typename ValuesViewType::data_type,
                     typename ValuesViewType::array_layout,
-                         typename ValuesViewType::device_type>
-        scratch_view_type;
+                     typename ValuesViewType::device_type>;

    const size_t len        = range_end - range_begin;
    const size_t values_len = values_range_end - values_range_begin;
@ -278,13 +278,6 @@ class BinSort {
          "BinSort::sort: values range length != permutation vector length");
    }

-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-    scratch_view_type sorted_values(
-        ViewAllocateWithoutInitializing(
-            "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
-        len, values.extent(1), values.extent(2), values.extent(3),
-        values.extent(4), values.extent(5), values.extent(6), values.extent(7));
-#else
    scratch_view_type sorted_values(
        ViewAllocateWithoutInitializing(
            "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
@ -303,7 +296,6 @@ class BinSort {
                                : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
        values.rank_dynamic > 7 ? values.extent(7)
                                : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
-#endif

    {
      copy_permute_functor<scratch_view_type /* DstViewType */
@ -511,8 +503,8 @@ bool try_std_sort(ViewType view) {

 template <class ViewType>
 struct min_max_functor {
-  typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>
-      minmax_scalar;
+  using minmax_scalar =
+      Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;

  ViewType view;
  min_max_functor(const ViewType& view_) : view(view_) {}
@ -531,7 +523,7 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
  if (!always_use_kokkos_sort) {
    if (Impl::try_std_sort(view)) return;
  }
-  typedef BinOp1D<ViewType> CompType;
+  using CompType = BinOp1D<ViewType>;

  Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
  Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
@ -548,8 +540,8 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {

 template <class ViewType>
 void sort(ViewType view, size_t const begin, size_t const end) {
-  typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy;
-  typedef BinOp1D<ViewType> CompType;
+  using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
+  using CompType     = BinOp1D<ViewType>;

  Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
  Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
--- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
@ -20,14 +20,18 @@ KOKKOS_ADD_TEST_LIBRARY(
  HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
  SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
 )
-# WORKAROUND FOR HIPCC
-IF(Kokkos_ENABLE_HIP)
-  TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906")
-ELSE()
-  TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
+
+# avoid deprecation warnings from MSVC
+TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
+
+IF(NOT (Kokkos_ENABLE_CUDA AND WIN32))
+TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
 ENDIF()

-TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
+# Suppress clang-tidy diagnostics on code that we do not have control over
+IF(CMAKE_CXX_CLANG_TIDY)
+  SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
+ENDIF()

 SET(SOURCES
  UnitTestMain.cpp
--- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
@ -111,10 +111,10 @@ struct RandomProperties {

 template <class GeneratorPool, class Scalar>
 struct test_random_functor {
-  typedef typename GeneratorPool::generator_type rnd_type;
+  using rnd_type = typename GeneratorPool::generator_type;

-  typedef RandomProperties value_type;
-  typedef typename GeneratorPool::device_type device_type;
+  using value_type  = RandomProperties;
+  using device_type = typename GeneratorPool::device_type;

  GeneratorPool rand_pool;
  const double mean;
@ -125,12 +125,12 @@ struct test_random_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View types below.
-  typedef Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>
-      type_1d;
+  using type_1d =
+      Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>;
  type_1d density_1d;
-  typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
-                       typename GeneratorPool::device_type>
-      type_3d;
+  using type_3d =
+      Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
+                   typename GeneratorPool::device_type>;
  type_3d density_3d;

  test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
@ -200,9 +200,9 @@ struct test_random_functor {

 template <class DeviceType>
 struct test_histogram1d_functor {
-  typedef RandomProperties value_type;
-  typedef typename DeviceType::execution_space execution_space;
-  typedef typename DeviceType::memory_space memory_space;
+  using value_type      = RandomProperties;
+  using execution_space = typename DeviceType::execution_space;
+  using memory_space    = typename DeviceType::memory_space;

  // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
  // an exclusive upper bound on the range of random numbers that
@ -210,7 +210,7 @@ struct test_histogram1d_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View type below.
-  typedef Kokkos::View<int[HIST_DIM1D + 1], memory_space> type_1d;
+  using type_1d = Kokkos::View<int[HIST_DIM1D + 1], memory_space>;
  type_1d density_1d;
  double mean;

@ -219,7 +219,7 @@ struct test_histogram1d_functor {

  KOKKOS_INLINE_FUNCTION void operator()(
      const typename memory_space::size_type i, RandomProperties& prop) const {
-    typedef typename memory_space::size_type size_type;
+    using size_type    = typename memory_space::size_type;
    const double count = density_1d(i);
    prop.mean += count;
    prop.variance += 1.0 * (count - mean) * (count - mean);
@ -234,9 +234,9 @@ struct test_histogram1d_functor {

 template <class DeviceType>
 struct test_histogram3d_functor {
-  typedef RandomProperties value_type;
-  typedef typename DeviceType::execution_space execution_space;
-  typedef typename DeviceType::memory_space memory_space;
+  using value_type      = RandomProperties;
+  using execution_space = typename DeviceType::execution_space;
+  using memory_space    = typename DeviceType::memory_space;

  // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
  // an exclusive upper bound on the range of random numbers that
@ -244,9 +244,9 @@ struct test_histogram3d_functor {
  // implementations might violate this upper bound, due to rounding
  // error.  Just in case, we leave an extra space at the end of each
  // dimension, in the View type below.
-  typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
-                       memory_space>
-      type_3d;
+  using type_3d =
+      Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
+                   memory_space>;
  type_3d density_3d;
  double mean;

@ -255,7 +255,7 @@ struct test_histogram3d_functor {

  KOKKOS_INLINE_FUNCTION void operator()(
      const typename memory_space::size_type i, RandomProperties& prop) const {
-    typedef typename memory_space::size_type size_type;
+    using size_type    = typename memory_space::size_type;
    const double count = density_3d(
        i / (HIST_DIM3D * HIST_DIM3D),
        (i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D);
@ -276,7 +276,7 @@ struct test_histogram3d_functor {
 //
 template <class RandomGenerator, class Scalar>
 struct test_random_scalar {
-  typedef typename RandomGenerator::generator_type rnd_type;
+  using rnd_type = typename RandomGenerator::generator_type;

  int pass_mean, pass_var, pass_covar;
  int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
@ -294,7 +294,7 @@ struct test_random_scalar {
      cout << " -- Testing randomness properties" << endl;

      RandomProperties result;
-      typedef test_random_functor<RandomGenerator, Scalar> functor_type;
+      using functor_type = test_random_functor<RandomGenerator, Scalar>;
      parallel_reduce(num_draws / 1024,
                      functor_type(pool, density_1d, density_3d), result);

@ -325,8 +325,8 @@ struct test_random_scalar {
      cout << " -- Testing 1-D histogram" << endl;

      RandomProperties result;
-      typedef test_histogram1d_functor<typename RandomGenerator::device_type>
-          functor_type;
+      using functor_type =
+          test_histogram1d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);

      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
@ -357,8 +357,8 @@ struct test_random_scalar {
      cout << " -- Testing 3-D histogram" << endl;

      RandomProperties result;
-      typedef test_histogram3d_functor<typename RandomGenerator::device_type>
-          functor_type;
+      using functor_type =
+          test_histogram3d_functor<typename RandomGenerator::device_type>;
      parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);

      double tolerance   = 6 * std::sqrt(1.0 / HIST_DIM1D);
--- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
@ -55,8 +55,8 @@ namespace Impl {

 template <class ExecutionSpace, class Scalar>
 struct is_sorted_struct {
-  typedef unsigned int value_type;
-  typedef ExecutionSpace execution_space;
+  using value_type      = unsigned int;
+  using execution_space = ExecutionSpace;

  Kokkos::View<Scalar*, ExecutionSpace> keys;

@ -69,8 +69,8 @@ struct is_sorted_struct {

 template <class ExecutionSpace, class Scalar>
 struct sum {
-  typedef double value_type;
-  typedef ExecutionSpace execution_space;
+  using value_type      = double;
+  using execution_space = ExecutionSpace;

  Kokkos::View<Scalar*, ExecutionSpace> keys;

@ -81,8 +81,8 @@ struct sum {

 template <class ExecutionSpace, class Scalar>
 struct bin3d_is_sorted_struct {
-  typedef unsigned int value_type;
-  typedef ExecutionSpace execution_space;
+  using value_type      = unsigned int;
+  using execution_space = ExecutionSpace;

  Kokkos::View<Scalar * [3], ExecutionSpace> keys;

@ -115,8 +115,8 @@ struct bin3d_is_sorted_struct {

 template <class ExecutionSpace, class Scalar>
 struct sum3D {
-  typedef double value_type;
-  typedef ExecutionSpace execution_space;
+  using value_type      = double;
+  using execution_space = ExecutionSpace;

  Kokkos::View<Scalar * [3], ExecutionSpace> keys;

@ -131,7 +131,7 @@ struct sum3D {

 template <class ExecutionSpace, typename KeyType>
 void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
-  typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
+  using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
  KeyViewType keys("Keys", n);

  // Test sorting array with all numbers equal
@ -166,7 +166,7 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {

 template <class ExecutionSpace, typename KeyType>
 void test_3D_sort_impl(unsigned int n) {
-  typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
+  using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>;

  KeyViewType keys("Keys", n * n * n);

@ -186,7 +186,7 @@ void test_3D_sort_impl(unsigned int n) {
  typename KeyViewType::value_type min[3] = {0, 0, 0};
  typename KeyViewType::value_type max[3] = {100, 100, 100};

-  typedef Kokkos::BinOp3D<KeyViewType> BinOp;
+  using BinOp = Kokkos::BinOp3D<KeyViewType>;
  BinOp bin_op(bin_max, min, max);
  Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
  Sorter.create_permute_vector();
@ -215,9 +215,9 @@ void test_3D_sort_impl(unsigned int n) {

 template <class ExecutionSpace, typename KeyType>
 void test_dynamic_view_sort_impl(unsigned int n) {
-  typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
-      KeyDynamicViewType;
-  typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
+  using KeyDynamicViewType =
+      Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
+  using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;

  const size_t upper_bound    = 2 * n;
  const size_t min_chunk_size = 1024;
@ -305,8 +305,8 @@ void test_issue_1160_impl() {
  Kokkos::deep_copy(x_, h_x);
  Kokkos::deep_copy(v_, h_v);

-  typedef decltype(element_) KeyViewType;
-  typedef Kokkos::BinOp1D<KeyViewType> BinOp;
+  using KeyViewType = decltype(element_);
+  using BinOp       = Kokkos::BinOp1D<KeyViewType>;

  int begin = 3;
  int end   = 8;
--- a/lib/kokkos/appveyor.yml
+++ b/lib/kokkos/appveyor.yml
@ -5,6 +5,6 @@ build_script:
 - cmd: >-
    mkdir build &&
    cd build &&
-    cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
+    cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON &&
    cmake --build . --target install &&
    ctest -C Debug -V
--- a/lib/kokkos/benchmarks/atomic/main.cpp
+++ b/lib/kokkos/benchmarks/atomic/main.cpp
@ -69,13 +69,13 @@ int main(int argc, char* argv[]) {
      return 0;
    }

-    int L    = atoi(argv[1]);
-    int N    = atoi(argv[2]);
-    int M    = atoi(argv[3]);
-    int D    = atoi(argv[4]);
-    int K    = atoi(argv[5]);
-    int R    = atoi(argv[6]);
-    int type = atoi(argv[7]);
+    int L    = std::stoi(argv[1]);
+    int N    = std::stoi(argv[2]);
+    int M    = std::stoi(argv[3]);
+    int D    = std::stoi(argv[4]);
+    int K    = std::stoi(argv[5]);
+    int R    = std::stoi(argv[6]);
+    int type = std::stoi(argv[7]);

    Kokkos::View<int*> offsets("Offsets", L, M);
    Kokkos::Random_XorShift64_Pool<> pool(12371);
--- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
@ -73,15 +73,15 @@ int main(int argc, char* argv[]) {
    return 0;
  }

-  int P = atoi(argv[1]);
-  int N = atoi(argv[2]);
-  int K = atoi(argv[3]);
-  int R = atoi(argv[4]);
-  int D = atoi(argv[5]);
-  int U = atoi(argv[6]);
-  int F = atoi(argv[7]);
-  int T = atoi(argv[8]);
-  int S = atoi(argv[9]);
+  int P = std::stoi(argv[1]);
+  int N = std::stoi(argv[2]);
+  int K = std::stoi(argv[3]);
+  int R = std::stoi(argv[4]);
+  int D = std::stoi(argv[5]);
+  int U = std::stoi(argv[6]);
+  int F = std::stoi(argv[7]);
+  int T = std::stoi(argv[8]);
+  int S = std::stoi(argv[9]);

  if (U > 8) {
    printf("U must be 1-8\n");
--- a/lib/kokkos/benchmarks/gather/main.cpp
+++ b/lib/kokkos/benchmarks/gather/main.cpp
@ -72,13 +72,13 @@ int main(int argc, char* argv[]) {
    return 0;
  }

-  int S = atoi(argv[1]);
-  int N = atoi(argv[2]);
-  int K = atoi(argv[3]);
-  int D = atoi(argv[4]);
-  int R = atoi(argv[5]);
-  int U = atoi(argv[6]);
-  int F = atoi(argv[7]);
+  int S = std::stoi(argv[1]);
+  int N = std::stoi(argv[2]);
+  int K = std::stoi(argv[3]);
+  int D = std::stoi(argv[4]);
+  int R = std::stoi(argv[5]);
+  int U = std::stoi(argv[6]);
+  int F = std::stoi(argv[7]);

  if ((S != 1) && (S != 2) && (S != 4)) {
    printf("S must be one of 1,2,4\n");
--- a/lib/kokkos/benchmarks/gups/gups-kokkos.cc
+++ b/lib/kokkos/benchmarks/gups/gups-kokkos.cc
@ -50,58 +50,61 @@
 #define HLINE "-------------------------------------------------------------\n"

 #if defined(KOKKOS_ENABLE_CUDA)
-typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray;
-typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray;
+using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
+using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
 #else
-typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray;
-typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray;
+using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
+using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
 #endif

-typedef int GUPSIndex;
+using GUPSIndex = int;

 double now() {
  struct timeval now;
  gettimeofday(&now, nullptr);

-	return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
+  return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
 }

-void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) {
-	for( GUPSIndex i = 0; i < indices.extent(0); ++i ) {
+void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
+                       const int64_t dataCount) {
+  for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
    indices[i] = lrand48() % dataCount;
  }

  Kokkos::deep_copy(dev_indices, indices);
 }

-void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum,
-	const bool performAtomics) {
-
-	if( performAtomics ) {
-		Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
-			Kokkos::atomic_fetch_xor( &data[indices[i]], datum );
+void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
+              const int64_t datum, const bool performAtomics) {
+  if (performAtomics) {
+    Kokkos::parallel_for(
+        "bench-gups-atomic", indices.extent(0),
+        KOKKOS_LAMBDA(const GUPSIndex i) {
+          Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
        });
  } else {
-		Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
-			data[indices[i]] ^= datum;
-		});
+    Kokkos::parallel_for(
+        "bench-gups-non-atomic", indices.extent(0),
+        KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
  }

  Kokkos::fence();
 }

-int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats,
-	const bool useAtomics) {
-
+int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
+                  const int repeats, const bool useAtomics) {
  printf("Reports fastest timing per kernel\n");
  printf("Creating Views...\n");

  printf("Memory Sizes:\n");
-	printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount),
-		1.0e-6 * ((double) dataCount * (double) sizeof(int64_t)));
-	printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount),
-		1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t)));
-	printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No") );
+  printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n",
+         static_cast<uint64_t>(dataCount),
+         1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
+  printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n",
+         static_cast<uint64_t>(indicesCount),
+         1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
+  printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No"));
  printf("Benchmark kernels will be performed for %d iterations.\n", repeats);

  printf(HLINE);
@ -118,24 +121,22 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
  printf("Initializing Views...\n");

 #if defined(KOKKOS_HAVE_OPENMP)
-	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
+  Kokkos::parallel_for(
+      "init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
 #else
-	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
+  Kokkos::parallel_for(
+      "init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
 #endif
-		KOKKOS_LAMBDA(const int i) {
-
-		data[i] = 10101010101;
-	});
+      KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });

 #if defined(KOKKOS_HAVE_OPENMP)
-	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
+  Kokkos::parallel_for(
+      "init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
 #else
-	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
+  Kokkos::parallel_for(
+      "init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
 #endif
-		KOKKOS_LAMBDA(const int i) {
-
-		indices[i] = 0;
-	});
+      KOKKOS_LAMBDA(const int i) { indices[i] = 0; });

  Kokkos::deep_copy(dev_data, data);
  Kokkos::deep_copy(dev_indices, indices);
@ -143,7 +144,7 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const

  printf("Starting benchmarking...\n");

-	for( GUPSIndex k = 0; k < repeats; ++k ) {
+  for (GUPSIndex k = 0; k < repeats; ++k) {
    randomize_indices(indices, dev_indices, data.extent(0));

    start = now();
@ -155,15 +156,15 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
  Kokkos::deep_copy(data, dev_data);

  printf(HLINE);
-	printf("GUP/s Random:      %18.6f\n",
-		(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime);
+  printf(
+      "GUP/s Random:      %18.6f\n",
+      (1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
  printf(HLINE);

  return 0;
 }

 int main(int argc, char* argv[]) {
-
  printf(HLINE);
  printf("Kokkos GUPS Benchmark\n");
  printf(HLINE);
@ -177,17 +178,17 @@ int main(int argc, char* argv[]) {
  int64_t repeats = 10;
  bool useAtomics = false;

-	for( int i = 1; i < argc; ++i ) {
-		if( strcmp( argv[i], "--indices" ) == 0 ) {
-			indices = std::atoll(argv[i+1]);
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "--indices") == 0) {
+      indices = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--data" ) == 0 ) {
-			data = std::atoll(argv[i+1]);
+    } else if (strcmp(argv[i], "--data") == 0) {
+      data = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--repeats" ) == 0 ) {
-			repeats = std::atoll(argv[i+1]);
+    } else if (strcmp(argv[i], "--repeats") == 0) {
+      repeats = std::atoll(argv[i + 1]);
      ++i;
-		} else if( strcmp( argv[i], "--atomics" ) == 0 ) {
+    } else if (strcmp(argv[i], "--atomics") == 0) {
      useAtomics = true;
    }
  }
--- a/Show More
+++ b/Show More