Merge branch 'master' into master

This commit is contained in:
TD Swinburne (Tom)
2020-09-01 10:59:03 +02:00
committed by GitHub
2136 changed files with 37709 additions and 82215 deletions

View File

@ -15,75 +15,93 @@ if(BUILD_DOC)
endif() endif()
set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE}) set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE})
endif() endif()
find_package(Doxygen 1.8.10 REQUIRED)
file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst) file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
add_custom_command( add_custom_command(
OUTPUT docenv OUTPUT docenv
COMMAND ${VIRTUALENV} docenv COMMAND ${VIRTUALENV} docenv
) )
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin) set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static)
# configuration and static files are copied to binary dir to avoid collisions with parallel builds
set(DOC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc)
set(DOC_BUILD_CONFIG_FILE ${DOC_BUILD_DIR}/conf.py)
set(DOC_BUILD_STATIC_DIR ${DOC_BUILD_DIR}/_static)
set(DOXYGEN_BUILD_DIR ${DOC_BUILD_DIR}/doxygen)
set(DOXYGEN_XML_DIR ${DOXYGEN_BUILD_DIR}/xml)
# copy entire configuration folder to doc build directory
# files in _static are automatically copied during sphinx-build, so no need to copy them individually
file(COPY ${SPHINX_CONFIG_DIR}/ DESTINATION ${DOC_BUILD_DIR})
# configure paths in conf.py, since relative paths change when file is copied
configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
add_custom_command( add_custom_command(
OUTPUT requirements.txt OUTPUT ${DOC_BUILD_DIR}/requirements.txt
DEPENDS docenv DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r requirements.txt --upgrade COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
) )
# download mathjax distribution and unpack to folder "mathjax" # download mathjax distribution and unpack to folder "mathjax"
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5) if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz" file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
"${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz" "${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7) EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*) file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${CMAKE_CURRENT_BINARY_DIR}/mathjax) execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
endif() endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax)
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/)
# for increased browser compatibility # for increased browser compatibility
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js) if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6" file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
"${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js") "${DOC_BUILD_STATIC_DIR}/polyfill.js")
endif() endif()
# note, this may run in parallel with other tasks, so we must not use multiple processes here # set up doxygen and add targets to run it
file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR}/lammps-logo.png)
configure_file(${LAMMPS_DOC_DIR}/doxygen/Doxyfile.in ${DOXYGEN_BUILD_DIR}/Doxyfile)
get_target_property(LAMMPS_SOURCES lammps SOURCES)
add_custom_command( add_custom_command(
OUTPUT html OUTPUT ${DOXYGEN_XML_DIR}/index.xml
DEPENDS ${DOC_SOURCES} docenv requirements.txt DEPENDS ${DOC_SOURCES} ${LAMMPS_SOURCES}
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${LAMMPS_DOC_DIR}/src html COMMAND Doxygen::doxygen ${DOXYGEN_BUILD_DIR}/Doxyfile WORKING_DIRECTORY ${DOXYGEN_BUILD_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${CMAKE_CURRENT_BINARY_DIR}/html/index.html COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_XML_DIR}/run.stamp
) )
# copy selected image files to html output tree if(EXISTS ${DOXYGEN_XML_DIR}/run.stamp)
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/html/JPG) set(SPHINX_EXTRA_OPTS "-E")
set(HTML_EXTRA_IMAGES balance_nonuniform.jpg balance_rcb.jpg else()
balance_uniform.jpg bow_tutorial_01.png bow_tutorial_02.png set(SPHINX_EXTRA_OPTS "")
bow_tutorial_03.png bow_tutorial_04.png bow_tutorial_05.png endif()
dump1.jpg dump2.jpg examples_mdpd.gif gran_funnel.png gran_mixer.png
hop1.jpg hop2.jpg saed_ewald_intersect.jpg saed_mesh.jpg
screenshot_atomeye.jpg screenshot_gl.jpg screenshot_pymol.jpg
screenshot_vmd.jpg sinusoid.jpg xrd_mesh.jpg)
set(HTML_IMAGE_TARGETS "")
foreach(_IMG ${HTML_EXTRA_IMAGES})
string(PREPEND _IMG JPG/)
list(APPEND HTML_IMAGE_TARGETS "${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}")
add_custom_command( add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG} OUTPUT html
DEPENDS ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_CURRENT_BINARY_DIR}/html/JPG DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE}
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_BINARY_DIR}/html/${_IMG} COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
) )
endforeach()
add_custom_target( add_custom_target(
doc ALL doc ALL
DEPENDS html ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/es5 ${HTML_IMAGE_TARGETS} DEPENDS html ${DOC_BUILD_STATIC_DIR}/mathjax/es5
SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES} SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
) )
install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR}) install(DIRECTORY ${DOC_BUILD_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif() endif()

View File

@ -75,7 +75,7 @@ if(GPU_API STREQUAL "CUDA")
endif() endif()
# Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11 # Kepler (GPU Arch 3.5) is supported by CUDA 5 to CUDA 11
if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0")) if((CUDA_VERSION VERSION_GREATER_EQUAL "5.0") AND (CUDA_VERSION VERSION_LESS "12.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35]") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_35,code=[sm_35,compute_35]")
endif() endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0") if(CUDA_VERSION VERSION_GREATER_EQUAL "6.0")

View File

@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(kokkos_build ExternalProject_Add(kokkos_build
URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37 URL_MD5 81569170fe232e5e64ab074f7cca5e50
CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS} CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
) )
@ -50,7 +50,7 @@ if(DOWNLOAD_KOKKOS)
target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS) target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
add_dependencies(LAMMPS::KOKKOS kokkos_build) add_dependencies(LAMMPS::KOKKOS kokkos_build)
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.1.01 REQUIRED CONFIG) find_package(Kokkos 3.2.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else() else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

7
doc/.gitignore vendored
View File

@ -1,6 +1,7 @@
/old /old
/html /html
/html-offline /html-offline
/epub
/latex /latex
/mathjax /mathjax
/spelling /spelling
@ -10,3 +11,9 @@
/Developer.pdf /Developer.pdf
/doctrees /doctrees
/docenv /docenv
/doxygen-warn.log
/utils/sphinx-config/conf.py
/doxygen/Doxyfile
*.el
/utils/sphinx-config/_static/mathjax
/utils/sphinx-config/_static/polyfill.js

View File

@ -4,20 +4,28 @@ SHELL = /bin/bash
BUILDDIR = ${CURDIR} BUILDDIR = ${CURDIR}
RSTDIR = $(BUILDDIR)/src RSTDIR = $(BUILDDIR)/src
VENV = $(BUILDDIR)/docenv VENV = $(BUILDDIR)/docenv
MATHJAX = $(BUILDDIR)/mathjax
TXT2RST = $(VENV)/bin/txt2rst TXT2RST = $(VENV)/bin/txt2rst
ANCHORCHECK = $(VENV)/bin/rst_anchor_check ANCHORCHECK = $(VENV)/bin/rst_anchor_check
SPHINXCONFIG = $(BUILDDIR)/utils/sphinx-config
MATHJAX = $(SPHINXCONFIG)/_static/mathjax
POLYFILL = $(SPHINXCONFIG)/_static/polyfill.js
PYTHON = $(shell which python3) PYTHON = $(shell which python3)
DOXYGEN = $(shell which doxygen)
VIRTUALENV = virtualenv VIRTUALENV = virtualenv
HAS_PYTHON3 = NO HAS_PYTHON3 = NO
HAS_VIRTUALENV = NO HAS_VIRTUALENV = NO
HAS_DOXYGEN = NO
HAS_PDFLATEX = NO HAS_PDFLATEX = NO
ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0) ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
HAS_PYTHON3 = YES HAS_PYTHON3 = YES
endif endif
ifeq ($(shell which doxygen >/dev/null 2>&1; echo $$?), 0)
HAS_DOXYGEN = YES
endif
ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0) ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
VIRTUALENV = virtualenv-3 VIRTUALENV = virtualenv-3
HAS_VIRTUALENV = YES HAS_VIRTUALENV = YES
@ -33,16 +41,20 @@ HAS_PDFLATEX = YES
endif endif
SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check # grab list of sources from doxygen config file.
# we only want to use explicitly listed files.
DOXYFILES = $(shell sed -n -e 's/\#.*$$//' -e '/^ *INPUT \+=/,/^[A-Z_]\+ \+=/p' doxygen/Doxyfile.in | sed -e 's/@LAMMPS_SOURCE_DIR@/..\/src/g' -e 's/\\//g' -e 's/ \+/ /' -e 's/[A-Z_]\+ \+= *\(YES\|NO\|\)//')
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check xmlgen
# ------------------------------------------ # ------------------------------------------
help: help:
@echo "Please use \`make <target>' where <target> is one of" @echo "Please use \`make <target>' where <target> is one of"
@echo " html create HTML doc pages in html dir" @echo " html create HTML doc pages in html dir"
@echo " pdf create Developer.pdf and Manual.pdf in this dir" @echo " pdf create Manual.pdf in this dir"
@echo " fetch fetch HTML and PDF files from LAMMPS web site" @echo " fetch fetch HTML and PDF files from LAMMPS web site"
@echo " epub create ePUB format manual for e-book readers" @echo " epub create ePUB format manual for e-book readers"
@echo " mobi convert ePUB to MOBI format manual for e-book readers (e.g. Kindle)" @echo " mobi convert ePUB to MOBI format manual for e-book readers (e.g. Kindle)"
@ -57,23 +69,32 @@ help:
# ------------------------------------------ # ------------------------------------------
clean-all: clean clean-all: clean
rm -rf $(BUILDDIR)/docenv $(BUILDDIR)/doctrees $(BUILDDIR)/mathjax Manual.pdf Developer.pdf rm -rf $(BUILDDIR)/docenv $(MATHJAX) $(BUILDDIR)/LAMMPS.mobi $(BUILDDIR)/LAMMPS.epub $(BUILDDIR)/Manual.pdf
clean: clean-spelling clean: clean-spelling
rm -rf html epub latex rm -rf $(BUILDDIR)/html $(BUILDDIR)/epub $(BUILDDIR)/latex $(BUILDDIR)/doctrees $(BUILDDIR)/doxygen/xml $(BUILDDIR)/doxygen-warn.log $(BUILDDIR)/doxygen/Doxyfile $(SPHINXCONFIG)/conf.py
clean-spelling: clean-spelling:
rm -rf spelling rm -rf $(BUILDDIR)/spelling
html: $(ANCHORCHECK) $(MATHJAX) $(SPHINXCONFIG)/conf.py: $(SPHINXCONFIG)/conf.py.in
sed -e 's,@DOXYGEN_XML_DIR@,$(BUILDDIR)/doxygen/xml,g' \
-e 's,@LAMMPS_SOURCE_DIR@,$(BUILDDIR)/../src,g' \
-e 's,@LAMMPS_PYTHON_DIR@,$(BUILDDIR)/../python,g' \
-e 's,@LAMMPS_DOC_DIR@,$(BUILDDIR),g' $< > $@
html: xmlgen $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX) $(POLYFILL)
@$(MAKE) $(MFLAGS) -C graphviz all
@(\ @(\
. $(VENV)/bin/activate ;\ . $(VENV)/bin/activate ; env PYTHONWARNINGS= \
sphinx-build $(SPHINXEXTRA) -b html -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\ sphinx-build $(SPHINXEXTRA) -b html -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
ln -sf Manual.html html/index.html;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
echo "############################################" ;\ echo "############################################" ;\
rst_anchor_check src/*.rst ;\ rst_anchor_check src/*.rst ;\
python utils/check-packages.py -s ../src -d src ;\ python $(BUILDDIR)/utils/check-packages.py -s ../src -d src ;\
env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\ env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\
python utils/check-styles.py -s ../src -d src ;\ python $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
echo "############################################" ;\ echo "############################################" ;\
deactivate ;\ deactivate ;\
) )
@ -82,30 +103,28 @@ html: $(ANCHORCHECK) $(MATHJAX)
@rm -rf html/USER @rm -rf html/USER
@rm -rf html/JPG @rm -rf html/JPG
@cp -r src/PDF html/PDF @cp -r src/PDF html/PDF
@mkdir -p html/JPG
@cp `grep -A2 '\.\. .*\(image\|figure\)::' src/*.rst | grep ':target: JPG' | sed -e 's,.*:target: JPG/,src/JPG/,' | sort | uniq` html/JPG/
@rm -rf html/PDF/.[sg]* @rm -rf html/PDF/.[sg]*
@mkdir -p html/_static/mathjax
@cp -r $(MATHJAX)/es5 html/_static/mathjax/
@echo "Build finished. The HTML pages are in doc/html." @echo "Build finished. The HTML pages are in doc/html."
spelling: $(VENV) utils/sphinx-config/false_positives.txt spelling: xmlgen $(VENV) $(SPHINXCONFIG)/false_positives.txt
@(\ @(\
. $(VENV)/bin/activate ;\ . $(VENV)/bin/activate ; env PYTHONWARNINGS= \
cp utils/sphinx-config/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \ cp $(SPHINXCONFIG)/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
sphinx-build -b spelling -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\ sphinx-build -b spelling -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
deactivate ;\ deactivate ;\
) )
@echo "Spell check finished." @echo "Spell check finished."
epub: $(VENV) epub: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
@$(MAKE) $(MFLAGS) -C graphviz all
@mkdir -p epub/JPG @mkdir -p epub/JPG
@rm -f LAMMPS.epub @rm -f LAMMPS.epub
@cp src/JPG/lammps-logo.png epub/
@cp src/JPG/*.* epub/JPG @cp src/JPG/*.* epub/JPG
@(\ @(\
. $(VENV)/bin/activate ;\ . $(VENV)/bin/activate ;\
sphinx-build $(SPHINXEXTRA) -b epub -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\ sphinx-build $(SPHINXEXTRA) -b epub -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
deactivate ;\ deactivate ;\
) )
@mv epub/LAMMPS.epub . @mv epub/LAMMPS.epub .
@ -117,18 +136,13 @@ mobi: epub
@ebook-convert LAMMPS.epub LAMMPS.mobi @ebook-convert LAMMPS.epub LAMMPS.mobi
@echo "Conversion finished. The MOBI manual file is created." @echo "Conversion finished. The MOBI manual file is created."
pdf: $(ANCHORCHECK) pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
@$(MAKE) $(MFLAGS) -C graphviz all
@if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi @if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
@(\ @(\
cd src/Developer; \ . $(VENV)/bin/activate ; env PYTHONWARNINGS= \
pdflatex developer; \ sphinx-build $(SPHINXEXTRA) -b latex -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
pdflatex developer; \ rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
mv developer.pdf ../../Developer.pdf; \
cd ../../; \
)
@(\
. $(VENV)/bin/activate ;\
sphinx-build $(SPHINXEXTRA) -b latex -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
echo "############################################" ;\ echo "############################################" ;\
rst_anchor_check src/*.rst ;\ rst_anchor_check src/*.rst ;\
python utils/check-packages.py -s ../src -d src ;\ python utils/check-packages.py -s ../src -d src ;\
@ -154,12 +168,11 @@ pdf: $(ANCHORCHECK)
@rm -rf latex/USER @rm -rf latex/USER
@cp -r src/PDF latex/PDF @cp -r src/PDF latex/PDF
@rm -rf latex/PDF/.[sg]* @rm -rf latex/PDF/.[sg]*
@echo "Build finished. Manual.pdf and Developer.pdf are in this directory." @echo "Build finished. Manual.pdf is in this directory."
fetch: fetch:
@rm -rf html_www Manual_www.pdf Developer_www.pdf @rm -rf html_www Manual_www.pdf
@curl -s -o Manual_www.pdf http://lammps.sandia.gov/doc/Manual.pdf @curl -s -o Manual_www.pdf http://lammps.sandia.gov/doc/Manual.pdf
@curl -s -o Developer_www.pdf http://lammps.sandia.gov/doc/Developer.pdf
@curl -s -o lammps-doc.tar.gz http://lammps.sandia.gov/tars/lammps-doc.tar.gz @curl -s -o lammps-doc.tar.gz http://lammps.sandia.gov/tars/lammps-doc.tar.gz
@tar xzf lammps-doc.tar.gz @tar xzf lammps-doc.tar.gz
@rm -f lammps-doc.tar.gz @rm -f lammps-doc.tar.gz
@ -185,21 +198,32 @@ package_check : $(VENV)
deactivate ;\ deactivate ;\
) )
xmlgen : doxygen/xml/index.xml
doxygen/Doxyfile: doxygen/Doxyfile.in
sed -e 's/@LAMMPS_SOURCE_DIR@/..\/..\/src/g' $< > $@
doxygen/xml/index.xml : $(VENV) doxygen/Doxyfile $(DOXYFILES)
@(cd doxygen; $(DOXYGEN) Doxyfile && touch xml/run.stamp)
# ------------------------------------------ # ------------------------------------------
$(VENV): $(VENV):
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi @if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "python3 was not found! Please see README for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi @if [ "$(HAS_DOXYGEN)" == "NO" ] ; then echo "doxygen was not found! Please see README for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please see README for further instructions" 1>&2; exit 1; fi
@( \ @( \
$(VIRTUALENV) -p $(PYTHON) $(VENV); \ $(VIRTUALENV) -p $(PYTHON) $(VENV); \
. $(VENV)/bin/activate; \ . $(VENV)/bin/activate; \
pip install --upgrade pip; \ pip install --upgrade pip; \
pip install --use-feature=2020-resolver -r requirements.txt; \ pip install --use-feature=2020-resolver -r $(BUILDDIR)/utils/requirements.txt; \
deactivate;\ deactivate;\
) )
$(MATHJAX): $(MATHJAX):
@git clone --depth 1 https://github.com/mathjax/MathJax.git mathjax @git clone --depth 1 https://github.com/mathjax/MathJax.git $@
$(POLYFILL): $(MATHJAX)
@curl -s -o $@ "https://polyfill.io/v3/polyfill.min.js?features=es6"
$(TXT2RST) $(ANCHORCHECK): $(VENV) $(TXT2RST) $(ANCHORCHECK): $(VENV)
@( \ @( \

View File

@ -1,97 +1,60 @@
LAMMPS Documentation LAMMPS Documentation
Depending on how you obtained LAMMPS, this directory has 2 or 3 Depending on how you obtained LAMMPS and whether you have built
sub-directories and optionally 2 PDF files and an ePUB file: the manual yourself, this directory has a varying number of
sub-directories and files. Here is a list with descriptions:
README this file
src content files for LAMMPS documentation src content files for LAMMPS documentation
html HTML version of the LAMMPS manual (see html/Manual.html) html HTML version of the LAMMPS manual (see html/Manual.html)
utils utilities and settings for building the documentation utils utilities and settings for building the documentation
Manual.pdf large PDF version of entire manual Manual.pdf PDF version of entire manual
Developer.pdf small PDF with info about how LAMMPS is structured Developer.pdf PDF with info about how LAMMPS is structured
LAMMPS.epub Manual in ePUB format LAMMPS.epub Manual in ePUB format
LAMMPS.mobi Manual in MOBI (Kindle) format
lammps.1 man page for the lammps command
msi2lmp.1 man page for the msi2lmp command
mathjax code and fonts for rendering math in html
doctree temporary data
docenv python virtual environment for generating the manual
doxygen Doxygen configuration and output
.gitignore list of files and folders to be ignored by git
doxygen-warn.log logfile with warnings from running doxygen
If you downloaded LAMMPS as a tarball from the web site, all these and:
directories and files should be included.
If you downloaded LAMMPS from the public SVN or Git repositories, then github-development-workflow.md notes on the LAMMPS development workflow
the HTML and PDF files are not included. Instead you need to create include-file-conventions.md notes on LAMMPS' include file conventions
them, in one of three ways: documentation_conventions.md notes on writing documentation for LAMMPS
If you downloaded a LAMMPS tarball from lammps.sandia.gov, then the html
folder and the PDF manual should be included. If you downloaded LAMMPS
from GitHub then you either need to download them or build them.
(a) You can "fetch" the current HTML and PDF files from the LAMMPS web (a) You can "fetch" the current HTML and PDF files from the LAMMPS web
site. Just type "make fetch". This should create a html_www dir and site. Just type "make fetch". This should create a html_www dir and
Manual_www.pdf/Developer_www.pdf files. Note that if new LAMMPS Manual_www.pdf/Developer_www.pdf files. These files will always
features have been added more recently than the date of your version, represent the latest published patch/development version of LAMMPS.
the fetched documentation will include those changes (but your source
code will not, unless you update your local repository).
(b) You can build the HTML and PDF files yourself, by typing "make (b) You can build the HTML and PDF files yourself, by typing "make html"
html" or by "make pdf", respectively. This requires various tools or by "make pdf", respectively. This requires various tools and files.
including the Python documentation processing tool Sphinx, which the Some of them have to be installed (more on that below). For the rest the
build process will attempt to download and install on your system into build process will attempt to download and install into a python virtual
a python virtual environment, if not already available. The PDF file environment and local folders.
will require a working LaTeX installation with several add-on packages
in addition to the Python/Sphinx setup. See more details below.
---------------- ----------------
The generation of all documentation is managed by the Makefile in this Installing prerequisites for the documentation build
dir.
Options: To run the HTML documentation build toolchain, python 3.x, doxygen, git,
and virtualenv have to be installed. Also internet access is initially
required to download external files and tools.
make html # generate HTML in html dir using Sphinx Building the PDF format manual requires in addition a compatible LaTeX
make pdf # generate 2 PDF files (Manual.pdf,Developer.pdf) installation with support for PDFLaTeX and several add-on LaTeX packages
# in this dir via Sphinx and PDFLaTeX installed. This includes:
make fetch # fetch HTML doc pages and 2 PDF files from web site
# as a tarball and unpack into html dir and 2 PDFs
make epub # generate LAMMPS.epub in ePUB format using Sphinx
make clean # remove intermediate RST files created by HTML build
make clean-all # remove entire build folder and any cached data
----------------
Installing prerequisites for HTML build
To run the HTML documention build toolchain, Python 3 and virtualenv
have to be installed. Here are instructions for common setups:
# Ubuntu
sudo apt-get install python-virtualenv
# Fedora (up to version 21)
# Red Hat Enterprise Linux or CentOS (up to version 7.x)
sudo yum install python3-virtualenv
# Fedora (since version 22)
sudo dnf install python3-virtualenv
# MacOS X
## Python 3
Download the latest Python 3 MacOS X package from
https://www.python.org and install it. This will install both Python
3 and pip3.
## virtualenv
Once Python 3 is installed, open a Terminal and type
pip3 install virtualenv
This will install virtualenv from the Python Package Index.
----------------
Installing prerequisites for PDF build
Same as for HTML plus a compatible LaTeX installation with
support for PDFLaTeX. Also the following LaTeX packages need
to be installed (e.g. from texlive):
- amsmath - amsmath
- anysize
- babel - babel
- capt-of - capt-of
- cmap - cmap
@ -105,24 +68,13 @@ to be installed (e.g. from texlive):
- tabulary - tabulary
- upquote - upquote
- wrapfig - wrapfig
Building the EPUB format requires LaTeX installation with the same packages
as for the PDF format plus the 'dvipng' command to convert the embedded math
into images. The MOBI format is generated from the EPUB format file by using
the tool 'ebook-convert' from the 'calibre' e-book management software
(https://calibre-ebook.com).
---------------- ----------------
Installing prerequisites for epub build More details on this can be found in the manual itself. The online
version is at: https://lammps.sandia.gov/doc/Manual_build.html
## ePUB
Same as for HTML. This uses the same tools and configuration
files as the HTML tree. The ePUB format conversion currently
does not support processing mathematical expressions via MathJAX,
so there will be limitations on some pages. For the time being
until this is resolved, building and using the PDF format file
is recommended instead.
For converting the generated ePUB file to a mobi format file
(for e-book readers like Kindle, that cannot read ePUB), you
also need to have the 'ebook-convert' tool from the "calibre"
software installed. http://calibre-ebook.com/
You first create the ePUB file with 'make epub' and then do:
ebook-convert LAMMPS.epub LAMMPS.mobi

View File

@ -0,0 +1,93 @@
# Outline of LAMMPS documentation file conventions
The purpose of this document is to provide a point of reference
for LAMMPS developers and contributors as to what conventions
should be used to structure and format files in the LAMMPS manual.
Last change: 2020-04-23
## File format and tools
In fall 2019, the LAMMPS documentation file format has changed from
a home grown minimal markup designed to generate HTML format files
from a mostly plain text format to using the reStructuredText file
format. For a transition period all files in the old .txt format
were transparently converted to .rst and then processed. The txt2rst
tool is still included in the distribution to obtain an initial .rst
file for integration into the manual. Since the transition to
reStructuredText as source format, many of the artifacts of the
translation have been removed, though, and parts of the documentation
refactored and expanded to take advantage of the capabilities of
reStructuredText and associated tools. The conversion from the
source to the final formats (HTML, PDF, and optionally e-book
reader formats ePUB and MOBI) is mostly automated and controlled
by a Makefile in the `doc` folder. This makefile assumes that the
processing is done on a Unix-like machine and Python 3.5 or later
and a matching virtualenv module are available. Additional Python
packages (like the Sphinx tool and several extensions) are
transparently installed into a virtual environment over the
internet using the `pip` package manager. Further requirements
and details are discussed in the manual.
## Work in progress
The refactoring and improving of the documentation is an ongoing
process, so statements in this document may not always be fully
up-to-date. If in doubt, contact the LAMMPS developers.
## General structure
The layout and formatting of added files should follow the example
of the existing files. Since those are directly derived from their
former .txt format versions and the manual has been maintained in
that format for many years, there is a large degree of consistency
already, so comparison with similar files should give you a good
idea what kind of information and sections are needed.
## Formatting conventions
Filenames, folders, paths, (shell) commands, definitions, makefile
settings and similar should be formatted as "literals" with
double backward quotes bracketing the item: \`\`path/to/some/file\`\`
Keywords and options are formatted in italics: \*option\*
Mathematical expressions, equations, symbols are typeset using
either a `.. math::` block or the `:math:` role.
Groups of shell commands or LAMMPS input script or C/C++ source
code should be typeset into a `.. code-block::` section. A syntax
highlighting extension for LAMMPS input scripts is provided, so
`LAMMPS` can be used to indicate the language in the code block
in addition to `bash`, `c`, or `python`. When no syntax style
is indicated, no syntax highlighting is performed.
As an alternative, e.g. to typeset the syntax of file formats
a `.. parsed-literal::` block can be used, which allows some
formatting directives, which means that related characters need
to be escaped with a preceding backslash: `\*`.
Special remarks can be highlighted with a `.. note::` block and
strong warnings can be put into a `.. warning::` block.
## Required steps when adding a custom style to LAMMPS
When adding a new style (e.g. pair style or a compute or a fix)
or a new command, it is **required** to include the corresponding
documentation. Those are often new files that need to be added.
In order to be included in the documentation, those new files
need to be referenced in a `.. toctree::` block. Most of those
use patterns with wildcards, so the addition will be automatic.
However, those additions also need to be added to some lists of
styles or commands. The `make style\_check` command will perform
a test and report any missing entries and list the affected files.
Any references defined with `.. \_refname:` have to be unique
across all documentation files and this can be checked for with
`make anchor\_check`. Finally, a spell-check should be done,
which is triggered via `make spelling`. Any offenses need to
be corrected and false positives should be added to the file
`utils/sphinx-config/false\_positives.txt`.
## Required additional steps when adding a new package to LAMMPS
TODO

1
doc/doxygen/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/xml

528
doc/doxygen/Doxyfile.in Normal file
View File

@ -0,0 +1,528 @@
# Doxyfile 1.8.15 -*- makefile -*-
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "LAMMPS Programmer's Guide"
PROJECT_NUMBER = "24 August 2020"
PROJECT_BRIEF = "Documentation of the LAMMPS library interface and Python wrapper"
PROJECT_LOGO = lammps-logo.png
CREATE_SUBDIRS = NO
ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
OUTPUT_TEXT_DIRECTION = LTR
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
INHERIT_DOCS = YES
TAB_SIZE = 2
# When enabled doxygen tries to link words that correspond to documented
# classes, or namespaces to their corresponding documentation. Such a link can
# be prevented in individual cases by putting a % sign in front of the word or
# globally by setting AUTOLINK_SUPPORT to NO.
# The default value is: YES.
AUTOLINK_SUPPORT = YES
# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
# to include (a tag file for) the STL sources as input, then you should set this
# tag to YES in order to let doxygen match functions declarations and
# definitions whose arguments contain STL classes (e.g. func(std::string);
# versus func(std::string) {}). This also make the inheritance and collaboration
# diagrams that involve STL classes more complete and accurate.
# The default value is: NO.
BUILTIN_STL_SUPPORT = YES
IDL_PROPERTY_SUPPORT = NO
# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
# cache is used to resolve symbols given their name and scope. Since this can be
# an expensive process and often the same symbol appears multiple times in the
# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
# doxygen will become slower. If the cache is too large, memory is wasted. The
# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
# symbols. At the end of a run doxygen will report the cache usage and suggest
# the optimal cache size from a speed point of view.
# Minimum value: 0, maximum value: 9, default value: 0.
LOOKUP_CACHE_SIZE = 2
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
# documentation are documented, even if no documentation was available. Private
# class members and static file members will be hidden unless the
# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
# Note: This will also disable the warnings about undocumented members that are
# normally produced when WARNINGS is set to YES.
# The default value is: NO.
EXTRACT_ALL = NO
# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
# be included in the documentation.
# The default value is: NO.
EXTRACT_PRIVATE = YES
# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
# scope will be included in the documentation.
# The default value is: NO.
EXTRACT_PACKAGE = YES
# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
# included in the documentation.
# The default value is: NO.
EXTRACT_STATIC = YES
# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
# locally in source files will be included in the documentation. If set to NO,
# only classes defined in header files are included. Does not have any effect
# for Java sources.
# The default value is: YES.
EXTRACT_LOCAL_CLASSES = YES
# If this flag is set to YES, the members of anonymous namespaces will be
# extracted and appear in the documentation as a namespace called
# 'anonymous_namespace{file}', where file will be replaced with the base name of
# the file that contains the anonymous namespace. By default anonymous namespace
# are hidden.
# The default value is: NO.
EXTRACT_ANON_NSPACES = YES
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
# undocumented members inside documented classes or files. If set to NO these
# members will be included in the various overviews, but no documentation
# section is generated. This option has no effect if EXTRACT_ALL is enabled.
# The default value is: NO.
HIDE_UNDOC_MEMBERS = YES
# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
# undocumented classes that are normally visible in the class hierarchy. If set
# to NO, these classes will be included in the various overviews. This option
# has no effect if EXTRACT_ALL is enabled.
# The default value is: NO.
HIDE_UNDOC_CLASSES = YES
# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
# (class|struct|union) declarations. If set to NO, these declarations will be
# included in the documentation.
# The default value is: NO.
HIDE_FRIEND_COMPOUNDS = NO
# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
# documentation blocks found inside the body of a function. If set to NO, these
# blocks will be appended to the function's detailed documentation block.
# The default value is: NO.
HIDE_IN_BODY_DOCS = NO
# The INTERNAL_DOCS tag determines if documentation that is typed after a
# \internal command is included. If the tag is set to NO then the documentation
# will be excluded. Set it to YES to include the internal documentation.
# The default value is: NO.
INTERNAL_DOCS = NO
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
# names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# and Mac users are advised to set this option to NO.
# The default value is: system dependent.
CASE_SENSE_NAMES = YES
# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
# their full class and namespace scopes in the documentation. If set to YES, the
# scope will be hidden.
# The default value is: NO.
HIDE_SCOPE_NAMES = YES
# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
# append additional text to a page's title, such as Class Reference. If set to
# YES the compound reference will be hidden.
# The default value is: NO.
HIDE_COMPOUND_REFERENCE= NO
# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
# the files that are included by a file in the documentation of that file.
# The default value is: YES.
SHOW_INCLUDE_FILES = NO
# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
# grouped member an include statement to the documentation, telling the reader
# which file to include in order to use the member.
# The default value is: NO.
SHOW_GROUPED_MEMB_INC = NO
# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
# files with double quotes in the documentation rather than with sharp brackets.
# The default value is: NO.
FORCE_LOCAL_INCLUDES = NO
# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
# documentation for inline members.
# The default value is: YES.
INLINE_INFO = YES
# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
# (detailed) documentation of file and class members alphabetically by member
# name. If set to NO, the members will appear in declaration order.
# The default value is: YES.
SORT_MEMBER_DOCS = NO
# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
# descriptions of file, namespace and class members alphabetically by member
# name. If set to NO, the members will appear in declaration order. Note that
# this will also influence the order of the classes in the class list.
# The default value is: NO.
SORT_BRIEF_DOCS = NO
# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
# (brief and detailed) documentation of class members so that constructors and
# destructors are listed first. If set to NO the constructors will appear in the
# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
# member documentation.
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
# detailed member documentation.
# The default value is: NO.
SORT_MEMBERS_CTORS_1ST = NO
# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
# of group names into alphabetical order. If set to NO the group names will
# appear in their defined order.
# The default value is: NO.
SORT_GROUP_NAMES = NO
# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
# fully-qualified names, including namespaces. If set to NO, the class list will
# be sorted only by class name, not including the namespace part.
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
# Note: This option applies only to the class list, not to the alphabetical
# list.
# The default value is: NO.
SORT_BY_SCOPE_NAME = NO
# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
# type resolution of all parameters of a function it will reject a match between
# the prototype and the implementation of a member function even if there is
# only one candidate or it is obvious which candidate to choose by doing a
# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
# accept a match between prototype and implementation in such cases.
# The default value is: NO.
STRICT_PROTO_MATCHING = NO
# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
GENERATE_TODOLIST = YES
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
GENERATE_TESTLIST = YES
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
# The default value is: YES.
GENERATE_BUGLIST = YES
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
# the deprecated list. This list is created by putting \deprecated commands in
# the documentation.
# The default value is: YES.
GENERATE_DEPRECATEDLIST= YES
# The ENABLED_SECTIONS tag can be used to enable conditional documentation
# sections, marked by \if <section_label> ... \endif and \cond <section_label>
# ... \endcond blocks.
ENABLED_SECTIONS =
# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
# initial value of a variable or macro / define can have for it to appear in the
# documentation. If the initializer consists of more lines than specified here
# it will be hidden. Use a value of 0 to hide initializers completely. The
# appearance of the value of individual variables and macros / defines can be
# controlled using \showinitializer or \hideinitializer command in the
# documentation regardless of this setting.
# Minimum value: 0, maximum value: 10000, default value: 30.
MAX_INITIALIZER_LINES = 30
# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
# the bottom of the documentation of classes and structs. If set to YES, the
# list will mention the files that were used to generate the documentation.
# The default value is: YES.
SHOW_USED_FILES = YES
# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
# will remove the Files entry from the Quick Index and from the Folder Tree View
# (if specified).
# The default value is: YES.
SHOW_FILES = NO
# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
# page. This will remove the Namespaces entry from the Quick Index and from the
# Folder Tree View (if specified).
# The default value is: YES.
SHOW_NAMESPACES = YES
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from
# the version control system). Doxygen will invoke the program by executing (via
# popen()) the command command input-file, where command is the value of the
# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
# by doxygen. Whatever the program writes to standard output is used as the file
# version. For an example see the documentation.
FILE_VERSION_FILTER =
# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
# by doxygen. The layout file controls the global structure of the generated
# output files in an output format independent way. To create the layout file
# that represents doxygen's defaults, run doxygen with the -l option. You can
# optionally specify a file name after the option, if omitted DoxygenLayout.xml
# will be used as the name of the layout file.
#
# Note that if you run doxygen from a directory containing a file called
# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
# tag is left empty.
LAYOUT_FILE =
# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
# the reference definitions. This must be a list of .bib files. The .bib
# extension is automatically appended if omitted. This requires the bibtex tool
# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
# For LaTeX the style of the bibliography can be controlled using
# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
# search path. See also \cite for info how to create references.
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# Configuration options related to warning and progress messages
#---------------------------------------------------------------------------
# The QUIET tag can be used to turn on/off the messages that are generated to
# standard output by doxygen. If QUIET is set to YES this implies that the
# messages are off.
# The default value is: NO.
QUIET = NO
# The WARNINGS tag can be used to turn on/off the warning messages that are
# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
# this implies that the warnings are on.
#
# Tip: Turn warnings on while writing the documentation.
# The default value is: YES.
WARNINGS = YES
# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
# will automatically be disabled.
# The default value is: YES.
WARN_IF_UNDOCUMENTED = YES
# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
# potential errors in the documentation, such as not documenting some parameters
# in a documented function, or documenting parameters that don't exist or using
# markup commands wrongly.
# The default value is: YES.
WARN_IF_DOC_ERROR = YES
# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
# are documented, but have no documentation for their parameters or return
# value. If set to NO, doxygen will only warn about wrong or incomplete
# parameter documentation, but not about the absence of documentation. If
# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
# The default value is: NO.
WARN_NO_PARAMDOC = YES
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
# a warning is encountered.
# The default value is: NO.
WARN_AS_ERROR = NO
# The WARN_FORMAT tag determines the format of the warning messages that doxygen
# can produce. The string should contain the $file, $line, and $text tags, which
# will be replaced by the file and line number from which the warning originated
# and the warning text. Optionally the format may contain $version, which will
# be replaced by the version of the file (if it could be obtained via
# FILE_VERSION_FILTER)
# The default value is: $file:$line: $text.
WARN_FORMAT = "$file:$line: $text"
# The WARN_LOGFILE tag can be used to specify a file to which warning and error
# messages should be written. If left blank the output is written to standard
# error (stderr).
WARN_LOGFILE = "../doxygen-warn.log"
#---------------------------------------------------------------------------
# Configuration options related to the input files
#---------------------------------------------------------------------------
# The INPUT tag is used to specify the files and/or directories that contain
# documented source files. You may enter file names like myfile.cpp or
# directories like /usr/src/myproject. Separate the files or directories with
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
# Explicit list of the source files that doxygen scans. The @LAMMPS_SOURCE_DIR@
# placeholder is presumably substituted when this template is configured by the
# build system (verify against the doc build scripts).
# NOTE: the last entry must NOT end with a line-continuation backslash,
# otherwise the line that follows becomes part of the INPUT value.
INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp \
                         @LAMMPS_SOURCE_DIR@/utils.h \
                         @LAMMPS_SOURCE_DIR@/library.cpp \
                         @LAMMPS_SOURCE_DIR@/library.h \
                         @LAMMPS_SOURCE_DIR@/lammps.cpp \
                         @LAMMPS_SOURCE_DIR@/lammps.h \
                         @LAMMPS_SOURCE_DIR@/lmptype.h \
                         @LAMMPS_SOURCE_DIR@/pointers.h \
                         @LAMMPS_SOURCE_DIR@/atom.cpp \
                         @LAMMPS_SOURCE_DIR@/atom.h \
                         @LAMMPS_SOURCE_DIR@/input.cpp \
                         @LAMMPS_SOURCE_DIR@/input.h \
                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp \
                         @LAMMPS_SOURCE_DIR@/tokenizer.h \
                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp \
                         @LAMMPS_SOURCE_DIR@/text_file_reader.h \
                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp \
                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.
# The default value is: NO.
EXCLUDE_SYMLINKS = YES
#---------------------------------------------------------------------------
# Configuration options related to output
#---------------------------------------------------------------------------
GENERATE_HTML = NO
GENERATE_LATEX = NO
GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES
XML_NS_MEMB_FILE_SCOPE = NO
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
# C-preprocessor directives found in the sources and include files.
# The default value is: YES.
# NOTE: set to NO here, overriding the default. All tags below that are marked
# "requires that the tag ENABLE_PREPROCESSING is set to YES" (MACRO_EXPANSION,
# EXPAND_ONLY_PREDEF, SEARCH_INCLUDES, ...) therefore have no effect.
ENABLE_PREPROCESSING = NO
# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
# in the source code. If set to NO, only conditional compilation will be
# performed. Macro expansion can be done in a controlled way by setting
# EXPAND_ONLY_PREDEF to YES.
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
# EXPAND_AS_DEFINED tags.
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_ONLY_PREDEF = NO
# If the SEARCH_INCLUDES tag is set to YES, the include files in the
# INCLUDE_PATH will be searched if a #include is found.
# The default value is: YES.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
SEARCH_INCLUDES = YES
# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by the
# preprocessor.
# This tag requires that the tag SEARCH_INCLUDES is set to YES.
INCLUDE_PATH =
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
# directories. If left blank, the patterns specified with FILE_PATTERNS will be
# used.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
INCLUDE_FILE_PATTERNS =
# The PREDEFINED tag can be used to specify one or more macro names that are
# defined before the preprocessor is started (similar to the -D option of e.g.
# gcc). The argument of the tag is a list of macros of the form: name or
# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
# is assumed. To prevent a macro definition from being undefined via #undef or
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED =
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
# macro definition that is found in the sources will be used. Use the PREDEFINED
# tag if you want to use a different macro definition that overrules the
# definition found in the source code.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_AS_DEFINED =
# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
# remove all references to function-like macros that are alone on a line, have
# an all uppercase name, and do not end with a semicolon. Such function macros
# are typically used for boiler-plate code, and will confuse the parser if not
# removed.
# The default value is: YES.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
SKIP_FUNCTION_MACROS = YES

BIN
doc/doxygen/lammps-logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

3
doc/graphviz/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/*.png
/*.svg
/*.pdf

30
doc/graphviz/Makefile Normal file
View File

@ -0,0 +1,30 @@
# Makefile for generating images with graphviz
#
# Every *.dot file in this directory is rendered to a PNG image of the same
# base name in $(IMGDIR) with the graphviz "dot" tool. When "dot" is not
# available, a notice is printed instead and no image is (re)generated.

SHELL     = /bin/bash
BUILDDIR  = ${CURDIR}/..
IMGDIR    = $(BUILDDIR)/src/JPG
IMGSRC    = $(wildcard *.dot)
IMGPNG    = $(IMGSRC:%.dot=$(IMGDIR)/%.png)

# detect whether the graphviz "dot" executable can be found in the PATH
HAS_DOT = NO
ifeq ($(shell which dot >/dev/null 2>&1; echo $$?), 0)
HAS_DOT = YES
endif

# "all" and "clean" are commands, not files: never treat them as up to date
.PHONY: all clean

all: $(IMGPNG)

# remove the generated images and editor backup files
clean:
	rm -f $(IMGPNG) *~

ifeq ($(HAS_DOT),YES)
# render one graph description into a PNG image
$(IMGDIR)/%.png: %.dot
	dot -Tpng -o $@ $<
endif

ifeq ($(HAS_DOT),NO)
# graphviz is missing: tell the user how to get the images regenerated
$(IMGDIR)/%.png: %.dot
	@echo '###################################################'
	@echo '# Need to install "graphviz" to regenerate graphs #'
	@echo '###################################################'
endif

View File

@ -0,0 +1,90 @@
// LAMMPS Class topology
//
// Directed graph with the "LAMMPS" node as root, laid out left-to-right.
// Two edge styles are used: plain solid edges and dashed edges.
// NOTE(review): solid edges presumably denote "contains an instance of" and
// dashed edges "is derived from" - confirm against the text that embeds
// the rendered image.
// NOTE: the order of node and edge statements influences the layout computed
// by graphviz, so do not reorder statements without checking the output.
digraph lammps {
// draw the hierarchy from left to right instead of top to bottom
rankdir="LR"
// root node
La [shape=circle label="LAMMPS"]
// box nodes that are direct children of the root node (see first edge below)
At [shape=box label="Atom" color=blue]
Ci [shape=box label="CiteMe"]
Co [shape=box label="Comm" color=blue]
Do [shape=box label="Domain" color=blue]
Er [shape=box label="Error" color=blue]
Fo [shape=box label="Force" color=blue]
Gr [shape=box label="Group" color=blue]
In [shape=box label="Input" color=blue]
Ko [shape=box label="KokkosLMP"]
Ak [shape=box label="AtomKK" color=blue]
Mk [shape=box label="MemoryKK" color=blue]
Me [shape=box label="Memory" color=blue]
Mo [shape=box label="Modify" color=blue]
Ne [shape=box label="Neighbor" color=blue]
Ou [shape=box label="Output" color=blue]
Py [shape=box label="Python" color=blue]
Up [shape=box label="Update" color=blue]
Un [shape=box label="Universe" color=blue]
Ti [shape=box label="Timer" color=blue]
// Region and two box nodes reached from it via dashed edges
Rg [label="Region" color=red]
Rb [shape=box label="RegionBlock"]
Rs [shape=box label="RegionSphere"]
// red nodes in the default node shape
// NOTE(review): presumably the base classes of the various styles - confirm
Av [label="AtomVec" color=red]
It [label="Integrate" color=red]
Mi [label="Min" color=red]
Pa [label="Pair" color=red]
Bo [label="Bond" color=red]
An [label="Angle" color=red]
Di [label="Dihedral" color=red]
Im [label="Improper" color=red]
Ks [label="Kspace" color=red]
Du [label="Dump" color=red]
Fi [label="Fix" color=red]
Cp [label="Compute" color=red]
// uncolored nodes in the default node shape
Th [label="Thermo"]
Va [label="Variable"]
// Kspace related nodes: Ewald and PPPM plus the nodes reached from PPPM
Ew [shape=box label="Ewald"]
Pp [shape=box label="PPPM"]
Ff [label="FFT3d"]
Re [label="Remap"]
Gc [label="GridComm"]
// uncolored box nodes; each one is the target of a dashed edge below
Cb [shape=box label="CommBrick"]
Ct [shape=box label="CommTiled"]
Aa [shape=box label="AtomVecAtomic"]
Am [shape=box label="AtomVecMolecular"]
Lj [shape=box label="PairLJCut"]
Lo [shape=box label="PairLJCutOMP"]
Lg [shape=box label="PairLJCutGPU"]
Te [shape=box label="PairTersoff"]
Bh [shape=box label="BondHarmonic"]
Bf [shape=box label="BondFENE"]
Fa [shape=box label="FixAveTime"]
Fn [shape=box label="FixNVE"]
Fh [shape=box label="FixNH"]
Fp [shape=box label="FixNPT"]
Ft [shape=box label="FixNVT"]
Da [shape=box label="DumpAtom"]
Dc [shape=box label="DumpCustom"]
Dg [shape=box label="DumpCFG"]
Ve [shape=box label="Verlet"]
Rr [shape=box label="Respa"]
Po [shape=box label="PPPMOmp"]
// edges: all are drawn with penwidth=2, some additionally with style=dashed
La -> {At Ci Co Do Er Fo Gr In Ko Ak Mk Me Mo Ne Ou Py Ti Up Un} [penwidth=2]
Do -> {Rg} [penwidth=2]
Co -> {Cb Ct} [style=dashed penwidth=2]
Rg -> {Rb Rs} [style=dashed penwidth=2]
In -> Va [penwidth=2]
Mo -> {Fi Cp} [penwidth=2]
Fo -> {Pa Bo An Di Im Ks} [penwidth=2]
Ks -> {Ew Pp} [style=dashed penwidth=2]
Pp -> {Ff Re Gc} [penwidth=2]
Pp -> {Po} [style=dashed penwidth=2]
Up -> {It Mi} [penwidth=2]
It -> {Ve Rr} [style=dashed penwidth=2]
Ou -> {Du Th} [penwidth=2]
Du -> {Da Dc} [style=dashed penwidth=2]
Dc -> {Dg} [style=dashed penwidth=2]
At -> Av [penwidth=2]
Av -> {Aa Am} [style=dashed penwidth=2]
Pa -> {Lj Te} [style=dashed penwidth=2]
Lj -> {Lo Lg} [style=dashed penwidth=2]
Bo -> {Bh Bf} [style=dashed penwidth=2]
Fi -> {Fa Fn Fh} [style=dashed penwidth=2]
Fh -> {Fp Ft} [style=dashed penwidth=2]
}

View File

@ -3,7 +3,7 @@
The purpose of this document is to provide a point of reference The purpose of this document is to provide a point of reference
for LAMMPS developers and contributors as to what include files for LAMMPS developers and contributors as to what include files
and definitions to put where into LAMMPS source. and definitions to put where into LAMMPS source.
Last change 2019-07-05 Last change 2020-08-31
## Table of Contents ## Table of Contents
@ -99,10 +99,13 @@ Include files should be included in this order:
#### pointers.h #### pointers.h
The `pointers.h` header file also includes `cstdio` and `lmptype.h` The `pointers.h` header file also includes `cstdio`, `cstddef`,
(and through it `stdint.h`, `inttypes.h`, `cstdlib`, and `climits`). `string`, `lmptype.h`, and `utils.h` (and through those indirectly
`stdint.h`, `inttypes.h`, `cstdlib`, and `climits`).
This means any header including `pointers.h` can assume that `FILE`, This means any header including `pointers.h` can assume that `FILE`,
`NULL`, `INT_MAX` are defined. `NULL`, `INT_MAX` are defined, they may freely use std::string
and functions from the utils namespace without including the
corresponding header files.
## Tools ## Tools

View File

@ -1,4 +1,4 @@
.TH LAMMPS "21 August 2020" "2020-08-21" .TH LAMMPS "24 August 2020" "2020-08-24"
.SH NAME .SH NAME
.B LAMMPS .B LAMMPS
\- Molecular Dynamics Simulator. \- Molecular Dynamics Simulator.

View File

@ -1,4 +0,0 @@
Sphinx
sphinxcontrib-spelling
breathe
Pygments

View File

@ -471,7 +471,7 @@ LAMMPS source distribution.
.. code-block:: bash .. code-block:: bash
make html # create HTML doc pages in html directory make html # create HTML doc pages in html directory
make pdf # create Developer.pdf and Manual.pdf in this directory make pdf # create Manual.pdf in this directory
make fetch # fetch HTML and PDF files from LAMMPS web site make fetch # fetch HTML and PDF files from LAMMPS web site
make clean # remove all intermediate files make clean # remove all intermediate files
make clean-all # reset the entire doc build environment make clean-all # reset the entire doc build environment

View File

@ -378,22 +378,22 @@ The images below illustrate how the data is presented.
.. list-table:: .. list-table::
* - .. figure:: JPG/coverage-overview-top.png * - .. figure:: JPG/coverage-overview-top.png
:target: JPG/coverage-overview-top.png :scale: 25%
Top of the overview page Top of the overview page
- .. figure:: JPG/coverage-overview-manybody.png - .. figure:: JPG/coverage-overview-manybody.png
:target: JPG/coverage-overview-manybody.png :scale: 25%
Styles with good coverage Styles with good coverage
- .. figure:: JPG/coverage-file-top.png - .. figure:: JPG/coverage-file-top.png
:target: JPG/coverage-file-top.png :scale: 25%
Top of individual source page Top of individual source page
- .. figure:: JPG/coverage-file-branches.png - .. figure:: JPG/coverage-file-branches.png
:target: JPG/coverage-file-branches.png :scale: 25%
Source page with branches Source page with branches

View File

@ -361,9 +361,12 @@ be specified in uppercase.
* - AMDAVX * - AMDAVX
- HOST - HOST
- AMD 64-bit x86 CPU (AVX 1) - AMD 64-bit x86 CPU (AVX 1)
* - EPYC * - ZEN
- HOST - HOST
- AMD EPYC Zen class CPU (AVX 2) - AMD Zen class CPU (AVX 2)
* - ZEN2
- HOST
- AMD Zen2 class CPU (AVX 2)
* - ARMV80 * - ARMV80
- HOST - HOST
- ARMv8.0 Compatible CPU - ARMv8.0 Compatible CPU
@ -445,12 +448,18 @@ be specified in uppercase.
* - TURING75 * - TURING75
- GPU - GPU
- NVIDIA Turing generation CC 7.5 GPU - NVIDIA Turing generation CC 7.5 GPU
* - AMPERE80
- GPU
- NVIDIA Ampere generation CC 8.0 GPU
* - VEGA900 * - VEGA900
- GPU - GPU
- AMD GPU MI25 GFX900 - AMD GPU MI25 GFX900
* - VEGA906 * - VEGA906
- GPU - GPU
- AMD GPU MI50/MI60 GFX906 - AMD GPU MI50/MI60 GFX906
* - INTEL_GEN
- GPU
- Intel GPUs Gen9+
Basic CMake build settings: Basic CMake build settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -1,3 +0,0 @@
/developer.aux
/developer.log
/developer.toc

View File

@ -1,198 +0,0 @@
#FIG 3.2 Produced by xfig version 3.2.5a
Portrait
Center
Inches
Letter
100.00
Single
-2
1200 2
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2232 1170 3540 1170 3540 1505 2232 1505 2232 1170
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2220 1830 3015 1830 3015 2219 2220 2219 2220 1830
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2226 3285 3300 3285 3300 3665 2226 3665 2226 3285
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2223 5190 3225 5190 3225 5525 2223 5525 2223 5190
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2232 7125 3090 7125 3090 7478 2232 7478 2232 7125
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2226 10230 3300 10230 3300 10565 2226 10565 2226 10230
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4026 10305 4980 10305 4980 10592 4026 10592 4026 10305
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4029 9900 5205 9900 5205 10250 4029 10250 4029 9900
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4038 9315 5370 9315 5370 9659 4038 9659 4038 9315
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4023 8955 4530 8955 4530 9278 4023 9278 4023 8955
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4029 8475 5190 8475 5190 8762 4029 8762 4029 8475
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4008 8115 5430 8115 5430 8408 4008 8408 4008 8115
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4026 7425 4995 7425 4995 7712 4026 7712 4026 7425
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4035 6720 4650 6720 4650 7025 4035 7025 4035 6720
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4044 7080 4830 7080 4830 7358 4044 7358 4044 7080
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4032 6105 5205 6105 5205 6419 4032 6419 4032 6105
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4026 5715 5115 5715 5115 6062 4026 6062 4026 5715
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4023 3585 4605 3585 4605 3872 4023 3872 4023 3585
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3954 1680 5175 1680 5175 1997 3954 1997 3954 1680
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
1620 5235 2100 615
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
1605 5445 2070 10695
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3120 1935 3855 1800
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3150 2115 3765 2250
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3135 7230 3945 6840
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3150 7335 3945 8610
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
5265 8610 6195 8400
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
5280 8655 6180 8820
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3345 10290 3930 10020
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3360 10395 3930 10425
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3360 10455 3930 10755
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2193 360 3435 360 3435 647 2193 647 2193 360
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3398 3472 3923 3307
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3413 3601 3923 3721
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3285 2806 3870 2802
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3315 5372 3900 5368
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
6354 2280 7470 2280 7470 2585 6354 2585 6354 2280
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
6348 1875 7320 1875 7320 2222 6348 2222 6348 1875
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3954 2070 5505 2070 5505 2372 3954 2372 3954 2070
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
5634 2137 6230 2045
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
5670 2310 6265 2418
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3900 2640 5400 2640 5400 2975 3900 2975 3900 2640
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4038 3165 5385 3165 5385 3497 4038 3497 4038 3165
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4245 4110 5730 4110 5730 4499 4245 4499 4245 4110
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4233 4545 6390 4545 6390 4862 4233 4862 4233 4545
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4026 5190 5385 5190 5385 5525 4026 5525 4026 5190
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4038 7755 5310 7755 5310 8075 4038 8075 4038 7755
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
6270 8250 7365 8250 7365 8610 6270 8610 6270 8250
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
6273 8655 7380 8655 7380 8978 6273 8978 6273 8655
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
4041 10620 5985 10620 5985 10943 4041 10943 4041 10620
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2217 10830 3135 10830 3135 11156 2217 11156 2217 10830
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2229 9780 3240 9780 3240 10118 2229 10118 2229 9780
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2214 9015 3285 9015 3285 9362 2214 9362 2214 9015
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2208 5850 3420 5850 3420 6209 2208 6209 2208 5850
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2217 4275 3615 4275 3615 4634 2217 4634 2217 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2235 2655 3150 2655 3150 3000 2235 3000 2235 2655
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
60 5115 1500 5115 1500 5610 60 5610 60 5115
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3486 6018 4011 5853
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3486 6129 3996 6249
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3361 9291 3991 9531
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3345 9129 4005 9099
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3691 4412 4216 4277
2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
1 1 2.00 120.00 240.00
3695 4561 4175 4711
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2220 735 3129 735 3129 1043 2220 1043 2220 735
4 0 1 50 -1 18 18 0.0000 4 225 1275 2265 1455 Universe\001
4 0 1 50 -1 18 18 0.0000 4 285 735 2265 2175 Input\001
4 0 1 50 -1 18 18 0.0000 4 225 780 2265 2925 Atom\001
4 0 1 50 -1 18 18 0.0000 4 285 1020 2265 3600 Update\001
4 0 1 50 -1 18 18 0.0000 4 285 1320 2265 4575 Neighbor\001
4 0 1 50 -1 18 18 0.0000 4 225 945 2265 5475 Comm\001
4 0 1 50 -1 18 18 0.0000 4 225 1110 2265 6150 Domain\001
4 0 1 50 -1 18 18 0.0000 4 225 810 2265 7425 Force\001
4 0 1 50 -1 18 18 0.0000 4 285 975 2265 9300 Modify\001
4 0 1 50 -1 18 18 0.0000 4 285 900 2265 10050 Group\001
4 0 1 50 -1 18 18 0.0000 4 285 990 2265 10500 Output\001
4 0 1 50 -1 18 18 0.0000 4 225 825 2265 11100 Timer\001
4 0 0 50 -1 18 18 0.0000 4 225 1170 3990 1950 Variable\001
4 0 4 50 -1 18 18 0.0000 4 225 1470 3990 2325 Command\001
4 0 4 50 -1 18 18 0.0000 4 285 1275 4065 3450 Integrate\001
4 0 4 50 -1 18 18 0.0000 4 225 525 4065 3825 Min\001
4 0 0 50 -1 18 18 0.0000 4 285 1230 4065 5475 Irregular\001
4 0 4 50 -1 18 18 0.0000 4 285 1020 4065 6000 Region\001
4 0 0 50 -1 18 18 0.0000 4 225 975 4065 6375 Lattice\001
4 0 4 50 -1 18 18 0.0000 4 225 435 4065 9225 Fix\001
4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 9600 Compute\001
4 0 4 50 -1 18 18 0.0000 4 225 570 4065 6975 Pair\001
4 0 4 50 -1 18 18 0.0000 4 285 840 4065 7665 Angle\001
4 0 4 50 -1 18 18 0.0000 4 225 1215 4065 8010 Dihedral\001
4 0 4 50 -1 18 18 0.0000 4 285 1305 4065 8355 Improper\001
4 0 4 50 -1 18 18 0.0000 4 285 1095 4065 8700 KSpace\001
4 0 4 50 -1 18 18 0.0000 4 285 855 4065 10545 Dump\001
4 0 0 50 -1 18 18 0.0000 4 225 1815 4065 10890 WriteRestart\001
4 0 0 50 -1 18 18 0.0000 4 225 930 6315 8550 FFT3D\001
4 0 0 50 -1 18 18 0.0000 4 285 1005 6315 8925 Remap\001
4 0 0 50 -1 18 18 0.0000 4 225 885 6390 2175 Finish\001
4 0 0 50 -1 18 18 0.0000 4 285 1050 6390 2550 Special\001
4 0 4 50 -1 18 18 0.0000 4 225 1305 3990 2925 AtomVec\001
4 0 4 50 -1 18 18 0.0000 4 225 765 4065 7320 Bond\001
4 0 0 50 -1 18 18 0.0000 4 225 1095 4065 10200 Thermo\001
4 0 0 50 -1 18 18 0.0000 4 285 1380 4305 4425 NeighList\001
4 0 0 50 -1 18 18 0.0000 4 285 2025 4305 4800 NeighRequest\001
4 0 1 50 -1 18 18 0.0000 4 285 1155 2250 600 Memory\001
4 0 0 50 -1 18 18 0.0000 4 225 1305 120 5475 LAMMPS\001
4 0 1 50 -1 18 18 0.0000 4 225 735 2265 1005 Error\001

Binary file not shown.

View File

@ -1,699 +0,0 @@
\documentclass{article}
\usepackage{graphicx}
\begin{document}
\centerline{\Large \bf LAMMPS Developer Guide}
\centerline{\bf 23 Aug 2011}
\vspace{0.5in}
This document is a developer guide to the LAMMPS molecular dynamics
package, whose WWW site is at lammps.sandia.gov. It describes the
internal structure and algorithms of the code. Sections will be added
as we have time, and in response to requests from developers and
users.
\tableofcontents
\pagebreak
\section{LAMMPS source files}
LAMMPS source files are in two directories of the distribution
tarball. The src directory has the majority of them, all of which are
C++ files (*.cpp and *.h). Many of these files are in the src
directory itself. There are also dozens of ``packages'', which can be
included or excluded when LAMMPS is built. See the
doc/Section\_build.html section of the manual for more information
about packages, or type ``make'' from within the src directory, which
lists package-related commands, such as ``make package-status''. The
source files for each package are in an all-uppercase sub-directory of
src, like src/MOLECULE or src/USER-CUDA. If the package is currently
installed, copies of the package source files will also exist in the
src directory itself. The src/STUBS sub-directory is not a package
but contains a dummy version of the MPI library, used when building a
serial version of the code.
The lib directory also contains source code for external libraries,
used by a few of the packages. Each sub-directory, like meam or gpu,
contains the source files, some of which are in different languages
such as Fortran. The files are compiled into libraries from within
each sub-directory, e.g. performing a ``make'' in the lib/meam directory
creates a libmeam.a file. These libraries are linked to during a
LAMMPS build, if the corresponding package is installed.
LAMMPS C++ source files almost always come in pairs, such as run.cpp
and run.h. The pair of files defines a C++ class, the Run class in
this case, which contains the code invoked by the ``run'' command in a
LAMMPS input script. As this example illustrates, source file and
class names often have a one-to-one correspondence with a command used
in a LAMMPS input script. Some source files and classes do not have a
corresponding input script command, e.g. ``force.cpp'' and the Force
class. They are discussed in the next section.
\pagebreak
\section{Class hierarchy of LAMMPS}
Though LAMMPS has a lot of source files and classes, its class
hierarchy is quite simple, as outlined in Fig \ref{fig:classes}. Each
boxed name refers to a class and has a pair of associated source files
in lammps/src, e.g. ``memory.cpp'' and ``memory.h''. More details on the
class and its methods and data structures can be found by examining
its *.h file.
LAMMPS (lammps.cpp/h) is the top-level class for the entire code. It
holds an ``instance'' of LAMMPS and can be instantiated one or more
times by a calling code. For example, the file src/main.cpp simply
instantiates one instance of LAMMPS and passes it the input script.
The file src/library.cpp contains a C-style library interface to the
LAMMPS class. See the lammps/couple and lammps/python directories for
examples of simple programs that use LAMMPS through its library
interface. A driver program can instantiate the LAMMPS class multiple
times, e.g. to embed several atomistic simulation regions within a
mesoscale or continuum simulation domain.
There are a dozen or so top-level classes within the LAMMPS class that
are visible everywhere in the code. They are shaded blue in Fig
\ref{fig:classes}. Thus any class can refer to the y-coordinate of
local atom $i$ as atom$\rightarrow$x[i][1]. This visibility is
enabled by a bit of cleverness in the Pointers class (see
src/pointers.h) which every class inherits from.
There are a handful of virtual parent classes in LAMMPS that define
what LAMMPS calls ``styles''. They are shaded red in Fig
\ref{fig:classes}. Each of these is the parent of a number of child
classes that implement the interface defined by the parent class. For
example, the fix style has around 100 child classes. They are the
possible fixes that can be specified by the fix command in an input
script, e.g. fix nve, fix shake, fix ave/time, etc. The corresponding
classes are Fix (for the parent class), FixNVE, FixShake, FixAveTime,
etc. The source files for these classes are easy to identify in the
src directory, since they begin with the word ``fix'', e.g.
fix\_nve.cpp, fix\_shake.cpp, fix\_ave\_time.cpp, etc.
The one exception is child class files for the ``command'' style. These
implement specific commands in the input script that can be invoked
before/after/between runs or which launch a simulation. Examples are
the create\_box, minimize, run, and velocity commands which encode the
CreateBox, Minimize, Run, and Velocity classes. The corresponding
files are create\_box.cpp, minimize.cpp, run.cpp, and velocity.cpp.
The list of command style files can be found by typing ``grep
COMMAND\_CLASS *.h'' from within the src directory, since that word in
the header file identifies the class as an input script command.
Similar words can be grepped to list files for the other LAMMPS
styles. E.g. ATOM\_CLASS, PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS,
FIX\_CLASS, COMPUTE\_CLASS, DUMP\_CLASS, etc.
\begin{figure}[htb]
\begin{center}
\includegraphics[height=4in]{classes.pdf}
\end{center}
\caption{Class hierarchy within LAMMPS source code.}
\label{fig:classes}
\end{figure}
More details on individual classes in Fig \ref{fig:classes} are as
follows:
\begin{itemize}
\item The Memory class handles allocation of all large vectors and
arrays.
\item The Error class prints all error and warning messages.
\item The Universe class sets up partitions of processors so that
multiple simulations can be run, each on a subset of the processors
allocated for a run, e.g. by the mpirun command.
\item The Input class reads an input script, stores variables, and
invokes stand-alone commands that are child classes of the Command
class.
\item As discussed above, the Command class is a parent class for
certain input script commands that perform a one-time operation
before/after/between simulations or which invoke a simulation. They
are instantiated from within the Input class, invoked, then
immediately destructed.
\item The Finish class is instantiated to print statistics to the
screen after a simulation is performed, by commands like run and
minimize.
\item The Special class walks the bond topology of a molecular system
to find first, second, third neighbors of each atom. It is invoked by
several commands, like read\_data, read\_restart, and replicate.
\item The Atom class stores all per-atom arrays. More precisely, they
are allocated and stored by the AtomVec class, and the Atom class
simply stores a pointer to them. The AtomVec class is a parent
class for atom styles, defined by the atom\_style command.
\item The Update class holds an integrator and a minimizer. The
Integrate class is a parent style for the Verlet and rRESPA time
integrators, as defined by the run\_style input command. The Min
class is a parent style for various energy minimizers.
\item The Neighbor class builds and stores neighbor lists. The
NeighList class stores a single list (for all atoms). The
NeighRequest class is called by pair, fix, or compute styles when
they need a particular kind of neighbor list.
\item The Comm class performs interprocessor communication, typically
of ghost atom information. This usually involves MPI message
exchanges with 6 neighboring processors in the 3d logical grid of
processors mapped to the simulation box. Sometimes the Irregular
class is used, when atoms may migrate to arbitrary processors.
\item The Domain class stores the simulation box geometry, as well as
geometric Regions and any user definition of a Lattice. The latter
are defined by region and lattice commands in an input script.
\item The Force class computes various forces between atoms. The Pair
parent class is for non-bonded or pair-wise forces, which in LAMMPS
lingo includes many-body forces such as the Tersoff 3-body
potential. The Bond, Angle, Dihedral, Improper parent classes are
styles for bonded interactions within a static molecular topology.
The KSpace parent class is for computing long-range Coulombic
interactions. One of its child classes, PPPM, uses the FFT3D and
Remap classes to communicate grid-based information with neighboring
processors.
\item The Modify class stores lists of Fix and Compute classes, both
of which are parent styles.
\item The Group class manipulates groups that atoms are assigned to
via the group command. It also computes various attributes of
groups of atoms.
\item The Output class is used to generate 3 kinds of output from a
LAMMPS simulation: thermodynamic information printed to the screen
and log file, dump file snapshots, and restart files. These
correspond to the Thermo, Dump, and WriteRestart classes
respectively. The Dump class is a parent style.
\item The Timer class logs MPI timing information, output at the end
of a run.
\end{itemize}
%%\pagebreak
%%\section{Spatial decomposition and parallel operations}
%%distributed memory
%%Ref to JCP paper
%%diagram of 3d grid of procs and spatial decomp
%%6-way comm
%%ghost atoms, PBC added when comm (in atom class)
%%\pagebreak
%%\section{Fixes, computes, variables}
%%fixes intercolate in timestep, store per-atom info
%%computes based on current snapshot
%%equal- and atom-style variables
%%output they produce - see write-up in HowTo
\pagebreak
\section{How a timestep works}
The first and most fundamental operation within LAMMPS to understand
is how a timestep is structured. Timestepping is performed by the
Integrate class within the Update class. Since Integrate is a parent
class, corresponding to the run\_style input script command, it has
child classes. In this section, the timestep implemented by the
Verlet child class is described. A similar timestep is implemented by
the Respa child class, for the rRESPA hierarchical timestepping
method. The Min parent class performs energy minimization, so does
not perform a literal timestep. But it has logic similar to what is
described here, to compute forces and invoke fixes at each iteration
of a minimization. Differences between time integration and
minimization are highlighted at the end of this section.
The Verlet class is encoded in the src/verlet.cpp and verlet.h files.
It implements the velocity-Verlet timestepping algorithm. The
workhorse method is Verlet::run(), but first we highlight several
other methods in the class.
\begin{itemize}
\item The init() method is called at the beginning of each dynamics
run. It simply sets some internal flags, based on user settings in
other parts of the code.
\item The setup() or setup\_minimal() methods are also called before
each run. The velocity-Verlet method requires current forces be
calculated before the first timestep, so these routines compute
forces due to all atomic interactions, using the same logic that
appears in the timestepping described next. A few fixes are also
invoked, using the mechanism described in the next section. Various
counters are also initialized before the run begins. The
setup\_minimal() method is a variant that has a flag for performing
less setup. This is used when runs are continued and information
from the previous run is still valid. For example, if repeated
short LAMMPS runs are being invoked, interleaved by other commands,
via the ``pre no'' and ``every'' options of the run command, the
setup\_minimal() method is used.
\item The force\_clear() method initializes force and other arrays to
zero before each timestep, so that forces (torques, etc) can be
accumulated.
\end{itemize}
Now for the Verlet::run() method. Its structure in high-level pseudo
code is shown in Fig \ref{fig:verlet}. In the actual code in
src/verlet.cpp some of these operations are conditionally invoked.
\begin{figure}[htb]
\begin{center}
\begin{verbatim}
loop over N timesteps:
ev_set()
fix->initial_integrate()
fix->post_integrate()
nflag = neighbor->decide()
if nflag:
fix->pre_exchange()
domain->pbc()
domain->reset_box()
comm->setup()
neighbor->setup_bins()
comm->exchange()
comm->borders()
fix->pre_neighbor()
neighbor->build()
else
comm->forward_comm()
force_clear()
fix->pre_force()
pair->compute()
bond->compute()
angle->compute()
dihedral->compute()
improper->compute()
kspace->compute()
comm->reverse_comm()
fix->post_force()
fix->final_integrate()
fix->end_of_step()
if any output on this step: output->write()
\end{verbatim}
\end{center}
\caption{Pseudo-code for the Verlet::run() method.}
\label{fig:verlet}
\end{figure}
The ev\_set() method (in the parent Integrate class), sets two flags
({\em eflag} and {\em vflag}) for energy and virial computation. Each
flag encodes whether global and/or per-atom energy and virial should
be calculated on this timestep, because some fix or variable or output
will need it. These flags are passed to the various methods that
compute particle interactions, so that they can skip the extra
calculations if the energy and virial are not needed. See the
comments with the Integrate::ev\_set() method which document the flag
values.
At various points of the timestep, fixes are invoked,
e.g. fix$\rightarrow$initial\_integrate(). In the code, this is
actually done via the Modify class which stores all the Fix objects
and lists of which should be invoked at what point in the timestep.
Fixes are the LAMMPS mechanism for tailoring the operations of a
timestep for a particular simulation. As described elsewhere
(unwritten section), each fix has one or more methods, each of which
is invoked at a specific stage of the timestep, as in Fig
\ref{fig:verlet}. All the fixes defined in an input script with an
initial\_integrate() method are invoked at the beginning of each
timestep. Fix nve, nvt, npt are examples, since they perform the
start-of-timestep velocity-Verlet integration to update velocities by
a half-step, and coordinates by a full step. The post\_integrate()
method is next. Only a few fixes use this, e.g. to reflect particles
off box boundaries in the FixWallReflect class.
The decide() method in the Neighbor class determines whether neighbor
lists need to be rebuilt on the current timestep. If not, coordinates
of ghost atoms are acquired by each processor via the forward\_comm()
method of the Comm class. If neighbor lists need to be built, several
operations within the inner if clause of Fig \ref{fig:verlet} are
first invoked. The pre\_exchange() method of any defined fixes is
invoked first. Typically this inserts or deletes particles from the
system.
Periodic boundary conditions are then applied by the Domain class via
its pbc() method to remap particles that have moved outside the
simulation box back into the box. Note that this is not done every
timestep, but only when neighbor lists are rebuilt. This is so that
each processor's sub-domain will have consistent (nearby) atom
coordinates for its owned and ghost atoms. It is also why dumped atom
coordinates can be slightly outside the simulation box.
The box boundaries are then reset (if needed) via the reset\_box()
method of the Domain class, e.g. if box boundaries are shrink-wrapped
to current particle coordinates. A change in the box size or shape
requires internal information for communicating ghost atoms (Comm
class) and neighbor list bins (Neighbor class) be updated. The
setup() method of the Comm class and setup\_bins() method of the
Neighbor class perform the update.
The code is now ready to migrate atoms that have left a processor's
geometric sub-domain to new processors. The exchange() method of the
Comm class performs this operation. The borders() method of the Comm
class then identifies ghost atoms surrounding each processor's
sub-domain and communicates ghost atom information to neighboring
processors. It does this by looping over all the atoms owned by a
processor to make lists of those to send to each neighbor processor.
On subsequent timesteps, the lists are used by the
Comm::forward\_comm() method.
Fixes with a pre\_neighbor() method are then called. These typically
re-build some data structure stored by the fix that depends on the
current atoms owned by each processor.
Now that each processor has a current list of its owned and ghost
atoms, LAMMPS is ready to rebuild neighbor lists via the build()
method of the Neighbor class. This is typically done by binning all
owned and ghost atoms, and scanning a stencil of bins around each
owned atom's bin to make a Verlet list of neighboring atoms within the
force cutoff plus neighbor skin distance.
In the next portion of the timestep, all interaction forces between
particles are computed, after zeroing the per-atom force vector via
the force\_clear() method. If the newton flag is set to ``on'' by the
newton command, forces on both owned and ghost atoms are calculated.
Pairwise forces are calculated first, which enables the global virial
(if requested) to be calculated cheaply (at the end of the
Pair::compute() method), by a dot product of atom coordinates and
forces. By including owned and ghost atoms in the dot product, the
effect of periodic boundary conditions is correctly accounted for.
Molecular topology interactions (bonds, angles, dihedrals, impropers)
are calculated next. The final contribution is from long-range
Coulombic interactions, invoked by the KSpace class.
If the newton flag is on, forces on ghost atoms are communicated and
summed back to their corresponding owned atoms. The reverse\_comm()
method of the Comm class performs this operation, which is essentially
the inverse operation of sending copies of owned atom coordinates to
other processors' ghost atoms.
At this point in the timestep, the total force on each atom is known.
Additional force constraints (external forces, SHAKE, etc) are applied
by Fixes that have a post\_force() method. The second half of the
velocity-Verlet integration is then performed (another half-step
update of the velocities) via fixes like nve, nvt, npt.
At the end of the timestep, fixes that define an end\_of\_step()
method are invoked. These typically perform a diagnostic calculation,
e.g. the ave/time and ave/spatial fixes. The final operation of the
timestep is to perform any requested output, via the write() method of
the Output class. There are 3 kinds of LAMMPS output: thermodynamic
output to the screen and log file, snapshots of atom data to a dump
file, and restart files. See the thermo\_style, dump, and restart
commands for more details.
The iteration performed by an energy minimization is similar to the
dynamics timestep of Fig \ref{fig:verlet}. Forces are computed,
neighbor lists are built as needed, atoms migrate to new processors,
and atom coordinates and forces are communicated to neighboring
processors. The only difference is what Fix class operations are
invoked when. Only a subset of LAMMPS fixes are useful during energy
minimization, as explained in their individual doc pages. The
relevant Fix class methods are min\_pre\_exchange(),
min\_pre\_force(), and min\_post\_force(). Each is invoked at the
appropriate place within the minimization iteration. For example, the
min\_post\_force() method is analogous to the post\_force() method for
dynamics; it is used to alter or constrain forces on each atom, which
affects the minimization procedure.
\pagebreak
\section{Extending LAMMPS}
The Section\_modify.html file in the doc directory of
the LAMMPS distribution gives an overview of how LAMMPS can
be extended by writing new classes that derive from existing
parent classes in LAMMPS. Here, some specific coding
details are provided for writing a new fix.
\subsection{New fixes}
(this section provided by Kirill Lykov)
\vspace{0.25cm}
Writing fixes is a flexible way of extending LAMMPS. Users can
implement many things using fixes:
\begin{itemize}
\item changing particles attributes (positions, velocities, forces, etc.).
Example: FixFreeze.
\item reading/writing data. Example: FixRestart.
\item implementing boundary conditions. Example: FixWall.
\item saving information about particles for future use (previous positions,
for instance). Example: FixStoreState.
\end{itemize}
All fixes are derived from class Fix and must have a constructor with the
signature: FixMine(class LAMMPS *, int, char **).
Every fix must be registered in LAMMPS by writing the following lines
of code in the header before include guards:
\begin{center}
\begin{verbatim}
#ifdef FIX_CLASS
FixStyle(your/fix/name,FixMine)
#else
\end{verbatim}
\end{center}
Where ``your/fix/name'' is the name of your fix in the script and FixMine
is the name of the class. This code allows LAMMPS to find your fix
when it parses an input script. In addition, your fix header must be
included in the file ``style\_fix.h''. If you use the LAMMPS make,
this file is generated automatically - all files starting with prefix
fix\_ are included, so call your header the same way. Otherwise, don't
forget to add your include into ``style\_fix.h''.
Let's write a simple fix which will print average velocity at the end
of each timestep. First of all, implement a constructor:
\begin{center}
\begin{verbatim}
FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
: Fix(lmp, narg, arg)
{
if (narg < 4)
error->all(FLERR,"Illegal fix print command");
nevery = atoi(arg[3]);
if (nevery <= 0)
error->all(FLERR,"Illegal fix print command");
}
\end{verbatim}
\end{center}
In the constructor you should parse your fix arguments which are
specified in the script. All fixes have pretty much the same syntax: fix
[fix\_identifier] [group\_name] [fix\_name] [fix\_arguments]. The
first 3 parameters are parsed by Fix class constructor, while
[fix\_arguments] should be parsed by you. In our case, we need to
specify how often we want to print an average velocity. For instance,
once every 50 timesteps: fix 1 all print/vel 50. There is a special variable
in Fix class called nevery which specifies how often method
end\_of\_step() is called. Thus all we need to do is just set it up.
The next method we need to implement is setmask():
\begin{center}
\begin{verbatim}
int FixPrintVel::setmask()
{
int mask = 0;
mask |= FixConst::END_OF_STEP;
return mask;
}
\end{verbatim}
\end{center}
Here you specify which methods of your fix should be called during
execution. For instance, END\_OF\_STEP corresponds to the
end\_of\_step() method. The 8 most important methods are listed
below; they are called in a predefined order during the execution of
the Verlet algorithm, as described in Section 3:
\begin{itemize}
\item initial\_integrate()
\item post\_integrate()
\item pre\_exchange()
\item pre\_neighbor()
\item pre\_force()
\item post\_force()
\item final\_integrate()
\item end\_of\_step()
\end{itemize}
The fix developer must decide at which point of the timestep the code
should be executed. For FixPrintVel, we need only end\_of\_step():
\begin{center}
\begin{verbatim}
void FixPrintVel::end_of_step()
{
// for add3, scale3
using namespace MathExtra;
double** v = atom->v;
int nlocal = atom->nlocal;
double localAvgVel[4]; // 4th element for particles count
memset(localAvgVel, 0, 4 * sizeof(double));
for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
add3(localAvgVel, v[particleInd], localAvgVel);
}
localAvgVel[3] = nlocal;
double globalAvgVel[4];
memset(globalAvgVel, 0, 4 * sizeof(double));
MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
scale3(1.0 / globalAvgVel[3], globalAvgVel);
if (comm->me == 0) {
printf("%e, %e, %e\n",
globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
}
}
\end{verbatim}
\end{center}
In the code above, we use MathExtra routines defined in
``math\_extra.h''. There are a bunch of math functions to work with
arrays of doubles as with math vectors.
In this code we use an instance of Atom class. This object is stored
in the Pointers class (see ``pointers.h''). This object contains all
global information about the simulation system. Data from the Pointers
class is available to all classes inherited from it using protected
inheritance. Hence when you write your own class, which is going to use
LAMMPS data, don't forget to inherit from Pointers. When writing
fixes we inherit from class Fix which is inherited from Pointers so
there is no need to inherit from it directly.
The code above computes the average velocity for all particles in the
simulation. Yet there is one unused parameter in the fix call from the
script - [group\_name]. This parameter specifies the group of atoms
used in the fix. So we should compute the average for all particles in the
simulation if group\_name == all, but it can be any group. The group
information is specified by groupbit, which is defined in class Fix:
\begin{center}
\begin{verbatim}
for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
if (atom->mask[particleInd] & groupbit) {
//Do all job here
}
}
\end{verbatim}
\end{center}
Class Atom encapsulates atom positions, velocities, forces, etc. The user
can access them using the particle index. Note that particle indices are
usually changed every timestep because of sorting.
Let's consider another Fix example. We want to have a fix which stores
the atom positions from the previous time step. The local atom
indices will not be valid on the next iteration. In order to handle
this situation there are several methods which should be implemented:
\begin{itemize}
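\item \verb|double memory_usage()| - returns how much memory the fix uses
\item \verb|void grow_arrays(int)| - reallocates the per-particle arrays
in your fix
\item \verb|void copy_arrays(int i, int j, int delflag)| - copies the i-th per-particle
information to the j-th. Used when atom sorting is performed. If delflag is set
and atom j owns a body, move the body information to atom i.
\item \verb|void set_arrays(int i)| - sets the i-th particle's information to zero
\item \verb|int pack_exchange(int i, double *buf)| - stores the i-th particle's
information in buf and returns the number of values stored
\item \verb|int unpack_exchange(int nlocal, double *buf)| - reads the information
for particle nlocal from buf and returns the number of values read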
Note that if your class implements these methods, it must add calls of
add\_callback and delete\_callback to its constructor and destructor:
\begin{center}
\begin{verbatim}
FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg) {
//...
atom->add_callback(0);
}
FixSavePos::~FixSavePos() {
atom->delete_callback(id, 0);
}
\end{verbatim}
\end{center}
Since we want to store positions of atoms from the previous timestep, we
need to add \verb|double** x| to the header file. Then add allocation code to
the constructor:
\verb|memory->create(this->x, atom->nmax, 3, "FixSavePos:x");|. Free the memory
in the destructor: \verb|memory->destroy(x);|
Finally, implement the mentioned methods:
\begin{center}
\begin{verbatim}
double FixSavePos::memory_usage()
{
int nmax = atom->nmax;
double bytes = 0.0;
bytes += nmax * 3 * sizeof(double);
return bytes;
}
void FixSavePos::grow_arrays(int nmax)
{
memory->grow(this->x, nmax, 3, "FixSavePos:x");
}
void FixSavePos::copy_arrays(int i, int j, int delflag)
{
memcpy(this->x[j], this->x[i], sizeof(double) * 3);
}
void FixSavePos::set_arrays(int i)
{
memset(this->x[i], 0, sizeof(double) * 3);
}
int FixSavePos::pack_exchange(int i, double *buf)
{
int m = 0;
buf[m++] = x[i][0];
buf[m++] = x[i][1];
buf[m++] = x[i][2];
return m;
}
int FixSavePos::unpack_exchange(int nlocal, double *buf)
{
int m = 0;
x[nlocal][0] = buf[m++];
x[nlocal][1] = buf[m++];
x[nlocal][2] = buf[m++];
return m;
}
\end{verbatim}
\end{center}
Now, a little bit about memory allocation. We used the Memory class, which
is just a bunch of template functions for allocating 1D and 2D
arrays. So you need to include ``memory.h'' to have access to them.
Finally, if you need to write/read some global information used in
your fix to/from the restart file, you can do so by setting the flag
restart\_global = 1 in the constructor and implementing the methods void
write\_restart(FILE *fp) and void restart(char *buf).
\end{document}

View File

@ -502,7 +502,7 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
*Bond/react: Unknown section in map file* *Bond/react: Unknown section in map file*
Please ensure reaction map files are properly formatted. Please ensure reaction map files are properly formatted.
*Bond/react: Atom affected by reaction too close to template edge* *Bond/react: Atom/Bond type affected by reaction too close to template edge*
This means an atom which changes type or connectivity during the This means an atom which changes type or connectivity during the
reaction is too close to an 'edge' atom defined in the map reaction is too close to an 'edge' atom defined in the map
file. This could cause incorrect assignment of bonds, angle, etc. file. This could cause incorrect assignment of bonds, angle, etc.

View File

@ -191,19 +191,19 @@ You start the command ``ccmake ../cmake`` in the ``build`` folder.
.. list-table:: .. list-table::
* - .. figure:: JPG/ccmake-initial.png * - .. figure:: JPG/ccmake-initial.png
:target: JPG/ccmake-initial.png :scale: 33%
:align: center :align: center
Initial ``ccmake`` screen Initial ``ccmake`` screen
- .. figure:: JPG/ccmake-config.png - .. figure:: JPG/ccmake-config.png
:target: JPG/ccmake-config.png :scale: 33%
:align: center :align: center
Configure output of ``ccmake`` Configure output of ``ccmake``
- .. figure:: JPG/ccmake-options.png - .. figure:: JPG/ccmake-options.png
:target: JPG/ccmake-options.png :scale: 33%
:align: center :align: center
Options screen of ``ccmake`` Options screen of ``ccmake``
@ -236,19 +236,19 @@ not required, it can also be entered from the GUI.
.. list-table:: .. list-table::
* - .. figure:: JPG/cmake-gui-initial.png * - .. figure:: JPG/cmake-gui-initial.png
:target: JPG/cmake-gui-initial.png :scale: 40%
:align: center :align: center
Initial ``cmake-gui`` screen Initial ``cmake-gui`` screen
- .. figure:: JPG/cmake-gui-popup.png - .. figure:: JPG/cmake-gui-popup.png
:target: JPG/cmake-gui-popup.png :scale: 60%
:align: center :align: center
Generator selection in ``cmake-gui`` Generator selection in ``cmake-gui``
- .. figure:: JPG/cmake-gui-options.png - .. figure:: JPG/cmake-gui-options.png
:target: JPG/cmake-gui-options.png :scale: 40%
:align: center :align: center
Options screen of ``cmake-gui`` Options screen of ``cmake-gui``

View File

@ -12,96 +12,52 @@ LAMMPS can be coupled to other codes in at least 4 ways. Each has
advantages and disadvantages, which you will have to think about in the advantages and disadvantages, which you will have to think about in the
context of your application. context of your application.
---------- 1. Define a new :doc:`fix <fix>` command that calls the other code. In
this scenario, LAMMPS is the driver code. During timestepping,
the fix is invoked, and can make library calls to the other code,
which has been linked to LAMMPS as a library. This is the way how the
:ref:`LATTE <PKG-LATTE>` package, which performs density-functional
tight-binding calculations using the `LATTE software <https://github.com/lanl/LATTE>`_
to compute forces, is hooked to LAMMPS.
See the :doc:`fix latte <fix_latte>` command for more details.
Also see the :doc:`Modify <Modify>` doc pages for info on how to
add a new fix to LAMMPS.
(1) Define a new :doc:`fix <fix>` command that calls the other code. In .. spacer
this scenario, LAMMPS is the driver code. During its timestepping,
the fix is invoked, and can make library calls to the other code,
which has been linked to LAMMPS as a library. This is the way the
`POEMS <poems_>`_ package that performs constrained rigid-body motion on
groups of atoms is hooked to LAMMPS. See the :doc:`fix poems <fix_poems>` command for more details. See the
:doc:`Modify <Modify>` doc pages for info on how to add a new fix to
LAMMPS.
.. _poems: http://www.rpi.edu/~anderk5/lab 2. Define a new LAMMPS command that calls the other code. This is
conceptually similar to method (1), but in this case LAMMPS and the
other code are on a more equal footing. Note that now the other code
is not called during the timestepping of a LAMMPS run, but between
runs. The LAMMPS input script can be used to alternate LAMMPS runs
with calls to the other code, invoked via the new command. The
:doc:`run <run>` command facilitates this with its *every* option,
which makes it easy to run a few steps, invoke the command, run a few
steps, invoke the command, etc.
---------- In this scenario, the other code can be called as a library, as in
1., or it could be a stand-alone code, invoked by a system() call
made by the command (assuming your parallel machine allows one or
more processors to start up another program). In the latter case the
stand-alone code could communicate with LAMMPS through files that the
command writes and reads.
(2) Define a new LAMMPS command that calls the other code. This is See the :doc:`Modify command <Modify_command>` doc page for info on how
conceptually similar to method (1), but in this case LAMMPS and the to add a new command to LAMMPS.
other code are on a more equal footing. Note that now the other code
is not called during the timestepping of a LAMMPS run, but between
runs. The LAMMPS input script can be used to alternate LAMMPS runs
with calls to the other code, invoked via the new command. The
:doc:`run <run>` command facilitates this with its *every* option, which
makes it easy to run a few steps, invoke the command, run a few steps,
invoke the command, etc.
In this scenario, the other code can be called as a library, as in .. spacer
(1), or it could be a stand-alone code, invoked by a system() call
made by the command (assuming your parallel machine allows one or more
processors to start up another program). In the latter case the
stand-alone code could communicate with LAMMPS through files that the
command writes and reads.
See the :doc:`Modify command <Modify_command>` doc page for info on how 3. Use LAMMPS as a library called by another code. In this case the
to add a new command to LAMMPS. other code is the driver and calls LAMMPS as needed. Or a wrapper
code could link and call both LAMMPS and another code as libraries.
Again, the :doc:`run <run>` command has options that allow it to be
invoked with minimal overhead (no setup or clean-up) if you wish to
do multiple short runs, driven by another program. Details about
using the library interface are given in the :doc:`library API
<pg_library>` documentation.
---------- .. spacer
(3) Use LAMMPS as a library called by another code. In this case the 4. Couple LAMMPS with another code in a client/server mode. This is
other code is the driver and calls LAMMPS as needed. Or a wrapper described on the :doc:`Howto client/server <Howto_client_server>` doc
code could link and call both LAMMPS and another code as libraries. page.
Again, the :doc:`run <run>` command has options that allow it to be
invoked with minimal overhead (no setup or clean-up) if you wish to do
multiple short runs, driven by another program.
Examples of driver codes that call LAMMPS as a library are included in
the examples/COUPLE directory of the LAMMPS distribution; see
examples/COUPLE/README for more details:
* simple: simple driver programs in C++ and C which invoke LAMMPS as a
library
* plugin: simple driver program in C which invokes LAMMPS as a plugin
from a shared library.
* lammps_quest: coupling of LAMMPS and `Quest <quest_>`_, to run classical
MD with quantum forces calculated by a density functional code
* lammps_spparks: coupling of LAMMPS and `SPPARKS <spparks_>`_, to couple
a kinetic Monte Carlo model for grain growth using MD to calculate
strain induced across grain boundaries
.. _quest: http://dft.sandia.gov/Quest
.. _spparks: http://www.sandia.gov/~sjplimp/spparks.html
The :doc:`Build basics <Build_basics>` doc page describes how to build
LAMMPS as a library. Once this is done, you can interface with LAMMPS
either via C++, C, Fortran, or Python (or any other language that
supports a vanilla C-like interface). For example, from C++ you could
create one (or more) "instances" of LAMMPS, pass it an input script to
process, or execute individual commands, all by invoking the correct
class methods in LAMMPS. From C or Fortran you can make function
calls to do the same things. See the :doc:`Python <Python_head>` doc
pages for a description of the Python wrapper provided with LAMMPS
that operates through the LAMMPS library interface.
The files src/library.cpp and library.h contain the C-style interface
to LAMMPS. See the :doc:`Howto library <Howto_library>` doc page for a
description of the interface and how to extend it for your needs.
Note that the lammps_open() function that creates an instance of
LAMMPS takes an MPI communicator as an argument. This means that
instance of LAMMPS will run on the set of processors in the
communicator. Thus the calling code can run LAMMPS on all or a subset
of processors. For example, a wrapper script might decide to
alternate between LAMMPS and another code, allowing them both to run
on all the processors. Or it might allocate half the processors to
LAMMPS and half to the other code and run both codes simultaneously
before syncing them up periodically. Or it might instantiate multiple
instances of LAMMPS to perform different calculations.
----------
(4) Couple LAMMPS with another code in a client/server mode. This is
described on the :doc:`Howto client/server <Howto_client_server>` doc
page.

View File

@ -2,241 +2,36 @@ Library interface to LAMMPS
=========================== ===========================
As described on the :doc:`Build basics <Build_basics>` doc page, LAMMPS As described on the :doc:`Build basics <Build_basics>` doc page, LAMMPS
can be built as a library, so that it can be called by another code, can be built as a static or shared library, so that it can be called by
used in a :doc:`coupled manner <Howto_couple>` with other codes, or another code, used in a :doc:`coupled manner <Howto_couple>` with other
driven through a :doc:`Python interface <Python_head>`. codes, or driven through a :doc:`Python interface <Python_head>`.
All of these methodologies use a C-style interface to LAMMPS that is At the core of LAMMPS is the ``LAMMPS`` class which encapsulates the
provided in the files src/library.cpp and src/library.h. The state of the simulation program through the state of the various class
functions therein have a C-style argument list, but contain C++ code instances that it is composed of. So a calculation using LAMMPS
you could write yourself in a C++ application that was invoking LAMMPS requires to create an instance of the ``LAMMPS`` class and then send it
directly. The C++ code in the functions illustrates how to invoke (text) commands, either individually or from a file, or perform other
internal LAMMPS operations. Note that LAMMPS classes are defined operations that modify the state stored inside that instance or drive
within a LAMMPS namespace (LAMMPS_NS) if you use them from another C++ simulations. This is essentially what the ``src/main.cpp`` file does
application. as well for the standalone LAMMPS executable with reading commands
either from an input file or stdin.
The examples/COUPLE and python/examples directories have example C++ Creating a LAMMPS instance can be done by using C++ code directly or
and C and Python codes which show how a driver code can link to LAMMPS through a C-style interface library to LAMMPS that is provided in the
as a library, run LAMMPS on a subset of processors, grab data from files ``src/library.cpp`` and ``library.h``. This
LAMMPS, change it, and put it back into LAMMPS. :ref:`C language API <lammps_c_api>`, can be used from C and C++,
and is also the basis for the :doc:`Python <pg_python>` and
:doc:`Fortran <pg_fortran>` interfaces or wrappers included in the
LAMMPS source code.
Thread-safety The ``examples/COUPLE`` and ``python/examples`` directories contain some
------------- example programs written in C++, C, Fortran, and Python, which show how
a driver code can link to LAMMPS as a library, run LAMMPS on a subset of
processors (so the others are available to run some other code
concurrently), grab data from LAMMPS, change it, and send it back into
LAMMPS.
LAMMPS has not initially been conceived as a thread-safe program, but A detailed documentation of the available APIs and examples of how to
over the years changes have been applied to replace operations that use them can be found in the :doc:`Programmer Documentation
collide with creating multiple LAMMPS instances from multiple-threads <pg_library>` section of this manual.
of the same process with thread-safe alternatives. This primarily
applies to the core LAMMPS code and less so to add-on packages, especially
when those packages require additional code in the *lib* folder,
interface LAMMPS to Fortran libraries, or the code uses static variables
(like the USER-COLVARS package).
Another major issue to deal with is to correctly handle MPI. Creating
a LAMMPS instance requires passing an MPI communicator, or it assumes
the MPI_COMM_WORLD communicator, which spans all MPI processor ranks.
When creating multiple LAMMPS object instances from different threads,
this communicator has to be different for each thread or else collisions
can happen, or it has to be guaranteed, that only one thread at a time
is active. MPI communicators, however, are not a problem, if LAMMPS is
compiled with the MPI STUBS library, which implies that there is no MPI
communication and only 1 MPI rank.
Provided APIs
-------------
The file src/library.cpp contains the following functions for creating
and destroying an instance of LAMMPS and sending it commands to
execute. See the documentation in the src/library.cpp file for
details.
.. note::
You can write code for additional functions as needed to define
how your code talks to LAMMPS and add them to src/library.cpp and
src/library.h, as well as to the :doc:`Python interface <Python_head>`.
The added functions can access or change any internal LAMMPS data you
wish.
.. code-block:: c
void lammps_open(int, char **, MPI_Comm, void **)
void lammps_open_no_mpi(int, char **, void **)
void lammps_close(void *)
int lammps_version(void *)
void lammps_file(void *, char *)
char *lammps_command(void *, char *)
void lammps_commands_list(void *, int, char **)
void lammps_commands_string(void *, char *)
void lammps_free(void *)
The lammps_open() function is used to initialize LAMMPS, passing in a
list of strings as if they were :doc:`command-line arguments <Run_options>` when LAMMPS is run in stand-alone mode
from the command line, and a MPI communicator for LAMMPS to run under.
It returns a ptr to the LAMMPS object that is created, and which is
used in subsequent library calls. The lammps_open() function can be
called multiple times, to create multiple instances of LAMMPS.
LAMMPS will run on the set of processors in the communicator. This
means the calling code can run LAMMPS on all or a subset of
processors. For example, a wrapper script might decide to alternate
between LAMMPS and another code, allowing them both to run on all the
processors. Or it might allocate half the processors to LAMMPS and
half to the other code and run both codes simultaneously before
syncing them up periodically. Or it might instantiate multiple
instances of LAMMPS to perform different calculations.
The lammps_open_no_mpi() function is similar except that no MPI
communicator is passed from the caller. Instead, MPI_COMM_WORLD is
used to instantiate LAMMPS, and MPI is initialized if necessary.
The lammps_close() function is used to shut down an instance of LAMMPS
and free all its memory.
The lammps_version() function can be used to determine the specific
version of the underlying LAMMPS code. This is particularly useful
when loading LAMMPS as a shared library via dlopen(). The code using
the library interface can then use this information to adapt to
changes to the LAMMPS command syntax between versions. The returned
LAMMPS version code is an integer (e.g. 2 Sep 2015 results in
20150902) that grows with every new LAMMPS version.
The lammps_file(), lammps_command(), lammps_commands_list(), and
lammps_commands_string() functions are used to pass one or more
commands to LAMMPS to execute, the same as if they were coming from an
input script.
Via these functions, the calling code can read or generate a series of
LAMMPS commands one or multiple at a time and pass it through the library
interface to setup a problem and then run it in stages. The caller
can interleave the command function calls with operations it performs,
calls to extract information from or set information within LAMMPS, or
calls to another code's library.
The lammps_file() function passes the filename of an input script.
The lammps_command() function passes a single command as a string.
The lammps_commands_list() function passes multiple commands in a
char\*\* list. In both lammps_command() and lammps_commands_list(),
individual commands may or may not have a trailing newline. The
lammps_commands_string() function passes multiple commands
concatenated into one long string, separated by newline characters.
In both lammps_commands_list() and lammps_commands_string(), a single
command can be spread across multiple lines, if the last printable
character of all but the last line is "&", the same as if the lines
appeared in an input script.
The lammps_free() function is a clean-up function to free memory that
the library allocated previously via other function calls. See
comments in src/library.cpp file for which other functions need this
clean-up.
The file src/library.cpp also contains these functions for extracting
information from LAMMPS and setting value within LAMMPS. Again, see
the documentation in the src/library.cpp file for details, including
which quantities can be queried by name:
.. code-block:: c
int lammps_extract_setting(void *, char *)
void *lammps_extract_global(void *, char *)
void lammps_extract_box(void *, double *, double *,
double *, double *, double *, int *, int *)
void *lammps_extract_atom(void *, char *)
void *lammps_extract_compute(void *, char *, int, int)
void *lammps_extract_fix(void *, char *, int, int, int, int)
void *lammps_extract_variable(void *, char *, char *)
The extract_setting() function returns info on the size
of data types (e.g. 32-bit or 64-bit atom IDs) used
by the LAMMPS executable (a compile-time choice).
The other extract functions return a pointer to various global or
per-atom quantities stored in LAMMPS or to values calculated by a
compute, fix, or variable. The pointer returned by the
extract_global() function can be used as a permanent reference to a
value which may change. For the extract_atom() method, see the
extract() method in the src/atom.cpp file for a list of valid per-atom
properties. New names could easily be added if the property you want
is not listed. For the other extract functions, the underlying
storage may be reallocated as LAMMPS runs, so you need to re-call the
function to assure a current pointer or returned value(s).
.. code-block:: c
double lammps_get_thermo(void *, char *)
int lammps_get_natoms(void *)
int lammps_set_variable(void *, char *, char *)
void lammps_reset_box(void *, double *, double *, double, double, double)
The lammps_get_thermo() function returns the current value of a thermo
keyword as a double precision value.
The lammps_get_natoms() function returns the total number of atoms in
the system and can be used by the caller to allocate memory for the
lammps_gather_atoms() and lammps_scatter_atoms() functions.
The lammps_set_variable() function can set an existing string-style
variable to a new string value, so that subsequent LAMMPS commands can
access the variable.
The lammps_reset_box() function resets the size and shape of the
simulation box, e.g. as part of restoring a previously extracted and
saved state of a simulation.
.. code-block:: c
void lammps_gather_atoms(void *, char *, int, int, void *)
void lammps_gather_atoms_concat(void *, char *, int, int, void *)
void lammps_gather_atoms_subset(void *, char *, int, int, int, int *, void *)
void lammps_scatter_atoms(void *, char *, int, int, void *)
void lammps_scatter_atoms_subset(void *, char *, int, int, int, int *, void *)
The gather functions collect peratom info of the requested type (atom
coords, atom types, forces, etc) from all processors, and returns the
same vector of values to each calling processor. The scatter
functions do the inverse. They distribute a vector of peratom values,
passed by all calling processors, to individual atoms, which may be
owned by different processors.
.. warning::
These functions are not compatible with the
-DLAMMPS_BIGBIG setting when compiling LAMMPS. Dummy functions
that result in an error message and abort will be substituted
instead of resulting in random crashes and memory corruption.
The lammps_gather_atoms() function does this for all N atoms in the
system, ordered by atom ID, from 1 to N. The
lammps_gather_atoms_concat() function does it for all N atoms, but
simply concatenates the subset of atoms owned by each processor. The
resulting vector is not ordered by atom ID. Atom IDs can be requested
by the same function if the caller needs to know the ordering. The
lammps_gather_atoms_subset() function allows the caller to request values
for only a subset of atoms (identified by ID).
For all 3 gather functions, per-atom image flags can be retrieved in 2 ways.
If the count is specified as 1, they are returned
in a packed format with all three image flags stored in a single integer.
If the count is specified as 3, the values are unpacked into xyz flags
by the library before returning them.
The lammps_scatter_atoms() function takes a list of values for all N
atoms in the system, ordered by atom ID, from 1 to N, and assigns
those values to each atom in the system. The
lammps_scatter_atoms_subset() function takes a subset of IDs as an
argument and only scatters those values to the owning atoms.
.. code-block:: c
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
imageint *, int)
The lammps_create_atoms() function takes a list of N atoms as input
with atom types and coords (required), and optionally atom IDs and
velocities and image flags. It uses the coords of each atom to assign
it as a new atom to the processor that owns it. This function is
useful to add atoms to a simulation or (in tandem with
lammps_reset_box()) to restore a previously extracted and saved state
of a simulation. Additional properties for the new atoms can then be
assigned via the lammps_scatter_atoms() or lammps_extract_atom()
functions.

View File

@ -79,13 +79,13 @@ To get a copy of the current potentials files:
which will download the potentials files to which will download the potentials files to
``/usr/share/lammps-stable/potentials``. The ``lmp_stable`` binary is ``/usr/share/lammps-stable/potentials``. The ``lmp_stable`` binary is
hard-coded to look for potential files in this directory (it does not hard-coded to look for potential files in this directory (it does not
use the `LAMMPS_POTENTIALS` environment variable, as described use the ``LAMMPS_POTENTIALS`` environment variable, as described
in :doc:`pair_coeff <pair_coeff>` command). in :doc:`pair_coeff <pair_coeff>` command).
The ``lmp_stable`` binary is built with the :ref:`KIM package <kim>` which The ``lmp_stable`` binary is built with the :ref:`KIM package <kim>` which
results in the above command also installing the `kim-api` binaries when LAMMPS results in the above command also installing the ``kim-api`` binaries when LAMMPS
is installed. In order to use potentials from `openkim.org <openkim_>`_, you is installed. In order to use potentials from `openkim.org <openkim_>`_, you
can install the `openkim-models` package can install the ``openkim-models`` package
.. code-block:: bash .. code-block:: bash

View File

@ -23,7 +23,6 @@ this Intr are included in this list.
* `Mail list <https://lammps.sandia.gov/mail.html>`_ * `Mail list <https://lammps.sandia.gov/mail.html>`_
* `Workshops <https://lammps.sandia.gov/workshops.html>`_ * `Workshops <https://lammps.sandia.gov/workshops.html>`_
* `Tutorials <https://lammps.sandia.gov/tutorials.html>`_ * `Tutorials <https://lammps.sandia.gov/tutorials.html>`_
* `Developer guide <https://lammps.sandia.gov/Developer.pdf>`_
* `Pre- and post-processing tools for LAMMPS <https://lammps.sandia.gov/prepost.html>`_ * `Pre- and post-processing tools for LAMMPS <https://lammps.sandia.gov/prepost.html>`_
* `Other software usable with LAMMPS <https://lammps.sandia.gov/offsite.html>`_ * `Other software usable with LAMMPS <https://lammps.sandia.gov/offsite.html>`_

Binary file not shown.

After

Width:  |  Height:  |  Size: 245 KiB

View File

@ -27,8 +27,7 @@ all LAMMPS development is coordinated.
The content for this manual is part of the LAMMPS distribution. You The content for this manual is part of the LAMMPS distribution. You
can build a local copy of the Manual as HTML pages or a PDF file, by can build a local copy of the Manual as HTML pages or a PDF file, by
following the steps on the :doc:`Manual build <Manual_build>` doc page. following the steps on the :doc:`Manual build <Manual_build>` doc page.
There is also a `Developer.pdf <Developer.pdf>`_ document which gives The manual is split into two parts: 1) User documentation and 2) Programmer documentation.
a brief description of the basic code structure of LAMMPS.
---------- ----------
@ -55,11 +54,24 @@ every LAMMPS command.
Howto Howto
Examples Examples
Tools Tools
Modify
Python_head Python_head
Errors Errors
Manual_build Manual_build
.. _programmer_documentation:
.. toctree::
:maxdepth: 2
:numbered: 3
:caption: Programmer Documentation
:name: progdoc
:includehidden:
pg_library
Modify
pg_developer
.. pg_modify
.. pg_base
.. toctree:: .. toctree::
:caption: Index :caption: Index
:name: index :name: index

View File

@ -14,7 +14,6 @@ files. Here is a list with descriptions:
lammps.1 # man page for the lammps command lammps.1 # man page for the lammps command
msi2lmp.1 # man page for the msi2lmp command msi2lmp.1 # man page for the msi2lmp command
Manual.pdf # large PDF version of entire manual Manual.pdf # large PDF version of entire manual
Developer.pdf # small PDF with info about how LAMMPS is structured
LAMMPS.epub # Manual in ePUB e-book format LAMMPS.epub # Manual in ePUB e-book format
LAMMPS.mobi # Manual in MOBI e-book format LAMMPS.mobi # Manual in MOBI e-book format
docenv # virtualenv folder for processing the manual sources docenv # virtualenv folder for processing the manual sources
@ -35,7 +34,7 @@ of two ways:
a. You can "fetch" the current HTML and PDF files from the LAMMPS web a. You can "fetch" the current HTML and PDF files from the LAMMPS web
site. Just type ``make fetch``. This should download a html_www site. Just type ``make fetch``. This should download a html_www
directory and Manual_www.pdf/Developer_www.pdf files. Note that if directory and a Manual_www.pdf file. Note that if
new LAMMPS features have been added more recently than the date of new LAMMPS features have been added more recently than the date of
your LAMMPS version, the fetched documentation will include those your LAMMPS version, the fetched documentation will include those
changes (but your source code will not, unless you update your local changes (but your source code will not, unless you update your local
@ -49,6 +48,11 @@ b. You can build the HTML or PDF files yourself, by typing ``make html``
only once, unless you type ``make clean-all``. After that, viewing and only once, unless you type ``make clean-all``. After that, viewing and
processing of the documentation can be done without internet access. processing of the documentation can be done without internet access.
A current version of the manual (latest patch release, aka unstable branch)
is available online at: `https://lammps.sandia.gov/doc/Manual.html <https://lammps.sandia.gov/doc/Manual.html>`_
A version of the manual corresponding to the ongoing development
(aka master branch) is available online at: `https://docs.lammps.org/ <https://docs.lammps.org/>`_
---------- ----------
The generation of all documentation is managed by the Makefile in the The generation of all documentation is managed by the Makefile in the
@ -58,10 +62,9 @@ available:
.. code-block:: bash .. code-block:: bash
make html # generate HTML in html dir using Sphinx make html # generate HTML in html dir using Sphinx
make pdf # generate 2 PDF files (Manual.pdf,Developer.pdf) make pdf # generate PDF as Manual.pdf using Sphinx and pdflatex
# in doc dir via htmldoc and pdflatex make fetch # fetch HTML doc pages and PDF file from web site
make fetch # fetch HTML doc pages and 2 PDF files from web site # as a tarball and unpack into html dir and PDF
# as a tarball and unpack into html dir and 2 PDFs
make epub # generate LAMMPS.epub in ePUB format using Sphinx make epub # generate LAMMPS.epub in ePUB format using Sphinx
make mobi # generate LAMMPS.mobi in MOBI format using ebook-convert make mobi # generate LAMMPS.mobi in MOBI format using ebook-convert

View File

@ -1,5 +1,5 @@
Modify & extend LAMMPS Modifying & extending LAMMPS
********************** ****************************
LAMMPS is designed in a modular fashion so as to be easy to modify and LAMMPS is designed in a modular fashion so as to be easy to modify and
extend with new functionality. In fact, about 95% of its source code extend with new functionality. In fact, about 95% of its source code

View File

@ -1692,7 +1692,7 @@ USER-MEAMC package
**Contents:** **Contents:**
A pair style for the modified embedded atom (MEAM) potential A pair style for the modified embedded atom (MEAM) potential
translated from the Fortran version in the (obsolete) "MEAM" package translated from the Fortran version in the (obsolete) MEAM package
to plain C++. The USER-MEAMC package fully replaces the MEAM package, which to plain C++. The USER-MEAMC package fully replaces the MEAM package, which
has been removed from LAMMPS after the 12 December 2018 version. has been removed from LAMMPS after the 12 December 2018 version.

View File

@ -6,7 +6,7 @@ name gives more details.
User packages have been contributed by users, and begin with the User packages have been contributed by users, and begin with the
"user" prefix. If a contribution is a single command (single file), "user" prefix. If a contribution is a single command (single file),
it is typically in the user-misc package. User packages don't it is typically in the USER-MISC package. User packages don't
necessarily meet the requirements of the :doc:`standard packages <Packages_standard>`. This means the developers will try necessarily meet the requirements of the :doc:`standard packages <Packages_standard>`. This means the developers will try
to keep things working and usually can answer technical questions to keep things working and usually can answer technical questions
about compiling the package. If you have problems using a specific about compiling the package. If you have problems using a specific

View File

@ -89,7 +89,6 @@ Miscellaneous tools
:columns: 6 :columns: 6
* :ref:`CMake <cmake>` * :ref:`CMake <cmake>`
* :ref:`doxygen <doxygen>`
* :ref:`emacs <emacs>` * :ref:`emacs <emacs>`
* :ref:`i-pi <ipi>` * :ref:`i-pi <ipi>`
* :ref:`kate <kate>` * :ref:`kate <kate>`
@ -254,21 +253,6 @@ The tool is authored by Xiaowang Zhou (Sandia), xzhou at sandia.gov.
---------- ----------
.. _doxygen:
doxygen tool
--------------------------
The tools/doxygen directory contains a shell script called
doxygen.sh which can generate a call graph and API lists using
the `Doxygen software <http://doxygen.org>`_.
See the included README file for details.
The tool is authored by Nandor Tamaskovics, numericalfreedom at googlemail.com.
----------
.. _drude: .. _drude:
drude tool drude tool

View File

@ -14,19 +14,22 @@ Syntax
react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ... react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ...
... ...
* ID, group-ID are documented in :doc:`fix <fix>` command. Group-ID is ignored. * ID, group-ID are documented in :doc:`fix <fix>` command.
* bond/react = style name of this fix command * bond/react = style name of this fix command
* the common keyword/values may be appended directly after 'bond/react' * the common keyword/values may be appended directly after 'bond/react'
* this applies to all reaction specifications (below) * this applies to all reaction specifications (below)
* common_keyword = *stabilization* * common_keyword = *stabilization* or *reset_mol_ids*
.. parsed-literal:: .. parsed-literal::
*stabilization* values = *no* or *yes* *group-ID* *xmax* *stabilization* values = *no* or *yes* *group-ID* *xmax*
*no* = no reaction site stabilization *no* = no reaction site stabilization (default)
*yes* = perform reaction site stabilization *yes* = perform reaction site stabilization
*group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction *group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction
*xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator *xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator
*reset_mol_ids* values = *yes* or *no*
*yes* = update molecule IDs based on new global topology (default)
*no* = do not update molecule IDs
* react = mandatory argument indicating new reaction specification * react = mandatory argument indicating new reaction specification
* react-ID = user-assigned name for the reaction * react-ID = user-assigned name for the reaction
@ -50,9 +53,9 @@ Syntax
*stabilize_steps* value = timesteps *stabilize_steps* value = timesteps
timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms
*update_edges* value = *none* or *charges* or *custom* *update_edges* value = *none* or *charges* or *custom*
none = do not update topology near the edges of reaction templates *none* = do not update topology near the edges of reaction templates
charges = update atomic charges of all atoms in reaction templates *charges* = update atomic charges of all atoms in reaction templates
custom = force the update of user-specified atomic charges *custom* = force the update of user-specified atomic charges
Examples Examples
"""""""" """"""""
@ -154,6 +157,13 @@ due to the internal dynamic grouping performed by fix bond/react.
If the group-ID is an existing static group, react-group-IDs If the group-ID is an existing static group, react-group-IDs
should also be specified as this static group, or a subset. should also be specified as this static group, or a subset.
The *reset_mol_ids* keyword invokes the :doc:`reset_mol_ids <reset_mol_ids>`
command after a reaction occurs, to ensure that molecule IDs are
consistent with the new bond topology. The group-ID used for
:doc:`reset_mol_ids <reset_mol_ids>` is the group-ID for this fix.
Resetting molecule IDs is necessarily a global operation, and so can
be slow for very large systems.
The following comments pertain to each *react* argument (in other The following comments pertain to each *react* argument (in other
words, can be customized for each reaction, or reaction step): words, can be customized for each reaction, or reaction step):
@ -203,9 +213,10 @@ surrounding topology. As described below, the bonding atom pairs of
the pre-reacted template are specified by atom ID in the map file. The the pre-reacted template are specified by atom ID in the map file. The
pre-reacted molecule template should contain as few atoms as possible pre-reacted molecule template should contain as few atoms as possible
while still completely describing the topology of all atoms affected while still completely describing the topology of all atoms affected
by the reaction. For example, if the force field contains dihedrals, by the reaction (which includes all atoms that change atom type or
the pre-reacted template should contain any atom within three bonds of connectivity, and all bonds that change bond type). For example, if
reacting atoms. the force field contains dihedrals, the pre-reacted template should
contain any atom within three bonds of reacting atoms.
Some atoms in the pre-reacted template that are not reacting may have Some atoms in the pre-reacted template that are not reacting may have
missing topology with respect to the simulation. For example, the missing topology with respect to the simulation. For example, the
@ -300,8 +311,8 @@ either 'none' or 'charges.' Further details are provided in the
discussion of the 'update_edges' keyword. The fifth optional section discussion of the 'update_edges' keyword. The fifth optional section
begins with the keyword 'Constraints' and lists additional criteria begins with the keyword 'Constraints' and lists additional criteria
that must be satisfied in order for the reaction to occur. Currently, that must be satisfied in order for the reaction to occur. Currently,
there are four types of constraints available, as discussed below: there are five types of constraints available, as discussed below:
'distance', 'angle', 'dihedral', and 'arrhenius'. 'distance', 'angle', 'dihedral', 'arrhenius', and 'rmsd'.
A sample map file is given below: A sample map file is given below:
@ -421,6 +432,25 @@ temperature calculations. A uniform random number between 0 and 1 is
generated using *seed*\ ; if this number is less than the result of the generated using *seed*\ ; if this number is less than the result of the
Arrhenius equation above, the reaction is permitted to occur. Arrhenius equation above, the reaction is permitted to occur.
The constraint of type 'rmsd' has the following syntax:
.. parsed-literal::
rmsd *RMSDmax* *molfragment*
where 'rmsd' is the required keyword, and *RMSDmax* is the maximum
root-mean-square deviation between atom positions of the pre-reaction
template and the local reaction site (distance units), after optimal
translation and rotation of the pre-reaction template. Optionally, the
name of a molecule fragment (of the pre-reaction template) can be
specified by *molfragment*\ . If a molecule fragment is specified,
only atoms that are part of this molecule fragment are used to
determine the RMSD. A molecule fragment must have been defined in the
:doc:`molecule <molecule>` command for the pre-reaction template. For
example, the molecule fragment could consist of only the backbone
atoms of a polymer chain. This constraint can be used to enforce a
specific relative position and orientation between reacting molecules.
Once a reaction site has been successfully identified, data structures Once a reaction site has been successfully identified, data structures
within LAMMPS that store bond topology are updated to reflect the within LAMMPS that store bond topology are updated to reflect the
post-reacted molecule template. All force fields with fixed bonds, post-reacted molecule template. All force fields with fixed bonds,
@ -554,7 +584,7 @@ Default
""""""" """""""
The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60, The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60,
update_edges = none reset_mol_ids = yes, update_edges = none
---------- ----------

View File

@ -13,7 +13,7 @@ Syntax
* ID, group-ID are documented in :doc:`fix <fix>` command * ID, group-ID are documented in :doc:`fix <fix>` command
* restrain = style name of this fix command * restrain = style name of this fix command
* one or more keyword/arg pairs may be appended * one or more keyword/arg pairs may be appended
* keyword = *bond* or *angle* or *dihedral* * keyword = *bond* or *lbound* or *angle* or *dihedral*
.. parsed-literal:: .. parsed-literal::
@ -23,7 +23,7 @@ Syntax
r0start = equilibrium bond distance at start of run (distance units) r0start = equilibrium bond distance at start of run (distance units)
r0stop = equilibrium bond distance at end of run (optional) (distance units). If not r0stop = equilibrium bond distance at end of run (optional) (distance units). If not
specified it is assumed to be equal to r0start specified it is assumed to be equal to r0start
*lbond* args = atom1 atom2 Kstart Kstop r0start (r0stop) *lbound* args = atom1 atom2 Kstart Kstop r0start (r0stop)
atom1,atom2 = IDs of 2 atoms in bond atom1,atom2 = IDs of 2 atoms in bond
Kstart,Kstop = restraint coefficients at start/end of run (energy units) Kstart,Kstop = restraint coefficients at start/end of run (energy units)
r0start = equilibrium bond distance at start of run (distance units) r0start = equilibrium bond distance at start of run (distance units)
@ -46,7 +46,7 @@ Examples
.. code-block:: LAMMPS .. code-block:: LAMMPS
fix holdem all restrain bond 45 48 2000.0 2000.0 2.75 fix holdem all restrain bond 45 48 2000.0 2000.0 2.75
fix holdem all restrain lbond 45 48 2000.0 2000.0 2.75 fix holdem all restrain lbound 45 48 2000.0 2000.0 2.75
fix holdem all restrain dihedral 1 2 3 4 2000.0 2000.0 120.0 fix holdem all restrain dihedral 1 2 3 4 2000.0 2000.0 120.0
fix holdem all restrain bond 45 48 2000.0 2000.0 2.75 dihedral 1 2 3 4 2000.0 2000.0 120.0 fix holdem all restrain bond 45 48 2000.0 2000.0 2.75 dihedral 1 2 3 4 2000.0 2000.0 120.0
fix texas_holdem all restrain dihedral 1 2 3 4 0.0 2000.0 120.0 dihedral 1 2 3 5 0.0 2000.0 -120.0 dihedral 1 2 3 6 0.0 2000.0 0.0 fix texas_holdem all restrain dihedral 1 2 3 4 0.0 2000.0 120.0 dihedral 1 2 3 5 0.0 2000.0 -120.0 dihedral 1 2 3 6 0.0 2000.0 0.0
@ -150,7 +150,7 @@ is included in :math:`K`.
---------- ----------
The *lbond* keyword applies a lower bound bond restraint to the specified atoms The *lbound* keyword applies a lower bound bond restraint to the specified atoms
using the same functional form used by the :doc:`bond_style harmonic <bond_harmonic>` command if the distance between using the same functional form used by the :doc:`bond_style harmonic <bond_harmonic>` command if the distance between
the atoms is smaller than the equilibrium bond distance and 0 otherwise. The potential associated with the atoms is smaller than the equilibrium bond distance and 0 otherwise. The potential associated with
the restraint is the restraint is

View File

@ -110,8 +110,8 @@ location specified. E.g. if the file is specified as "niu3.eam", it
is looked for in the current working directory. If it is specified as is looked for in the current working directory. If it is specified as
"../potentials/niu3.eam", then it is looked for in the potentials "../potentials/niu3.eam", then it is looked for in the potentials
directory, assuming it is a sister directory of the current working directory, assuming it is a sister directory of the current working
directory. If the file is not found, it is then looked for in the directory. If the file is not found, it is then looked for in one of
directory specified by the LAMMPS_POTENTIALS environment variable. the directories specified by the ``LAMMPS_POTENTIALS`` environment variable.
Thus if this is set to the potentials directory in the LAMMPS distribution, Thus if this is set to the potentials directory in the LAMMPS distribution,
then you can use those files from anywhere on your system, without then you can use those files from anywhere on your system, without
copying them into your working directory. Environment variables are copying them into your working directory. Environment variables are
@ -136,6 +136,11 @@ Windows:
% set LAMMPS_POTENTIALS="C:\\Path to LAMMPS\\Potentials" % set LAMMPS_POTENTIALS="C:\\Path to LAMMPS\\Potentials"
The ``LAMMPS_POTENTIALS`` environment variable may contain paths
to multiple folders, if they are separated by ";" on Windows and
":" on all other operating systems, just like the ``PATH`` and
similar environment variables.
---------- ----------
The alphabetic list of pair styles defined in LAMMPS is given on the The alphabetic list of pair styles defined in LAMMPS is given on the

View File

@ -129,10 +129,10 @@ For style *comb3*\ , in addition to ffield.comb3, a special parameter
file, *lib.comb3*\ , that is exclusively used for C/O/H systems, will be file, *lib.comb3*\ , that is exclusively used for C/O/H systems, will be
automatically loaded if a carbon atom is detected in the LAMMPS input automatically loaded if a carbon atom is detected in the LAMMPS input
structure. This file must be in your working directory or in the structure. This file must be in your working directory or in the
directory pointed to by the environment variable LAMMPS_POTENTIALS, as directories listed in the environment variable ``LAMMPS_POTENTIALS``, as
described on the :doc:`pair_coeff <pair_coeff>` command doc page. described on the :doc:`pair_coeff <pair_coeff>` command doc page.
Keyword *polar* indicates whether the force field includes The keyword *polar* indicates whether the force field includes
the atomic polarization. Since the equilibration of the polarization the atomic polarization. Since the equilibration of the polarization
has not yet been implemented, it can only be set to polar_off at present. has not yet been implemented, it can only be set to polar_off at present.

View File

@ -107,7 +107,7 @@ These pair styles can only be used via the *pair* keyword of the
Restrictions Restrictions
"""""""""""" """"""""""""
The *cosine/squared* style is part of the "USER-MISC" package. It is only The *cosine/squared* style is part of the USER-MISC package. It is only
enabled if LAMMPS is built with that package. See the :doc:`Build package <Build_package>` doc page for more info. enabled if LAMMPS is built with that package. See the :doc:`Build package <Build_package>` doc page for more info.
Related commands Related commands

View File

@ -95,7 +95,7 @@ This pair style can only be used via the *pair* keyword of the
Restrictions Restrictions
"""""""""""" """"""""""""
This style is part of the "USER-MISC" package. It is only enabled if This style is part of the USER-MISC package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` doc page for more info. LAMMPS was built with that package. See the :doc:`Build package <Build_package>` doc page for more info.
Related commands Related commands

View File

@ -95,7 +95,7 @@ Restrictions
The *coul/slater/long* style requires the long-range solvers included in the KSPACE package. The *coul/slater/long* style requires the long-range solvers included in the KSPACE package.
These styles are part of the "USER-MISC" package. They are only enabled if These styles are part of the USER-MISC package. They are only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` doc page for more info. LAMMPS was built with that package. See the :doc:`Build package <Build_package>` doc page for more info.
Related commands Related commands

View File

@ -164,8 +164,18 @@ heading) the following commands could be included in an input script:
Restrictions Restrictions
"""""""""""" """"""""""""
The *gauss/cut* style is part of the "user-misc" package. It is only The *gauss/cut* style is part of the USER-MISC package. It is only
enabled if LAMMPS is built with that package. See the :doc:`Build package <Build_package>` doc page for more info. enabled if LAMMPS is built with that package. See the :doc:`Build
package <Build_package>` doc page for more info.
The *gauss* style does not apply :doc:`special_bonds <special_bonds>`
factors. When using this pair style on a system that has bonds, the
special_bonds factors, if using the default setting of 0.0, may need to
be adjusted to some very small number (e.g. 1.0e-100), so that those
special pairs are not completely excluded from the neighbor lists, but
won't contribute forces or energies from styles (e.g. when used in
combination with a :doc:`hybrid pair style <pair_hybrid>`) that do
apply those factors.
Related commands Related commands
"""""""""""""""" """"""""""""""""

View File

@ -93,7 +93,7 @@ on particle *i* due to contact with particle *j* is given by:
.. math:: .. math::
\mathbf{F}_{ne, Hooke} = k_N \delta_{ij} \mathbf{n} \mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}
Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle
overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two
@ -106,7 +106,7 @@ For the *hertz* model, the normal component of force is given by:
.. math:: .. math::
\mathbf{F}_{ne, Hertz} = k_N R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n} \mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective
radius, denoted for simplicity as *R* from here on. For *hertz*\ , the radius, denoted for simplicity as *R* from here on. For *hertz*\ , the
@ -123,7 +123,7 @@ Here, :math:`E_{eff} = E = \left(\frac{1-\nu_i^2}{E_i} + \frac{1-\nu_j^2}{E_j}\r
modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of
types *i* and *j*\ . Note that if the elastic modulus and the shear types *i* and *j*\ . Note that if the elastic modulus and the shear
modulus of the two particles are the same, the *hertz/material* model modulus of the two particles are the same, the *hertz/material* model
is equivalent to the *hertz* model with :math:`k_N = 4/3 E_{eff}` is equivalent to the *hertz* model with :math:`k_n = 4/3 E_{eff}`
The *dmt* model corresponds to the The *dmt* model corresponds to the
:ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force :ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force
@ -140,7 +140,7 @@ where the force is computed as:
\mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n} \mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n}
Here, *a* is the radius of the contact zone, related to the overlap Here, :math:`a` is the radius of the contact zone, related to the overlap
:math:`\delta` according to: :math:`\delta` according to:
.. math:: .. math::
@ -167,7 +167,7 @@ following general form:
\mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel} \mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}
Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n} \mathbf{n}` is the component of relative velocity along Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n}\ \mathbf{n}` is the component of relative velocity along
:math:`\mathbf{n}`. :math:`\mathbf{n}`.
The optional *damping* keyword to the *pair_coeff* command followed by The optional *damping* keyword to the *pair_coeff* command followed by
@ -259,7 +259,9 @@ tangential model choices and their expected parameters are as follows:
1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s` 1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s`
2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s` 2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s` 3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
4. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s` 4. *mindlin/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
5. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
6. *mindlin_rescale/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
damping :math:`\eta_n` that determines the magnitude of the tangential damping :math:`\eta_n` that determines the magnitude of the tangential
@ -268,11 +270,11 @@ coefficient, and :math:`k_t` is the tangential stiffness coefficient.
For *tangential linear_nohistory*, a simple velocity-dependent Coulomb For *tangential linear_nohistory*, a simple velocity-dependent Coulomb
friction criterion is used, which mimics the behavior of the *pair friction criterion is used, which mimics the behavior of the *pair
gran/hooke* style. The tangential force (\mathbf{F}_t\) is given by: gran/hooke* style. The tangential force :math:`\mathbf{F}_t` is given by:
.. math:: .. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t} \mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by: The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:
@ -294,8 +296,8 @@ keyword also affects the tangential damping. The parameter
literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`, literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
:ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`). The relative :ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`). The relative
tangential velocity at the point of contact is given by tangential velocity at the point of contact is given by
:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\Omega_i + R_j\Omega_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}{n}`, :math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\mathbf{\Omega}_i + R_j\mathbf{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i`. :math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
The direction of the applied force is :math:`\mathbf{t} = \mathbf{v_{t,rel}}/\|\mathbf{v_{t,rel}}\|` . The direction of the applied force is :math:`\mathbf{t} = \mathbf{v_{t,rel}}/\|\mathbf{v_{t,rel}}\|` .
The normal force value :math:`F_{n0}` used to compute the critical force The normal force value :math:`F_{n0}` used to compute the critical force
@ -314,21 +316,24 @@ form:
.. math:: .. math::
F_{n0} = \|\mathbf{F}_ne + 2 F_{pulloff}\| F_{n0} = \|\mathbf{F}_{ne} + 2 F_{pulloff}\|
Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and
:math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ . :math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ .
The remaining tangential options all use accumulated tangential The remaining tangential options all use accumulated tangential
displacement (i.e. contact history). This is discussed below in the displacement (i.e. contact history), except for the options
context of the *linear_history* option, but the same treatment of the *mindlin/force* and *mindlin_rescale/force*, that use accumulated
accumulated displacement applies to the other options as well. tangential force instead, and are discussed further below.
The accumulated tangential displacement is discussed in detail below
in the context of the *linear_history* option. The same treatment of
the accumulated displacement applies to the other options as well.
For *tangential linear_history*, the tangential force is given by: For *tangential linear_history*, the tangential force is given by:
.. math:: .. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t} \mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
during the entire duration of the contact: during the entire duration of the contact:
@ -356,7 +361,7 @@ work:
.. math:: .. math::
\mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'}\| - \mathbf{n}\cdot\mathbf{\xi'}} \mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}
Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
current time step and :math:`\mathbf{\xi}` is the corrected current time step and :math:`\mathbf{\xi}` is the corrected
@ -372,7 +377,7 @@ discussion):
.. math:: .. math::
\mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}\right) \mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)
The tangential force is added to the total normal force (elastic plus The tangential force is added to the total normal force (elastic plus
damping) to produce the total force on the particle. The tangential damping) to produce the total force on the particle. The tangential
@ -387,27 +392,68 @@ overlap region) to induce a torque on each particle according to:
\mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t \mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t
For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution is used, which differs from the *linear_history* For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution
option by an additional factor of *a*\ , the radius of the contact region. The tangential force is given by: is used which differs from the *linear_history* option by an additional factor
of :math:`a`, the radius of the contact region. The tangential force is given by:
.. math:: .. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t} \mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
Here, *a* is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
Here, :math:`a` is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
for all normal contact models, except for *jkr*\ , where it is given for all normal contact models, except for *jkr*\ , where it is given
implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see
discussion above. To match the Mindlin solution, one should set :math:`k_t = 4G/(2-\nu)`, where :math:`G` is the shear modulus, related to Young's modulus discussion above. To match the Mindlin solution, one should set
:math:`E` by :math:`G = E/(2(1+\nu))`, where :math:`\nu` is Poisson's ratio. This :math:`k_t = 8G_{eff}`, where :math:`G_{eff}` is the effective shear modulus given by:
can also be achieved by specifying *NULL* for :math:`k_t`, in which case a
.. math::
G_{eff} = \left(\frac{2-\nu_i}{G_i} + \frac{2-\nu_j}{G_j}\right)^{-1}
where :math:`G` is the shear modulus, related to Young's modulus :math:`E`
and Poisson's ratio :math:`\nu` by :math:`G = E/(2(1+\nu))`. This can also be
achieved by specifying *NULL* for :math:`k_t`, in which case a
normal contact model that specifies material parameters :math:`E` and normal contact model that specifies material parameters :math:`E` and
:math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this :math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this
case, mixing of the shear modulus for different particle types *i* and case, mixing of the shear modulus for different particle types *i* and
*j* is done according to: *j* is done according to the formula above.
.. note::
The radius of the contact region :math:`a` depends on the normal overlap.
As a result, the tangential force for *mindlin* can change due to
a variation in normal overlap, even with no change in tangential displacement.
For *tangential mindlin/force*, the accumulated elastic tangential force
characterizes the contact history, instead of the accumulated tangential
displacement. This prevents the dependence of the tangential force on the
normal overlap as noted above. The tangential force is given by:
.. math:: .. math::
1/G = 2(2-\nu_i)(1+\nu_i)/E_i + 2(2-\nu_j)(1+\nu_j)/E_j \mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_{te} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
The increment of the elastic component of the tangential force
:math:`\mathbf{F}_{te}` is given by:
.. math::
\mathrm{d}\mathbf{F}_{te} = -k_t a \mathbf{v}_{t,rel} \mathrm{d}\tau
The changes in frame of reference of the contacting pair of particles during
contact are accounted for by the same formula as above, replacing the
accumulated tangential displacement :math:`\xi`, by the accumulated tangential
elastic force :math:`F_{te}`. When the tangential force exceeds the critical
force, the tangential force is directly re-scaled to match the value for
the critical force:
.. math::
\mathbf{F}_{te} = - \mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}
The same rules as those described for *mindlin* apply regarding the tangential
stiffness and mixing of the shear modulus for different particle types.
The *mindlin_rescale* option uses the same form as *mindlin*\ , but the The *mindlin_rescale* option uses the same form as *mindlin*\ , but the
magnitude of the tangential displacement is re-scaled as the contact magnitude of the tangential displacement is re-scaled as the contact
@ -421,9 +467,32 @@ Here, :math:`t_{n-1}` indicates the value at the previous time
step. This rescaling accounts for the fact that a decrease in the step. This rescaling accounts for the fact that a decrease in the
contact area upon unloading leads to the contact being unable to contact area upon unloading leads to the contact being unable to
support the previous tangential loading, and spurious energy is support the previous tangential loading, and spurious energy is
created without the rescaling above (:ref:`Walton <WaltonPC>` ). See also created without the rescaling above (:ref:`Walton <WaltonPC>` ).
discussion in :ref:`Thornton et al, 2013 <Thornton2013>` , particularly
equation 18(b) of that work and associated discussion. .. note::
For *mindlin*, a decrease in the tangential force already occurs as the
contact unloads, due to the dependence of the tangential force on the normal
force described above. By re-scaling :math:`\xi`, *mindlin_rescale*
effectively re-scales the tangential force twice, i.e., proportionally to
:math:`a^2`. This peculiar behavior results from use of the accumulated
tangential displacement to characterize the contact history. Although
*mindlin_rescale* remains available for historic reasons and backward
compatibility purposes, it should be avoided in favor of *mindlin_rescale/force*.
The *mindlin_rescale/force* option uses the same form as *mindlin/force*,
but the magnitude of the tangential elastic force is re-scaled as the contact
unloads, i.e. if :math:`a < a_{t_{n-1}}`:
.. math::
\mathbf{F}_{te} = \mathbf{F}_{te, t_{n-1}} \frac{a}{a_{t_{n-1}}}
This approach provides a better approximation of the :ref:`Mindlin-Deresiewicz <Mindlin1953>`
laws and is more consistent than *mindlin_rescale*. See discussions in
:ref:`Thornton et al, 2013 <Thornton2013>`, particularly equation 18(b) of that
work and associated discussion, and :ref:`Agnolin and Roux, 2007 <AgnolinRoux2007>`,
particularly Appendix A.
---------- ----------
@ -460,7 +529,7 @@ exceeds a critical value:
.. math:: .. math::
\mathbf{F}_{roll} = min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k} \mathbf{F}_{roll} = \min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of
the pseudo-force. As with tangential displacement, the rolling the pseudo-force. As with tangential displacement, the rolling
@ -512,7 +581,7 @@ is then truncated according to:
.. math:: .. math::
\tau_{twist} = min(\mu_{twist} F_{n,0}, \tau_{twist,0}) \tau_{twist} = \min(\mu_{twist} F_{n,0}, \tau_{twist,0})
Similar to the sliding and rolling displacement, the angular Similar to the sliding and rolling displacement, the angular
displacement is rescaled so that it corresponds to the critical value displacement is rescaled so that it corresponds to the critical value
@ -763,3 +832,15 @@ Technology, 233, 30-46.
.. _WaltonPC: .. _WaltonPC:
**(Otis R. Walton)** Walton, O.R., Personal Communication **(Otis R. Walton)** Walton, O.R., Personal Communication
.. _Mindlin1953:
**(Mindlin and Deresiewicz, 1953)** Mindlin, R.D., & Deresiewicz, H. (1953).
Elastic Spheres in Contact under Varying Oblique Force.
J. Appl. Mech., ASME 20, 327-344.
.. _AgnolinRoux2007:
**(Agnolin and Roux 2007)** Agnolin, I. & Roux, J-N. (2007).
Internal states of model isotropic granular packings.
I. Assembling process, geometry, and contact networks. Phys. Rev. E, 76, 061302.

View File

@ -250,8 +250,12 @@ from :ref:`(Li2013_POF) <Li2013_POF>`. The short mDPD run (about 2 minutes
on a single core) generates a particle trajectory which can on a single core) generates a particle trajectory which can
be visualized as follows. be visualized as follows.
.. only:: html
.. image:: JPG/examples_mdpd.gif
:align: center
.. image:: JPG/examples_mdpd_first.jpg .. image:: JPG/examples_mdpd_first.jpg
:target: JPG/examples_mdpd.gif
:align: center :align: center
.. image:: JPG/examples_mdpd_last.jpg .. image:: JPG/examples_mdpd_last.jpg

View File

@ -128,7 +128,7 @@ viscoelastic relaxation parameter and time constant,
respectively. m_lambdai varies within zero to one. For very small respectively. m_lambdai varies within zero to one. For very small
values of m_lambdai the viscoelastic model responds very similar to a values of m_lambdai the viscoelastic model responds very similar to a
linear elastic model. For details please see the description in linear elastic model. For details please see the description in
"(Mtchell2011)". "(Mitchell2011)".
For the *peri/eps* style: For the *peri/eps* style:
@ -142,7 +142,7 @@ For the *peri/eps* style:
K is the bulk modulus and G is the shear modulus. The horizon is a K is the bulk modulus and G is the shear modulus. The horizon is a
cutoff distance and s00 and :math:`\alpha` are used as a bond breaking cutoff distance and s00 and :math:`\alpha` are used as a bond breaking
criteria. m_yield_stress is the yield stress of the material. For criteria. m_yield_stress is the yield stress of the material. For
details please see the description in "(Mtchell2011a)". details please see the description in "(Mitchell2011a)".
---------- ----------

View File

@ -38,12 +38,12 @@ corresponding compiled code. This penalty can be significantly reduced
through generating tabulations from the python code through the through generating tabulations from the python code through the
:doc:`pair_write <pair_write>` command, which is supported by this style. :doc:`pair_write <pair_write>` command, which is supported by this style.
Only a single pair_coeff command is used with the *python* pair style Only a single :doc:`pair_coeff <pair_coeff>` command is used with the
which specifies a python class inside a python module or file that *python* pair style which specifies a python class inside a python module
LAMMPS will look up in the current directory, the folder pointed to by or a file that LAMMPS will look up in the current directory, a folder
the LAMMPS_POTENTIALS environment variable or somewhere in your python pointed to by the ``LAMMPS_POTENTIALS`` environment variable or somewhere
path. A single python module can hold multiple python pair class in your python path. A single python module can hold multiple python pair
definitions. The class definitions itself have to follow specific class definitions. The class definitions itself have to follow specific
rules that are explained below. rules that are explained below.
Atom types in the python class are specified through symbolic Atom types in the python class are specified through symbolic

91
doc/src/pg_cplusplus.rst Normal file
View File

@ -0,0 +1,91 @@
Using the C++ API directly
**************************
Using the C++ classes of the LAMMPS library is lacking some of the
convenience of the C library API, but it allows a more direct access to
simulation data and thus more low-level manipulations and tighter
integration of LAMMPS into another code. While the complete C
library API is provided in the ``library.h`` header file, for using
the C++ API it is required to include the individual header files
defining the individual classes in use. Typically the name of the
class and the name of the header follow some simple rule. Examples
are given below.
Creating or deleting a LAMMPS object
*************************************
When using the LAMMPS library interfaces, the core task is to create an
instance of the :cpp:class:`LAMMPS_NS::LAMMPS` class. In C++ this can
be done directly through the ``new`` operator. All further operations
are then initiated through calling member functions of some of the
components of the LAMMPS class or accessing their data members. The
destruction of the LAMMPS instance is correspondingly initiated by using
the ``delete`` operator. Here is a simple example:
.. code-block:: c++
#include "lammps.h"
#include "universe.h"
#include <mpi.h>
#include <iostream>
int main(int argc, char **argv)
{
LAMMPS_NS::LAMMPS *lmp;
// custom argument vector for LAMMPS library
const char *lmpargv[] {"liblammps", "-log", "none"};
int lmpargc = sizeof(lmpargv)/sizeof(const char *);
// explicitly initialize MPI
MPI_Init(&argc, &argv);
// create LAMMPS instance
lmp = new LAMMPS_NS::LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
// output numerical version string
std::cout << "LAMMPS version: " << lmp->universe->num_ver << std::endl;
// delete LAMMPS instance
delete lmp;
// stop MPI environment
MPI_Finalize();
return 0;
}
Please note that this requires including the ``lammps.h`` header for accessing
the members of the LAMMPS class and then the ``universe.h`` header for accessing the ``num_ver`` member of the :cpp:class:`Universe` class.
Executing LAMMPS commands
*************************
Once a LAMMPS instance is created by your C++ code, you need to set up a
simulation and that is most conveniently done by "driving" it through
issuing commands like you would do when running a LAMMPS simulation from
an input script. Processing of input in LAMMPS is handled by the
:cpp:class:`Input <LAMMPS_NS::Input>` class, an instance of which is a
member of the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class. You have
two options: reading commands from a file, or executing a single
command from a string. See below for a small example:
.. code-block:: c++
#include "lammps.h"
#include "input.h"
#include <mpi.h>
using namespace LAMMPS_NS;
int main(int argc, char **argv)
{
const char *lmpargv[] {"liblammps", "-log", "none"};
int lmpargc = sizeof(lmpargv)/sizeof(const char *);
MPI_Init(&argc, &argv);
LAMMPS *lmp = new LAMMPS(lmpargc, (char **)lmpargv, MPI_COMM_WORLD);
lmp->input->file("in.melt");
lmp->input->one("run 100 post no");
delete lmp;
return 0;
}

1088
doc/src/pg_developer.rst Normal file

File diff suppressed because it is too large Load Diff

202
doc/src/pg_fortran.rst Normal file
View File

@ -0,0 +1,202 @@
The ``LIBLAMMPS`` Fortran Module
********************************
The ``LIBLAMMPS`` module provides an interface to call LAMMPS from a
Fortran code. It is based on the LAMMPS C-library interface and
requires a Fortran 2003 compatible compiler to be compiled.
While C libraries have a defined binary interface (ABI) and can thus be
used from multiple compiler versions from different vendors for as long
as they are compatible with the hosting operating system, the same is
not true for Fortran codes. Thus the LAMMPS Fortran module needs to be
compiled alongside the code using it from the source code in
``fortran/lammps.f90``. When linking, you also need to
:doc:`link to the LAMMPS library <Build_link>`. A typical command line
for a simple program using the Fortran interface would be:
.. code-block:: bash
mpifort -o testlib.x lammps.f90 testlib.f90 -L. -llammps
Please note that the MPI compiler wrapper is only required when
calling the library from an MPI parallel code. Please also note the order
of the source files: the lammps.f90 file needs to be compiled first,
since it provides the ``LIBLAMMPS`` module that is imported by the
Fortran code using the interface.
.. versionadded:: 30Sep2020
.. admonition:: Work in Progress
This Fortran module is work in progress and only the documented
functionality is currently available. The final implementation should
cover the entire range of functionality available in the C and
Python library interfaces.
----------
Creating or deleting a LAMMPS object
************************************
With the Fortran interface the creation of a :cpp:class:`LAMMPS
<LAMMPS_NS::LAMMPS>` instance is included in the constructor for
creating the :f:func:`lammps` derived type. To import the definition of
that type and its type bound procedures you need to add a ``USE
LIBLAMMPS`` statement. Internally it will call either
:cpp:func:`lammps_open_fortran` or :cpp:func:`lammps_open_no_mpi` from
the C library API to create the class instance. All arguments are
optional and :cpp:func:`lammps_mpi_init` will be called automatically,
if it is needed. Similarly, a possible call to :cpp:func:`lammps_mpi_finalize`
is integrated into the :f:func:`close` function and triggered with
the optional logical argument set to ``.true.``. Here is a simple example:
.. code-block:: fortran
PROGRAM testlib
USE LIBLAMMPS ! include the LAMMPS library interface
TYPE(lammps) :: lmp ! derived type to hold LAMMPS instance
CHARACTER(len=*), DIMENSION(*), PARAMETER :: args = &
[ CHARACTER(len=12) :: 'liblammps', '-log', 'none' ]
! create a LAMMPS instance (and initialize MPI)
lmp = lammps(args)
! get and print numerical version code
PRINT*, 'LAMMPS Version: ', lmp%version()
! delete LAMMPS instance (and shuts down MPI)
CALL lmp%close(.true.)
END PROGRAM testlib
--------------------
Executing LAMMPS commands
=========================
Once a LAMMPS instance is created, it is possible to "drive" the LAMMPS
simulation by telling LAMMPS to read commands from a file, or pass
individual or multiple commands from strings or lists of strings. This
is done similarly to how it is implemented in the :doc:`C-library
<pg_lib_execute>` interface. Before handing off the calls to the
C-library interface, the corresponding Fortran versions of the calls
(:f:func:`file`, :f:func:`command`, :f:func:`commands_list`, and
:f:func:`commands_string`) have to make a copy of the strings passed as
arguments so that they can be modified to be compatible with the
requirements of strings in C without affecting the original strings.
Those copies are automatically deleted after the functions return.
Below is a small demonstration of the uses of the different functions:
.. code-block:: fortran
PROGRAM testcmd
USE LIBLAMMPS
TYPE(lammps) :: lmp
CHARACTER(len=512) :: cmds
CHARACTER(len=40),ALLOCATABLE :: cmdlist(:)
CHARACTER(len=10) :: trimmed
INTEGER :: i
lmp = lammps()
CALL lmp%file('in.melt')
CALL lmp%command('variable zpos index 1.0')
! define 10 groups of 10 atoms each
ALLOCATE(cmdlist(10))
DO i=1,10
WRITE(trimmed,'(I10)') 10*i
WRITE(cmdlist(i),'(A,I1,A,I10,A,A)') &
'group g',i-1,' id ',10*(i-1)+1,':',ADJUSTL(trimmed)
END DO
CALL lmp%commands_list(cmdlist)
! run multiple commands from multi-line string
cmds = 'clear' // NEW_LINE('A') // &
'region box block 0 2 0 2 0 2' // NEW_LINE('A') // &
'create_box 1 box' // NEW_LINE('A') // &
'create_atoms 1 single 1.0 1.0 ${zpos}'
CALL lmp%commands_string(cmds)
CALL lmp%close()
END PROGRAM testcmd
---------------
The ``LIBLAMMPS`` module API
****************************
Below are the detailed descriptions of definitions and interfaces
of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS.
.. f:type:: lammps
Derived type that is the general class of the Fortran interface.
It holds a reference to the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance
that any of the included calls are forwarded to.
:f c_ptr handle: reference to the LAMMPS class
:f close: :f:func:`close`
:f version: :f:func:`version`
:f file: :f:func:`file`
:f command: :f:func:`command`
:f commands_list: :f:func:`commands_list`
:f commands_string: :f:func:`commands_string`
.. f:function:: lammps(args[,comm])
This is the constructor for the Fortran class and will forward
the arguments to a call to either :cpp:func:`lammps_open_fortran`
or :cpp:func:`lammps_open_no_mpi`. If the LAMMPS library has been
compiled with MPI support, it will also initialize MPI, if it has
not already been initialized before.
The *args* argument with the list of command line parameters is
optional and so is the *comm* argument with the MPI communicator.
If *comm* is not provided, ``MPI_COMM_WORLD`` is assumed. For
more details please see the documentation of :cpp:func:`lammps_open`.
:p character(len=*) args(*) [optional]: arguments as list of strings
:o integer comm [optional]: MPI communicator
:r lammps: an instance of the :f:type:`lammps` derived type
.. f:subroutine:: close([finalize])
This method will close down the LAMMPS instance through calling
:cpp:func:`lammps_close`. If the *finalize* argument is present and
has a value of ``.true.``, then this subroutine also calls
:cpp:func:`lammps_mpi_finalize`.
:o logical finalize [optional]: shut down the MPI environment of the LAMMPS library if true.
.. f:function:: version()
This method returns the numeric LAMMPS version like :cpp:func:`lammps_version`.
:r integer: LAMMPS version
--------
.. f:subroutine:: file(filename)
This method will call :cpp:func:`lammps_file` to have LAMMPS read
and process commands from a file.
:p character(len=*) filename: name of file with LAMMPS commands
.. f:subroutine:: command(cmd)
This method will call :cpp:func:`lammps_command` to have LAMMPS
execute a single command.
:p character(len=*) cmd: single LAMMPS command
.. f:subroutine:: commands_list(cmds)
This method will call :cpp:func:`lammps_commands_list` to have LAMMPS
execute a list of input lines.
:p character(len=*) cmds(*): list of LAMMPS input lines
.. f:subroutine:: commands_string(str)
This method will call :cpp:func:`lammps_commands_string` to have LAMMPS
execute a block of commands from a string.
:p character(len=*) str: LAMMPS input in string

33
doc/src/pg_lib_add.rst Normal file
View File

@ -0,0 +1,33 @@
Adding code to the Library interface
====================================
The functionality of the LAMMPS library interface has historically
always been motivated by the needs of its users and functions were
added or expanded as they were needed and used. Contributions to
the interface are always welcome. However with a refactoring of
the library interface and its documentation that started in 2020,
there are now a few requirements for inclusion of changes.
- New functions should be orthogonal to existing ones and not
implement functionality that can already be achieved with the
existing APIs.
- All changes and additions should be documented with
`Doxygen <https://www.doxygen.nl/>`_ style comments and references
to those functions added to the corresponding files in the
``doc/src`` folder.
- If possible, new unit tests to test those new features should
be added.
- The new feature should also be implemented and documented for
the Python and Fortran modules.
- All additions should work and be compatible with ``-DLAMMPS_BIGBIG``,
``-DLAMMPS_SMALLBIG``, ``-DLAMMPS_SMALLSMALL`` and compiling
with and without MPI support.
- The ``library.h`` file should be kept compatible to C code at
a level similar to C89. Its interfaces may not reference any
custom data types (e.g. ``bigint``, ``tagint``, and so on) only
known inside of LAMMPS.
- only C style comments, not C++ style
Please note, that these are *not* *strict* requirements, but the
LAMMPS developers appreciate if they are followed closely and will
assist with implementing what is missing.

67
doc/src/pg_lib_config.rst Normal file
View File

@ -0,0 +1,67 @@
Retrieving LAMMPS configuration information
===========================================
The following library functions can be used to query the
LAMMPS library about compile time settings and included
packages and styles.
-----------------------
.. doxygenfunction:: lammps_config_has_mpi_support
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_gzip_support
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_png_support
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_jpeg_support
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_ffmpeg_support
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_exceptions
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_has_package
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_package_count
:project: progguide
-----------------------
.. doxygenfunction:: lammps_config_package_name
:project: progguide
-----------------------
.. doxygenfunction:: lammps_has_style
:project: progguide
-----------------------
.. doxygenfunction:: lammps_style_count
:project: progguide
-----------------------
.. doxygenfunction:: lammps_style_name
:project: progguide

104
doc/src/pg_lib_create.rst Normal file
View File

@ -0,0 +1,104 @@
Creating or deleting a LAMMPS object
====================================
The :cpp:func:`lammps_open` and :cpp:func:`lammps_open_no_mpi`
functions are used to create and initialize a
:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` instance. The calling program has to
provide a handle where a reference to this instance can be stored and
which has to be used in all subsequent function calls until that
instance is destroyed by calling :cpp:func:`lammps_close`.
Here is a simple example demonstrating its use:
.. code-block:: C
#include "library.h"
#include <stdio.h>
int main(int argc, char **argv)
{
void *handle;
int version;
const char *lmpargv[] = { "liblammps", "-log", "none"};
int lmpargc = sizeof(lmpargv)/sizeof(const char *);
/* create LAMMPS instance */
handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
if (handle == NULL) {
printf("LAMMPS initialization failed");
lammps_mpi_finalize();
return 1;
}
/* get and print numerical version code */
version = lammps_version(handle);
printf("LAMMPS Version: %d\n",version);
/* delete LAMMPS instance and shut down MPI */
lammps_close(handle);
lammps_mpi_finalize();
return 0;
}
The LAMMPS library will be using the MPI library it was compiled with
and will either run on all processors in the ``MPI_COMM_WORLD``
communicator or on the set of processors in the communicator given in
the ``comm`` argument of :cpp:func:`lammps_open`. This means
the calling code can run LAMMPS on all or a subset of processors. For
example, a wrapper code might decide to alternate between LAMMPS and
another code, allowing them both to run on all the processors. Or it
might allocate part of the processors to LAMMPS and the rest to the
other code by creating a custom communicator with ``MPI_Comm_split()``
and running both codes concurrently before syncing them up periodically.
Or it might instantiate multiple instances of LAMMPS to perform
different calculations and either alternate between them, run them
concurrently on split communicators, or run them one after the other.
The :cpp:func:`lammps_open` function may be called multiple
times for this latter purpose.
The :cpp:func:`lammps_close` function is used to shut down
the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class pointed to by the handle
passed as an argument and free all its memory. This has to be called for
every instance created with any of the :cpp:func:`lammps_open` functions. It will, however, **not** call
``MPI_Finalize()``, since that may only be called once. See
:cpp:func:`lammps_mpi_finalize` for an alternative to calling
``MPI_Finalize()`` explicitly in the calling program.
The :cpp:func:`lammps_free` function is a clean-up
function to free memory that the library allocated previously
via other function calls. See below for notes in the descriptions
of the individual commands where such memory buffers were allocated.
-----------------------
.. doxygenfunction:: lammps_open
:project: progguide
-----------------------
.. doxygenfunction:: lammps_open_no_mpi
:project: progguide
-----------------------
.. doxygenfunction:: lammps_open_fortran
:project: progguide
-----------------------
.. doxygenfunction:: lammps_close
:project: progguide
-----------------------
.. doxygenfunction:: lammps_mpi_init
:project: progguide
-----------------------
.. doxygenfunction:: lammps_mpi_finalize
:project: progguide
-----------------------
.. doxygenfunction:: lammps_free
:project: progguide

View File

@ -0,0 +1,69 @@
Executing LAMMPS commands
=========================
Once a LAMMPS instance is created, there are multiple ways to "drive" a
simulation. In most cases it is easiest to process single or multiple
LAMMPS commands like in an input file. This can be done through reading
a file or passing single commands or lists of commands or blocks of
commands with the following functions.
Via these functions, the calling code can have the LAMMPS instance act
on a series of :doc:`input file commands <Commands_all>` that are either
read from a file or passed as strings. This, for example, allows one to
set up a problem from a template file and then run it in stages while
performing other operations in between or concurrently. The caller can
interleave the LAMMPS function calls with operations it performs, calls
to extract information from or set information within LAMMPS, or calls
to another code's library.
Also equivalent to regular :doc:`input script parsing <Commands_parse>`
is the handling of comments and expansion of variables with ``${name}``
or ``$(expression)`` syntax before the commands are parsed and
executed. Below is a short example using some of these functions.
.. code-block:: C
#include "library.h"
#include <mpi.h>
#include <stdio.h>
int main(int argc, char **argv)
{
void *handle;
int i;
MPI_Init(&argc, &argv);
handle = lammps_open(0, NULL, MPI_COMM_WORLD, NULL);
lammps_file(handle,"in.sysinit");
lammps_command(handle,"run 1000 post no");
for (i=0; i < 100; ++i) {
lammps_commands_string(handle,"run 100 pre no post no\n"
"print 'PE = $(pe)'\n"
"print 'KE = $(ke)'\n");
}
lammps_close(handle);
MPI_Finalize();
return 0;
}
-----------------------
.. doxygenfunction:: lammps_file
:project: progguide
-----------------------
.. doxygenfunction:: lammps_command
:project: progguide
-----------------------
.. doxygenfunction:: lammps_commands_list
:project: progguide
-----------------------
.. doxygenfunction:: lammps_commands_string
:project: progguide

View File

@ -0,0 +1,30 @@
Accessing LAMMPS Neighbor lists
===============================
The following functions provide access to neighbor lists
generated by LAMMPS or query their properties.
-----------------------
.. doxygenfunction:: lammps_find_compute_neighlist
:project: progguide
-----------------------
.. doxygenfunction:: lammps_find_fix_neighlist
:project: progguide
-----------------------
.. doxygenfunction:: lammps_find_pair_neighlist
:project: progguide
-----------------------
.. doxygenfunction:: lammps_neighlist_num_elements
:project: progguide
-----------------------
.. doxygenfunction:: lammps_neighlist_element_neighbors
:project: progguide

View File

@ -0,0 +1,31 @@
Retrieving or setting properties of LAMMPS objects
==================================================
This section documents accessing or modifying data from objects like
computes, fixes, or variables in LAMMPS.
-----------------------
.. doxygenfunction:: lammps_extract_compute
:project: progguide
-----------------------
.. doxygenfunction:: lammps_extract_fix
:project: progguide
-----------------------
.. doxygenfunction:: lammps_extract_variable
:project: progguide
-----------------------
.. doxygenfunction:: lammps_set_variable
:project: progguide
-----------------------
.. doxygenenum:: _LMP_STYLE_CONST
.. doxygenenum:: _LMP_TYPE_CONST

View File

@ -0,0 +1,62 @@
Retrieving or setting LAMMPS system properties
==============================================
The library interface allows extracting different kinds of information
about the active simulation instance and also modifying some of them.
This makes it possible to combine MD simulation steps with other processing and
simulation methods computed in the calling code or another code that is
coupled to LAMMPS via the library interface. In some cases the data
returned is a direct reference to the original data inside LAMMPS cast
to a void pointer. In that case the data needs to be cast to a suitable
pointer to be able to access it, and you need to know the correct dimensions
and lengths. When accessing per-atom data, please note that this data
is the per-processor **local** data and indexed accordingly. These arrays
can change sizes and order at every neighbor list rebuild and atom sort
event as atoms are migrating between sub-domains.
-----------------------
.. doxygenfunction:: lammps_version
:project: progguide
-----------------------
.. doxygenfunction:: lammps_get_natoms
:project: progguide
-----------------------
.. doxygenfunction:: lammps_get_thermo
:project: progguide
-----------------------
.. doxygenfunction:: lammps_extract_box
:project: progguide
-----------------------
.. doxygenfunction:: lammps_reset_box
:project: progguide
-------------------
.. doxygenfunction:: lammps_extract_setting
:project: progguide
-----------------------
.. doxygenfunction:: lammps_extract_global
:project: progguide
-----------------------
.. doxygenfunction:: lammps_extract_atom
:project: progguide
-----------------------
.. doxygenfunction:: lammps_create_atoms(void *handle, int n, int *id, int *type, double *x, double *v, int *image, int bexpand)
:project: progguide

View File

@ -0,0 +1,29 @@
Library functions for scatter/gather operations
================================================
.. TODO add description
-----------------------
.. doxygenfunction:: lammps_gather_atoms
:project: progguide
-----------------------
.. doxygenfunction:: lammps_gather_atoms_concat
:project: progguide
-----------------------
.. doxygenfunction:: lammps_gather_atoms_subset
:project: progguide
-----------------------
.. doxygenfunction:: lammps_scatter_atoms
:project: progguide
-----------------------
.. doxygenfunction:: lammps_scatter_atoms_subset
:project: progguide

View File

@ -0,0 +1,30 @@
Library interface utility functions
===================================
To simplify some of the tasks, the library interface contains
some utility functions that are not directly calling LAMMPS.
-----------------------
.. doxygenfunction:: lammps_encode_image_flags
:project: progguide
-----------------------
.. doxygenfunction:: lammps_decode_image_flags(int image, int *flags)
:project: progguide
-----------------------
.. doxygenfunction:: lammps_set_fix_external_callback(void *, char *, FixExternalFnPtr, void*)
:project: progguide
-----------------------
.. doxygenfunction:: lammps_has_error
:project: progguide
-----------------------
.. doxygenfunction:: lammps_get_last_error_message
:project: progguide

158
doc/src/pg_library.rst Normal file
View File

@ -0,0 +1,158 @@
LAMMPS Library Interfaces
*************************
As described on the :doc:`library interface to LAMMPS <Howto_library>`
doc page, LAMMPS can be built as a library (static or shared), so that
it can be called by another code, used in a :doc:`coupled manner
<Howto_couple>` with other codes, or driven through a :doc:`Python
script <Python_head>`. Even the LAMMPS standalone executable is
essentially a thin wrapper on top of the LAMMPS library, creating a
LAMMPS instance, processing input and then exiting.
Several of these approaches are based on C language wrapper functions
in the files ``src/library.h`` and ``src/library.cpp``, but it is also
possible to use C++ directly. The basic procedure is always the same:
you create one or more instances of the
:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class and then pass commands as
strings or from files to that LAMMPS instance to execute calculations,
or read, manipulate, and update data from the active class instances
inside LAMMPS to do analysis or perform operations that are not
possible with existing commands.
.. _thread-safety:
.. admonition:: Thread-safety
:class: note
LAMMPS was initially not conceived as a thread-safe program, but over
the years changes have been applied to replace operations that
collide with creating multiple LAMMPS instances from multiple-threads
of the same process with thread-safe alternatives. This primarily
applies to the core LAMMPS code and less so on add-on packages,
especially when those packages require additional code in the *lib*
folder, interface LAMMPS to Fortran libraries, or the code uses
static variables (like the USER-COLVARS package).
Another major issue to deal with is to correctly handle MPI.
Creating a LAMMPS instance requires passing an MPI communicator, or
it assumes the ``MPI_COMM_WORLD`` communicator, which spans all MPI
processor ranks. When creating multiple LAMMPS object instances from
different threads, this communicator has to be different for each
thread or else collisions can happen, or it has to be guaranteed
that only one thread at a time is active. MPI communicators,
however, are not a problem, if LAMMPS is compiled with the MPI STUBS
library, which implies that there is no MPI communication and only 1
MPI rank.
----------
.. _lammps_c_api:
LAMMPS C Library API
====================
The C library interface is the most commonly used path to manage LAMMPS
instances from a compiled code and it is the basis for the :doc:`Python
<pg_python>` and :doc:`Fortran <pg_fortran>` modules. Almost all
functions of the C language API require an argument containing a
"handle" in the form of a ``void *`` type variable, which points to the
location of a LAMMPS class instance.
The ``library.h`` header file by default includes the ``mpi.h`` header
for an MPI library, so it must be present when compiling code using the
library interface. This usually must be the header from the same MPI
library as the LAMMPS library was compiled with. The exception is when
LAMMPS was compiled in serial mode using the ``STUBS`` MPI library. In
that case the calling code may be compiled with a different MPI library
as long as :cpp:func:`lammps_open_no_mpi` is called to create a
LAMMPS instance. Then you may set the define ``-DLAMMPS_LIB_NO_MPI``
when compiling your code and the inclusion of ``mpi.h`` will be skipped
and consequently the function :cpp:func:`lammps_open` may not be used.
.. admonition:: Errors versus exceptions
:class: note
If any of the function calls in the LAMMPS library API triggers
an error inside LAMMPS, this will result in an abort of the entire
program. This is not always desirable. Alternatively, LAMMPS can be
compiled to instead :ref:`throw a C++ exception <exceptions>`.
.. warning::
No checks are made on the arguments of the function calls of the C
library interface. *All* function arguments must be non-NULL unless
*explicitly* allowed and point to consistent and valid data. Buffers
for storing returned data must be allocated to a suitable size.
Passing invalid or unsuitable information will likely cause crashes
or corrupt data.
------------------------------
.. toctree::
:maxdepth: 1
pg_lib_create
pg_lib_execute
pg_lib_properties
pg_lib_objects
pg_lib_scatter
pg_lib_neighbor
pg_lib_config
pg_lib_utility
pg_lib_add
--------------------
.. _lammps_python_api:
LAMMPS Python APIs
==================
The LAMMPS Python module enables calling the LAMMPS C library API from
Python by dynamically loading functions in the LAMMPS shared library through
the `Python ctypes module <https://docs.python.org/3/library/ctypes.html>`_.
Because of the dynamic loading, it is **required** that LAMMPS is compiled
in :ref:`"shared" mode <exe>`. The Python interface is object oriented, but
otherwise tries to be very similar to the C library API. Three different
Python classes to run LAMMPS are available and they build on each other.
.. toctree::
:maxdepth: 1
pg_python
-------------------
.. _lammps_fortran_api:
LAMMPS Fortran API
==================
The LAMMPS Fortran module is a wrapper around calling functions from the
LAMMPS C library API from Fortran through the ISO_C_BINDING feature in
Fortran 2003. The interface is object oriented but otherwise tries to
be very similar to the C library API and the basic Python module.
.. toctree::
:maxdepth: 1
pg_fortran
-------------------
.. _lammps_cplusplus_api:
LAMMPS C++ API
==============
It is also possible to invoke the LAMMPS C++ API directly in your code.
It is lacking some of the convenience of the C library API, but it allows
a more direct access to simulation data and thus more low-level manipulations.
The following links provide some examples and references to the C++ API.
.. toctree::
:maxdepth: 1
pg_cplusplus

188
doc/src/pg_python.rst Normal file
View File

@ -0,0 +1,188 @@
The ``lammps`` Python module
****************************
.. py:module:: lammps
The LAMMPS Python interface is implemented as a module called
:py:mod:`lammps` in the ``lammps.py`` file in the ``python`` folder of
the LAMMPS source code distribution. After compilation of LAMMPS, the
module can be installed into a Python system folder or a user folder
with ``make install-python``. Components of the module can then be loaded
into a Python session with the ``import`` command.
There are multiple Python interface classes in the :py:mod:`lammps` module:
- the :py:class:`lammps <lammps.lammps>` class. This is a wrapper around
the C-library interface and its member functions try to replicate the
:doc:`C-library API <pg_library>` closely. This is the most
feature-complete Python API.
- the :py:class:`PyLammps <lammps.PyLammps>` class. This is a more high-level
and more Python style class implemented on top of the
:py:class:`lammps <lammps.lammps>` class.
- the :py:class:`IPyLammps <lammps.IPyLammps>` class is derived from
:py:class:`PyLammps <lammps.PyLammps>` and adds embedded graphics
features to conveniently include LAMMPS into `Jupyter
<https://jupyter.org/>`_ notebooks.
.. _mpi4py_url: https://mpi4py.readthedocs.io
----------
Creating or deleting a LAMMPS object
************************************
With the Python interface the creation of a :cpp:class:`LAMMPS
<LAMMPS_NS::LAMMPS>` instance is included in the constructor for the
:py:class:`lammps <lammps.lammps>` class. Internally it will call either
:cpp:func:`lammps_open` or :cpp:func:`lammps_open_no_mpi` from the C
library API to create the class instance.
All arguments are optional. The *name* argument is to allow loading a
LAMMPS shared library that is named ``liblammps_machine.so`` instead of
the default name of ``liblammps.so``. In most cases the latter will be
installed or used. The *ptr* argument is for use of the
:py:mod:`lammps` module from inside a LAMMPS instance, e.g. with the
:doc:`python <python>` command, where a pointer to the already existing
:cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class instance can be passed
to the Python class and used instead of creating a new instance. The
*comm* argument may be used in combination with the `mpi4py <mpi4py_url_>`_
module to pass an MPI communicator to LAMMPS and thus it is possible
to run the Python module like the library interface on a subset of the
MPI ranks after splitting the communicator. Here is a simple example:
.. code-block:: python
from lammps import lammps
# NOTE: argv[0] is set by the Python module
args = ["-log", "none"]
# create LAMMPS instance
lmp = lammps(cmdargs=args)
# get and print numerical version code
print("LAMMPS Version: ", lmp.version())
# explicitly close and delete LAMMPS instance (optional)
lmp.close()
Same as with the :doc:`C library API <pg_lib_create>`, this will use the
``MPI_COMM_WORLD`` communicator for the MPI library that LAMMPS was
compiled with. The :py:func:`lmp.close() <lammps.lammps.close>` call is
optional since the LAMMPS class instance will also be deleted
automatically during the :py:class:`lammps <lammps.lammps>` class
destructor.
Executing LAMMPS commands
*************************
Once an instance of the :py:class:`lammps <lammps.lammps>` class is
created, there are multiple ways to "feed" it commands. This is
not very different from running a LAMMPS input script, except that
Python has many more facilities for structured programming than the
LAMMPS input script syntax. Furthermore it is possible to "compute"
what the next LAMMPS command should be. Same as in the equivalent
:doc:`C library functions <pg_lib_execute>`, commands can be read from a file, a
single string, a list of strings and a block of commands in a single
multi-line string. They are processed under the same boundary conditions
as the C library counterparts. The example below demonstrates the use
of :py:func:`lammps.file`, :py:func:`lammps.command`,
:py:func:`lammps.commands_list`, and :py:func:`lammps.commands_string`:
.. code-block:: python
from lammps import lammps
lmp = lammps()
# read commands from file 'in.melt'
lmp.file('in.melt')
# issue a single command
lmp.command('variable zpos index 1.0')
# create 10 groups with 10 atoms each
cmds = ["group g{} id {}:{}".format(i,10*i+1,10*(i+1)) for i in range(10)]
lmp.commands_list(cmds)
# run commands from a multi-line string
block = """
clear
region box block 0 2 0 2 0 2
create_box 1 box
create_atoms 1 single 1.0 1.0 ${zpos}
"""
lmp.commands_string(block)
----------
The ``lammps`` class API
************************
The :py:class:`lammps <lammps.lammps>` class is the core of the LAMMPS
Python interfaces. It is a wrapper around the :doc:`LAMMPS C library
API <pg_library>` using the `Python ctypes module
<https://docs.python.org/3/library/ctypes.html>`_ and a shared library
compiled from the LAMMPS source code. The individual methods in this
class try to closely follow the corresponding C functions. The handle
argument that needs to be passed to the C functions is stored internally
in the class and automatically added when calling the C library
functions. Below is a detailed documentation of the API.
.. autoclass:: lammps.lammps
:members:
----------
The ``PyLammps`` class API
**************************
.. autoclass:: lammps.PyLammps
:members:
----------
The ``IPyLammps`` class API
***************************
.. autoclass:: lammps.IPyLammps
:members:
----------
Additional components of the ``lammps`` module
**********************************************
The :py:mod:`lammps` module additionally contains several constants
and the :py:class:`NeighList <lammps.NeighList>` class:
.. _py_data_constants:
.. py:data:: LAMMPS_INT, LAMMPS_DOUBLE, LAMMPS_BIGINT, LAMMPS_TAGINT, LAMMPS_STRING
:type: int
Constants in the :py:mod:`lammps` module to indicate how to
cast data when the C library function returns a void pointer.
Used in :py:func:`lammps.extract_global`.
.. _py_style_constants:
.. py:data:: LMP_STYLE_GLOBAL, LMP_STYLE_ATOM, LMP_STYLE_LOCAL
:type: int
Constants in the :py:mod:`lammps` module to select what style of data
to request from computes or fixes. See :cpp:enum:`_LMP_STYLE_CONST`
for the equivalent constants in the C library interface. Used in
:py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
.. _py_type_constants:
.. py:data:: LMP_TYPE_SCALAR, LMP_TYPE_VECTOR, LMP_TYPE_ARRAY, LMP_SIZE_VECTOR, LMP_SIZE_ROWS, LMP_SIZE_COLS
:type: int
Constants in the :py:mod:`lammps` module to select what type of data
to request from computes or fixes. See :cpp:enum:`_LMP_TYPE_CONST`
for the equivalent constants in the C library interface. Used in
:py:func:`lammps.extract_compute` and :py:func:`lammps.extract_fix`.
.. _py_var_constants:
.. py:data:: LMP_VAR_EQUAL, LMP_VAR_ATOM
:type: int
Constants in the :py:mod:`lammps` module to select what style of
variable to query when calling :py:func:`lammps.extract_variable`.
.. autoclass:: lammps.NeighList
:members:
:no-undoc-members:

View File

@ -1 +1,5 @@
Sphinx Sphinx
sphinxcontrib-spelling
sphinx-fortran
breathe
Pygments

View File

@ -7,3 +7,10 @@
display: block; display: block;
margin-bottom: 0.809em; margin-bottom: 0.809em;
} }
.lammps_release {
text-align: center;
font-size: 11px;
display: block;
margin-bottom: 0.405em;
}

View File

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -103,6 +103,12 @@
{%- endif %} {%- endif %}
{%- endblock %} {%- endblock %}
{%- block extrahead %} {% endblock %} {%- block extrahead %} {% endblock %}
{# Keep modernizr in head - http://modernizr.com/docs/#installing #}
<script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
{# for improved browser compatibility #}
<script src="{{ pathto('_static/polyfill.js', 1) }}"></script>
</head> </head>
<body class="wy-body-for-nav"> <body class="wy-body-for-nav">
@ -135,9 +141,8 @@
{%- set nav_version = current_version %} {%- set nav_version = current_version %}
{% endif %} {% endif %}
{% if nav_version %} {% if nav_version %}
<div class="version"> <div class="lammps_version">Version: <b>{{ nav_version }}</b></div>
{{ nav_version }} <div class="lammps_release">git info: {{ release }}</div>
</div>
{% endif %} {% endif %}
{% endif %} {% endif %}

View File

@ -23,11 +23,16 @@ try:
except: except:
pass pass
LAMMPS_DOC_DIR = '@LAMMPS_DOC_DIR@'
LAMMPS_SOURCE_DIR = '@LAMMPS_SOURCE_DIR@'
LAMMPS_PYTHON_DIR = '@LAMMPS_PYTHON_DIR@'
LAMMPS_DOXYGEN_XML_DIR = '@DOXYGEN_XML_DIR@'
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.')) #sys.path.insert(0, os.path.abspath('.'))
sys.path.append(os.path.join(os.path.dirname(__file__), '../../src/_ext')) sys.path.append(os.path.join(LAMMPS_DOC_DIR, 'src', '_ext'))
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
@ -41,7 +46,9 @@ extensions = [
'sphinx.ext.mathjax', 'sphinx.ext.mathjax',
'sphinx.ext.imgmath', 'sphinx.ext.imgmath',
'sphinx.ext.autodoc', 'sphinx.ext.autodoc',
'sphinxfortran.fortran_domain',
'table_from_list', 'table_from_list',
'breathe',
] ]
# 2017-12-07: commented out, since this package is broken with Sphinx 16.x # 2017-12-07: commented out, since this package is broken with Sphinx 16.x
# yet we can no longer use Sphinx 15.x, since that breaks with # yet we can no longer use Sphinx 15.x, since that breaks with
@ -72,12 +79,24 @@ copyright = '2003-2020 Sandia Corporation'
def get_lammps_version(): def get_lammps_version():
import os import os
script_dir = os.path.dirname(os.path.realpath(__file__)) script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, '../../../src/version.h'), 'r') as f: with open(os.path.join(LAMMPS_SOURCE_DIR, 'version.h'), 'r') as f:
line = f.readline() line = f.readline()
start_pos = line.find('"')+1 start_pos = line.find('"')+1
end_pos = line.find('"', start_pos) end_pos = line.find('"', start_pos)
return line[start_pos:end_pos] return line[start_pos:end_pos]
def get_git_info():
    """Return the output of ``git describe`` for use as the release string.

    The command is run in the current working directory.  Underscores in
    the description are replaced by spaces and surrounding whitespace
    (including the trailing newline printed by git) is removed.

    Returns:
        str: the cleaned-up description, or an empty string when git cannot
        be executed, exits with a non-zero status, or prints output that
        cannot be decoded.
    """
    import subprocess
    git_n_date = ''
    try:
        gitinfo = subprocess.run(['git', 'describe'],
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if gitinfo.returncode == 0:
            git_n_date = gitinfo.stdout.decode().strip().replace('_', ' ')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # Best effort only: a missing git executable or unreadable output
        # must not break the documentation build, but interrupts such as
        # KeyboardInterrupt are no longer swallowed.
        pass
    return git_n_date
# The version info for the project you're documenting, acts as replacement for # The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the # |version| and |release|, also used in various other places throughout the
# built documents. # built documents.
@ -85,7 +104,7 @@ def get_lammps_version():
# The short X.Y version. # The short X.Y version.
version = get_lammps_version() version = get_lammps_version()
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = '' release = get_git_info()
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.
@ -153,7 +172,7 @@ html_title = "LAMMPS documentation"
# The name of an image file (relative to this directory) to place at the top # The name of an image file (relative to this directory) to place at the top
# of the sidebar. # of the sidebar.
html_logo = 'lammps-logo.png' html_logo = '_static/lammps-logo.png'
# The name of an image file (within the static path) to use as favicon of the # The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
@ -314,7 +333,7 @@ texinfo_documents = [
epub_title = 'LAMMPS Documentation - ' + get_lammps_version() epub_title = 'LAMMPS Documentation - ' + get_lammps_version()
epub_cover = ('lammps-logo.png', '') epub_cover = ('_static/lammps-logo.png', '')
epub_description = """ epub_description = """
This is the Manual for the LAMMPS software package. This is the Manual for the LAMMPS software package.
@ -342,13 +361,29 @@ if spelling_spec and has_enchant:
spelling_lang='en_US' spelling_lang='en_US'
spelling_word_list_filename='false_positives.txt' spelling_word_list_filename='false_positives.txt'
sys.path.append(os.path.join(os.path.dirname(__file__), '.')) conf_script_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(conf_script_dir, '.'))
import LAMMPSLexer import LAMMPSLexer
from sphinx.highlighting import lexers from sphinx.highlighting import lexers
lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True) lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True)
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../python')) sys.path.append(LAMMPS_PYTHON_DIR)
# avoid syntax highlighting in blocks that don't specify language # avoid syntax highlighting in blocks that don't specify language
highlight_language = 'none' highlight_language = 'none'
# autodoc configuration
autodoc_member_order = 'bysource'
#autoclass_content = 'both'
# breathe configuration
breathe_projects = { 'progguide' : LAMMPS_DOXYGEN_XML_DIR }
breathe_default_project = 'progguide'
breathe_show_define_initializer = True
breathe_domain_by_extension = { 'h' : 'cpp',
'cpp' : 'cpp',
'c' : 'c',
}

View File

@ -43,6 +43,7 @@ Afshar
agilio agilio
Agilio Agilio
agni agni
Agnolin
Ai Ai
Aidan Aidan
aij aij
@ -114,6 +115,7 @@ Archlinux
arcsin arcsin
arg arg
args args
argv
arrhenius arrhenius
Arun Arun
arXiv arXiv
@ -137,6 +139,8 @@ atc
AtC AtC
ATC ATC
athermal athermal
atime
atimestep
athomps athomps
atm atm
atomeye atomeye
@ -206,7 +210,6 @@ bcolor
bdiam bdiam
bdw bdw
Beckman Beckman
behaviour
Belak Belak
Bellott Bellott
benchmarking benchmarking
@ -247,6 +250,7 @@ bispectrum
Bispectrum Bispectrum
bitbucket bitbucket
bitmapped bitmapped
bitmask
bitrate bitrate
bitrates bitrates
Bitzek Bitzek
@ -265,6 +269,7 @@ bodystyle
Bogaerts Bogaerts
Bogusz Bogusz
Bohrs Bohrs
boltz
Boltzman Boltzman
BondAngle BondAngle
BondBond BondBond
@ -283,6 +288,14 @@ Botu
Bouguet Bouguet
Bourne Bourne
boxcolor boxcolor
boxlo
boxhi
boxxlo
boxxhi
boxylo
boxyhi
boxzlo
boxzhi
bp bp
bpclermont bpclermont
bpls bpls
@ -301,6 +314,7 @@ Bryantsev
Btarget Btarget
btype btype
buckPlusAttr buckPlusAttr
buf
builtin builtin
Bulatov Bulatov
Bureekaew Bureekaew
@ -369,6 +383,7 @@ charmm
CHARMM CHARMM
charmmfsh charmmfsh
charmmfsw charmmfsw
charptr
Chaudhuri Chaudhuri
checkbox checkbox
checkmark checkmark
@ -407,6 +422,7 @@ cmap
Cmax Cmax
cmd cmd
cmdlist cmdlist
cmds
Cmin Cmin
cmm cmm
CMM CMM
@ -436,6 +452,7 @@ Colvars
COLVARS COLVARS
comID comID
Commun Commun
compositing
compressibility compressibility
compressive compressive
Comput Comput
@ -584,6 +601,7 @@ del
delaystep delaystep
DeleteIDs DeleteIDs
deleteIDs deleteIDs
delflag
Dellago Dellago
delocalization delocalization
delocalized delocalized
@ -599,6 +617,7 @@ Dequidt
der der
dereference dereference
derekt derekt
Deresiewicz
Derjagin Derjagin
Derjaguin Derjaguin
Derlet Derlet
@ -668,6 +687,8 @@ Donadio
dotc dotc
Doty Doty
doxygen doxygen
doxygenclass
doxygenfunction
downarrow downarrow
Doye Doye
dpd dpd
@ -721,6 +742,7 @@ Eaat
Eacn Eacn
eam eam
eangle eangle
earg
eatom eatom
Eb Eb
Eba Eba
@ -841,6 +863,7 @@ Erhart
erorate erorate
erose erose
erotate erotate
errno
Ertas Ertas
ervel ervel
Espanol Espanol
@ -899,6 +922,7 @@ Fc
fcc fcc
fcm fcm
Fd Fd
fd
fdotr fdotr
fdt fdt
Fehlberg Fehlberg
@ -923,6 +947,7 @@ ffplay
fft fft
fftbench fftbench
fftw fftw
fgets
fhg fhg
Fi Fi
Fichthorn Fichthorn
@ -958,6 +983,7 @@ fmackay
fmag fmag
fmass fmass
fmm fmm
fmt
fmx fmx
fmy fmy
fmz fmz
@ -971,6 +997,7 @@ Fock
Fogarty Fogarty
Foiles Foiles
fopenmp fopenmp
forceclear
forestgreen forestgreen
formatarg formatarg
formulae formulae
@ -987,6 +1014,7 @@ Fraige
framerate framerate
Frauenheim Frauenheim
Fraunhofer Fraunhofer
fread
Freitas Freitas
Frenkel Frenkel
Friedrichs Friedrichs
@ -994,6 +1022,7 @@ fs
fsh fsh
fstyle fstyle
fsw fsw
ftm
ftol ftol
fugacity fugacity
Fumi Fumi
@ -1101,6 +1130,7 @@ gromos
Gronbech Gronbech
Groot Groot
groupbig groupbig
groupbit
grp grp
Grueneisen Grueneisen
gsmooth gsmooth
@ -1163,6 +1193,7 @@ hexorder
Heyes Heyes
HfO HfO
hgrid hgrid
hhmrr
Hibbs Hibbs
Higdon Higdon
Hijazi Hijazi
@ -1172,6 +1203,7 @@ histogrammed
histogramming histogramming
hma hma
hmaktulga hmaktulga
hplanck
hoc hoc
Hochbruck Hochbruck
Hofling Hofling
@ -1214,6 +1246,7 @@ hyperspherical
hysteretic hysteretic
hz hz
Ibanez Ibanez
iatom
ibar ibar
ibm ibm
icc icc
@ -1256,6 +1289,7 @@ indices
inertiax inertiax
inertiay inertiay
inertiaz inertiaz
infile
infty infty
inhomogeneities inhomogeneities
inhomogeneous inhomogeneous
@ -1296,6 +1330,7 @@ ipp
Ippolito Ippolito
IPv IPv
IPython IPython
ipython
Isele Isele
isenthalpic isenthalpic
ish ish
@ -1444,6 +1479,7 @@ Kloza
kmax kmax
Kmax Kmax
KMP KMP
kmu
Knizhnik Knizhnik
knl knl
Kofke Kofke
@ -1931,6 +1967,7 @@ muz
mv mv
mV mV
Mvapich Mvapich
mvh
mvv mvv
MxN MxN
myCompute myCompute
@ -1943,11 +1980,13 @@ na
nabla nabla
Nagaosa Nagaosa
Nakano Nakano
nall
namespace namespace
namespaces namespaces
nan nan
NaN NaN
Nandor Nandor
nangles
Nangletype Nangletype
nangletypes nangletypes
Nangletypes Nangletypes
@ -1976,6 +2015,7 @@ Nbin
Nbins Nbins
nbody nbody
Nbody Nbody
nbonds
nbondtype nbondtype
Nbondtype Nbondtype
nbondtypes nbondtypes
@ -1988,9 +2028,11 @@ Nc
nchunk nchunk
Nchunk Nchunk
ncoeff ncoeff
ncol
ncorr ncorr
ncount ncount
nd nd
ndihedrals
Ndihedraltype Ndihedraltype
Ndirango Ndirango
ndof ndof
@ -2032,10 +2074,12 @@ Ngyuen
nh nh
nharmonic nharmonic
nhc nhc
nhi
NiAlH NiAlH
Nicklas Nicklas
Niklasson Niklasson
Nikolskiy Nikolskiy
nimpropers
Nimpropertype Nimpropertype
Ninteger Ninteger
Nissila Nissila
@ -2044,9 +2088,11 @@ nitride
nitrides nitrides
niu niu
Nk Nk
nktv
nl nl
nlen nlen
Nlines Nlines
nlo
nlocal nlocal
Nlocal Nlocal
Nlog Nlog
@ -2054,7 +2100,9 @@ nlp
nm nm
Nm Nm
Nmax Nmax
nmax
Nmin Nmin
nmin
Nmols Nmols
nn nn
Nocedal Nocedal
@ -2107,6 +2155,7 @@ Nrepeat
nreset nreset
Nrho Nrho
Nroff Nroff
nrow
nrun nrun
Ns Ns
Nsample Nsample
@ -2125,6 +2174,7 @@ Nt
Ntable Ntable
ntheta ntheta
nthreads nthreads
ntimestep
Ntptask Ntptask
Ntriples Ntriples
Ntype Ntype
@ -2220,6 +2270,7 @@ oxdna
oxrna oxrna
oxDNA oxDNA
oxRNA oxRNA
packings
padua padua
Padua Padua
pafi pafi
@ -2252,6 +2303,8 @@ Particuology
pastewka pastewka
Pastewka Pastewka
pathangle pathangle
pathname
pathnames
Patomtrans Patomtrans
Pattnaik Pattnaik
Pavese Pavese
@ -2352,6 +2405,7 @@ polydisperse
polydispersity polydispersity
polyelectrolyte polyelectrolyte
polyhedra polyhedra
polymorphism
popen popen
Popov Popov
popstore popstore
@ -2385,6 +2439,7 @@ proc
Proc Proc
procs procs
Prony Prony
progguide
ps ps
Ps Ps
pscreen pscreen
@ -2431,7 +2486,9 @@ qbmsst
qcore qcore
qdist qdist
qE qE
qe
qeff qeff
qelectron
qeq qeq
QeQ QeQ
QEq QEq
@ -2449,6 +2506,8 @@ qmol
qoffload qoffload
qopenmp qopenmp
qoverride qoverride
qqr
qqrd
qtb qtb
quadratically quadratically
quadrupolar quadrupolar
@ -2504,6 +2563,7 @@ rebo
recursing recursing
Ree Ree
refactored refactored
refactoring
reflectionstyle reflectionstyle
regoin regoin
Reinders Reinders
@ -2589,6 +2649,7 @@ Rkouter
RkouterN RkouterN
rmask rmask
Rmask Rmask
rmass
rmax rmax
Rmax Rmax
rmdir rmdir
@ -2723,6 +2784,7 @@ shlib
SHM SHM
shm shm
shockvel shockvel
shrinkexceed
Shugaev Shugaev
si si
SiC SiC
@ -2851,11 +2913,16 @@ strcmp
streitz streitz
Streitz Streitz
Streiz Streiz
strerror
strided strided
strietz strietz
strmatch
strncmp
strstr
Stukowski Stukowski
Su Su
subbox subbox
Subclassed
subcutoff subcutoff
subcycle subcycle
subcycling subcycling
@ -2996,6 +3063,7 @@ Tmin
tmp tmp
tN tN
Tobias Tobias
tokenizer
tokyo tokyo
tol tol
toolchain toolchain
@ -3226,6 +3294,7 @@ vv
vx vx
Vx Vx
vxcm vxcm
vxmu
vy vy
Vy Vy
vycm vycm
@ -3258,8 +3327,9 @@ Widom
widom widom
Wijk Wijk
Wikipedia Wikipedia
wildcard
Wildcard Wildcard
wildcard
wildcards
Wirnsberger Wirnsberger
wirtes wirtes
witin witin
@ -3301,6 +3371,7 @@ Xmax
xmgrace xmgrace
xMIC xMIC
xmin xmin
xml
xmovie xmovie
Xmovie Xmovie
xmu xmu
@ -3315,6 +3386,7 @@ xsu
xtc xtc
xu xu
Xu Xu
xxt
xxxxx xxxxx
xy xy
xyz xyz

View File

@ -1 +0,0 @@
../../src/JPG/lammps-logo.png

11
fortran/README Normal file
View File

@ -0,0 +1,11 @@
This directory contains Fortran code which interfaces with LAMMPS as a library
and allows the LAMMPS library interface to be invoked from Fortran codes.
It requires a Fortran compiler that supports the Fortran 2003 standard.
This interface is based on and supersedes the previous Fortran interfaces
in the examples/COUPLE/fortran* folders. It is fully supported by the
LAMMPS developers and included in the documentation and unit testing.
Details on this Fortran interface and how to build programs using it
are in the manual in the doc/html/pg_fortran.html file.

281
fortran/lammps.f90 Normal file
View File

@ -0,0 +1,281 @@
! -------------------------------------------------------------------------
! LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
! http://lammps.sandia.gov, Sandia National Laboratories
! Steve Plimpton, sjplimp@sandia.gov
!
! Copyright (2003) Sandia Corporation. Under the terms of Contract
! DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
! certain rights in this software. This software is distributed under
! the GNU General Public License.
!
! See the README file in the top-level LAMMPS directory.
! -------------------------------------------------------------------------
!
! Fortran interface to the LAMMPS library implemented as a Fortran 2003
! style module that wraps the C-style library interface in library.cpp
! and library.h using the ISO_C_BINDING module of the Fortran compiler.
!
! Based on the LAMMPS Fortran 2003 module contributed by:
! Karl D. Hammond <karlh@ugcs.caltech.edu>
! University of Tennessee, Knoxville (USA), 2012
!
! The Fortran module tries to follow the API of the C-library interface
! closely, but like the Python wrapper it employs an object oriented
! approach. To accommodate the object oriented approach, all exported
! subroutines and functions have to be implemented in Fortran to then
! call the interfaced C style functions with adapted calling conventions
! as needed. The C-library interfaced functions retain their names
! starting with "lammps_" while the Fortran versions start with "lmp_".
!
MODULE LIBLAMMPS

  USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_ptr, c_null_ptr, c_loc, &
      c_int, c_char, c_null_char, c_double

  IMPLICIT NONE
  PRIVATE
  PUBLIC :: lammps

  ! Derived type wrapping the opaque handle that the C-style library
  ! interface uses to identify a LAMMPS instance.  The type-bound
  ! procedures forward to the corresponding "lammps_" C functions and
  ! pass the stored handle automatically.
  TYPE lammps
      TYPE(c_ptr) :: handle
    CONTAINS
      PROCEDURE :: close => lmp_close
      PROCEDURE :: file => lmp_file
      PROCEDURE :: command => lmp_command
      PROCEDURE :: commands_list => lmp_commands_list
      PROCEDURE :: commands_string => lmp_commands_string
      PROCEDURE :: version => lmp_version
      PROCEDURE :: get_natoms => lmp_get_natoms
  END TYPE lammps

  ! Generic interface with the same name as the type, so that
  ! "lmp = lammps(args,comm)" acts as a constructor.
  INTERFACE lammps
      MODULE PROCEDURE lmp_open
  END INTERFACE lammps

  ! interface definitions for calling functions in library.cpp
  INTERFACE
      FUNCTION lammps_open(argc,argv,comm,handle) &
          BIND(C, name='lammps_open_fortran')
        IMPORT :: c_ptr, c_int
        INTEGER(c_int), VALUE, INTENT(in)     :: argc, comm
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
        TYPE(c_ptr), INTENT(out)              :: handle
        TYPE(c_ptr)                           :: lammps_open
      END FUNCTION lammps_open

      FUNCTION lammps_open_no_mpi(argc,argv,handle) &
          BIND(C, name='lammps_open_no_mpi')
        IMPORT :: c_ptr, c_int
        INTEGER(c_int), VALUE, INTENT(in)     :: argc
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
        TYPE(c_ptr), INTENT(out)              :: handle
        TYPE(c_ptr)                           :: lammps_open_no_mpi
      END FUNCTION lammps_open_no_mpi

      SUBROUTINE lammps_close(handle) BIND(C, name='lammps_close')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_close

      SUBROUTINE lammps_mpi_init(handle) BIND(C, name='lammps_mpi_init')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_mpi_init

      SUBROUTINE lammps_mpi_finalize(handle) &
          BIND(C, name='lammps_mpi_finalize')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
      END SUBROUTINE lammps_mpi_finalize

      SUBROUTINE lammps_file(handle,filename) BIND(C, name='lammps_file')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: filename
      END SUBROUTINE lammps_file

      SUBROUTINE lammps_command(handle,cmd) BIND(C, name='lammps_command')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: cmd
      END SUBROUTINE lammps_command

      SUBROUTINE lammps_commands_list(handle,ncmd,cmds) &
          BIND(C, name='lammps_commands_list')
        IMPORT :: c_ptr, c_int
        TYPE(c_ptr), VALUE :: handle
        INTEGER(c_int), VALUE, INTENT(in)     :: ncmd
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: cmds
      END SUBROUTINE lammps_commands_list

      SUBROUTINE lammps_commands_string(handle,str) &
          BIND(C, name='lammps_commands_string')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: handle
        TYPE(c_ptr), VALUE :: str
      END SUBROUTINE lammps_commands_string

      SUBROUTINE lammps_free(ptr) BIND(C, name='lammps_free')
        IMPORT :: c_ptr
        TYPE(c_ptr), VALUE :: ptr
      END SUBROUTINE lammps_free

      FUNCTION lammps_version(handle) BIND(C, name='lammps_version')
        IMPORT :: c_ptr, c_int
        TYPE(c_ptr), VALUE :: handle
        INTEGER(c_int) :: lammps_version
      END FUNCTION lammps_version

      FUNCTION lammps_get_natoms(handle) BIND(C, name='lammps_get_natoms')
        IMPORT :: c_ptr, c_double
        TYPE(c_ptr), VALUE :: handle
        REAL(c_double) :: lammps_get_natoms
      END FUNCTION lammps_get_natoms
  END INTERFACE

CONTAINS

  ! Fortran wrappers and helper functions.

  ! Constructor for the LAMMPS class.
  ! Combined wrapper around lammps_open_fortran() and lammps_open_no_mpi()
  !
  ! args: optional list of command line style arguments; when absent a
  !       single argument "liblammps" is passed instead.
  ! comm: optional MPI communicator (as Fortran integer); when present
  !       lammps_open_fortran() is called, otherwise lammps_open_no_mpi().
  ! The returned object stores the handle returned by the C function.
  TYPE(lammps) FUNCTION lmp_open(args,comm)
      IMPLICIT NONE
      INTEGER,INTENT(in), OPTIONAL :: comm
      CHARACTER(len=*), INTENT(in), OPTIONAL :: args(:)
      TYPE(c_ptr), ALLOCATABLE     :: argv(:)
      TYPE(c_ptr)                  :: dummy
      INTEGER :: i,argc

      ! Assigned at run time on purpose: an initializer in the declaration
      ! would give the variable the SAVE attribute, so its value would be
      ! shared between calls (and between threads) instead of being reset.
      dummy = c_null_ptr

      IF (PRESENT(args)) THEN
          ! convert argument list to c style
          argc = SIZE(args)
          ALLOCATE(argv(argc))
          DO i=1,argc
              argv(i) = f2c_string(args(i))
          END DO
      ELSE
          argc = 1
          ALLOCATE(argv(1))
          argv(1) = f2c_string("liblammps")
      ENDIF

      IF (PRESENT(comm)) THEN
          lmp_open%handle = lammps_open(argc,argv,comm,dummy)
      ELSE
          lmp_open%handle = lammps_open_no_mpi(argc,argv,dummy)
      END IF

      ! Clean up allocated memory
      DO i=1,argc
          CALL lammps_free(argv(i))
      END DO
      DEALLOCATE(argv)
  END FUNCTION lmp_open

  ! Combined Fortran wrapper around lammps_close() and lammps_mpi_finalize()
  !
  ! finalize: optional flag; lammps_mpi_finalize() is only called when the
  !           argument is present and .TRUE.
  SUBROUTINE lmp_close(self,finalize)
      IMPLICIT NONE
      CLASS(lammps) :: self
      LOGICAL,INTENT(in),OPTIONAL :: finalize

      CALL lammps_close(self%handle)

      IF (PRESENT(finalize)) THEN
          IF (finalize) THEN
              CALL lammps_mpi_finalize(self%handle)
          END IF
      END IF
  END SUBROUTINE lmp_close

  ! equivalent function to lammps_version()
  INTEGER FUNCTION lmp_version(self)
      IMPLICIT NONE
      CLASS(lammps) :: self

      lmp_version = lammps_version(self%handle)
  END FUNCTION lmp_version

  ! equivalent function to lammps_get_natoms(); the C function returns
  ! the value as a double precision floating point number
  DOUBLE PRECISION FUNCTION lmp_get_natoms(self)
      IMPLICIT NONE
      CLASS(lammps) :: self

      lmp_get_natoms = lammps_get_natoms(self%handle)
  END FUNCTION lmp_get_natoms

  ! equivalent function to lammps_file()
  SUBROUTINE lmp_file(self,filename)
      IMPLICIT NONE
      CLASS(lammps) :: self
      CHARACTER(len=*) :: filename
      TYPE(c_ptr) :: str

      str = f2c_string(filename)
      CALL lammps_file(self%handle,str)
      CALL lammps_free(str)
  END SUBROUTINE lmp_file

  ! equivalent function to lammps_command()
  SUBROUTINE lmp_command(self,cmd)
      IMPLICIT NONE
      CLASS(lammps) :: self
      CHARACTER(len=*) :: cmd
      TYPE(c_ptr) :: str

      str = f2c_string(cmd)
      CALL lammps_command(self%handle,str)
      CALL lammps_free(str)
  END SUBROUTINE lmp_command

  ! equivalent function to lammps_commands_list()
  !
  ! cmds: list of commands, one per array element.  The argument is
  !       declared OPTIONAL; calling without it is a no-op.
  SUBROUTINE lmp_commands_list(self,cmds)
      IMPLICIT NONE
      CLASS(lammps) :: self
      CHARACTER(len=*), INTENT(in), OPTIONAL :: cmds(:)
      TYPE(c_ptr), ALLOCATABLE     :: cmdv(:)
      INTEGER :: i,ncmd

      ! An absent optional argument must not be referenced (not even
      ! through SIZE()), so there is nothing to do without a command list.
      IF (.NOT. PRESENT(cmds)) RETURN

      ! convert command list to c style
      ncmd = SIZE(cmds)
      ALLOCATE(cmdv(ncmd))
      DO i=1,ncmd
          cmdv(i) = f2c_string(cmds(i))
      END DO

      CALL lammps_commands_list(self%handle,ncmd,cmdv)

      ! Clean up allocated memory
      DO i=1,ncmd
          CALL lammps_free(cmdv(i))
      END DO
      DEALLOCATE(cmdv)
  END SUBROUTINE lmp_commands_list

  ! equivalent function to lammps_commands_string()
  SUBROUTINE lmp_commands_string(self,str)
      IMPLICIT NONE
      CLASS(lammps) :: self
      CHARACTER(len=*) :: str
      TYPE(c_ptr) :: tmp

      tmp = f2c_string(str)
      CALL lammps_commands_string(self%handle,tmp)
      CALL lammps_free(tmp)
  END SUBROUTINE lmp_commands_string

  ! ----------------------------------------------------------------------
  ! local helper functions
  ! copy fortran string to zero terminated c string
  !
  ! Trailing blanks of f_string are dropped (LEN_TRIM) and a terminating
  ! null character is appended.  The returned pointer refers to newly
  ! allocated storage; all callers in this module release it with
  ! lammps_free().
  ! NOTE(review): the storage is obtained with Fortran ALLOCATE but
  ! released through the C library; this presumably relies on both using
  ! the same allocator - confirm against the lammps_free() implementation.
  FUNCTION f2c_string(f_string) RESULT(ptr)
      CHARACTER (len=*), INTENT(in)           :: f_string
      CHARACTER (len=1, kind=c_char), POINTER :: c_string(:)
      TYPE(c_ptr) :: ptr
      INTEGER :: i, n

      n = LEN_TRIM(f_string)
      ALLOCATE(c_string(n+1))
      DO i=1,n
          c_string(i) = f_string(i:i)
      END DO
      c_string(n+1) = c_null_char
      ptr = c_loc(c_string(1))
  END FUNCTION f2c_string
END MODULE LIBLAMMPS

View File

@ -22,13 +22,13 @@ NVCC = nvcc
#CUDA_ARCH = -arch=sm_21 #CUDA_ARCH = -arch=sm_21
# Kepler hardware # Kepler hardware
CUDA_ARCH = -arch=sm_30 #CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32 #CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35 #CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37 #CUDA_ARCH = -arch=sm_37
# Maxwell hardware # Maxwell hardware
#CUDA_ARCH = -arch=sm_50 CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52 #CUDA_ARCH = -arch=sm_52
# Pascal hardware # Pascal hardware

View File

@ -7,18 +7,40 @@
EXTRAMAKE = Makefile.lammps.standard EXTRAMAKE = Makefile.lammps.standard
ifeq ($(CUDA_HOME),)
CUDA_HOME = /usr/local/cuda CUDA_HOME = /usr/local/cuda
endif
NVCC = nvcc NVCC = nvcc
# Kepler CUDA # obsolete hardware. not supported by current drivers anymore.
#CUDA_ARCH = -arch=sm_35
# Tesla CUDA
CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13 #CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
# Fermi hardware
#CUDA_ARCH = -arch=sm_20
#CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37
# Maxwell hardware
CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52
# Pascal hardware
#CUDA_ARCH = -arch=sm_60
#CUDA_ARCH = -arch=sm_61
# Volta hardware
#CUDA_ARCH = -arch=sm_70
# Turing hardware
#CUDA_ARCH = -arch=sm_75
# this setting should match LAMMPS Makefile # this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +55,7 @@ CUDA_PRECISION = -D_DOUBLE_DOUBLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
CUDA_OPTS = -DUNIX -O3 --use_fast_math CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias

View File

@ -7,18 +7,41 @@
EXTRAMAKE = Makefile.lammps.standard EXTRAMAKE = Makefile.lammps.standard
ifeq ($(CUDA_HOME),)
CUDA_HOME = /usr/local/cuda CUDA_HOME = /usr/local/cuda
endif
NVCC = nvcc NVCC = nvcc
# Kepler CUDA # obsolete hardware. not supported by current drivers anymore.
#CUDA_ARCH = -arch=sm_35
# Tesla CUDA
CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13 #CUDA_ARCH = -arch=sm_13
# older CUDA # older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
# Fermi hardware
#CUDA_ARCH = -arch=sm_20
#CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37
# Maxwell hardware
CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52
# Pascal hardware
#CUDA_ARCH = -arch=sm_60
#CUDA_ARCH = -arch=sm_61
# Volta hardware
#CUDA_ARCH = -arch=sm_70
# Turing hardware
#CUDA_ARCH = -arch=sm_75
# this setting should match LAMMPS Makefile # this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +56,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
CUDA_OPTS = -DUNIX -O3 --use_fast_math CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias

View File

@ -7,18 +7,40 @@
EXTRAMAKE = Makefile.lammps.standard EXTRAMAKE = Makefile.lammps.standard
ifeq ($(CUDA_HOME),)
CUDA_HOME = /usr/local/cuda CUDA_HOME = /usr/local/cuda
endif
NVCC = nvcc NVCC = nvcc
# Kepler CUDA # obsolete hardware. not supported by current drivers anymore.
#CUDA_ARCH = -arch=sm_35
# Tesla CUDA
CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13 #CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
# Fermi hardware
#CUDA_ARCH = -arch=sm_20
#CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37
# Maxwell hardware
CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52
# Pascal hardware
#CUDA_ARCH = -arch=sm_60
#CUDA_ARCH = -arch=sm_61
# Volta hardware
#CUDA_ARCH = -arch=sm_70
# Turing hardware
#CUDA_ARCH = -arch=sm_75
# this setting should match LAMMPS Makefile # this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -33,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_SINGLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
CUDA_OPTS = -DUNIX -O3 --use_fast_math CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias

View File

@ -13,17 +13,27 @@ endif
NVCC = nvcc NVCC = nvcc
# Kepler CUDA # obsolete hardware. not supported by current drivers anymore.
#CUDA_ARCH = -arch=sm_35
# newer CUDA
#CUDA_ARCH = -arch=sm_13 #CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
CUDA_ARCH = -arch=sm_30
CUDA_CODE = -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \ # Fermi hardware
-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] \ #CUDA_ARCH = -arch=sm_20
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] #CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37
# Maxwell hardware
CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52
CUDA_CODE = -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] \
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] \
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_75,code=[sm_75,compute_75]
CUDA_ARCH += $(CUDA_CODE) CUDA_ARCH += $(CUDA_CODE)

View File

@ -13,13 +13,33 @@ endif
NVCC = nvcc NVCC = nvcc
# Tesla CUDA # obsolete hardware. not supported by current drivers anymore.
CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13 #CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
CUDA_ARCH = -arch=sm_35
# Fermi hardware
#CUDA_ARCH = -arch=sm_20
#CUDA_ARCH = -arch=sm_21
# Kepler hardware
#CUDA_ARCH = -arch=sm_30
#CUDA_ARCH = -arch=sm_32
#CUDA_ARCH = -arch=sm_35
#CUDA_ARCH = -arch=sm_37
# Maxwell hardware
CUDA_ARCH = -arch=sm_50
#CUDA_ARCH = -arch=sm_52
# Pascal hardware
#CUDA_ARCH = -arch=sm_60
#CUDA_ARCH = -arch=sm_61
# Volta hardware
#CUDA_ARCH = -arch=sm_70
# Turing hardware
#CUDA_ARCH = -arch=sm_75
# this setting should match LAMMPS Makefile # this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL # one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
@ -35,7 +55,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs -L../../src/STUBS -lmpi_stubs CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs -L../../src/STUBS -lmpi_stubs
CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias

View File

@ -81,7 +81,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
numtyp r2inv = ucl_recip(rsq); numtyp r2inv = ucl_recip(rsq);
numtyp r = ucl_sqrt(rsq); numtyp r = ucl_sqrt(rsq);
numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq* numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj; ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;
f.x+=delx*force; f.x+=delx*force;
f.y+=dely*force; f.y+=dely*force;
@ -90,7 +90,7 @@ __kernel void k_gauss(const __global numtyp4 *restrict x_,
if (eflag>0) { if (eflag>0) {
numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) - numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
gauss1[mtype].w); gauss1[mtype].w);
energy+=factor_lj*e; energy+=e; //factor_lj*e;
} }
if (vflag>0) { if (vflag>0) {
virial[0] += delx*delx*force; virial[0] += delx*delx*force;
@ -168,7 +168,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
numtyp r2inv = ucl_recip(rsq); numtyp r2inv = ucl_recip(rsq);
numtyp r = ucl_sqrt(rsq); numtyp r = ucl_sqrt(rsq);
numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq* numtyp force = (numtyp)-2.0*gauss1[mtype].x*gauss1[mtype].y*rsq*
ucl_exp(-gauss1[mtype].y*rsq)*r2inv*factor_lj; ucl_exp(-gauss1[mtype].y*rsq)*r2inv; //*factor_lj;
f.x+=delx*force; f.x+=delx*force;
f.y+=dely*force; f.y+=dely*force;
@ -177,7 +177,7 @@ __kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
if (eflag>0) { if (eflag>0) {
numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) - numtyp e=-(gauss1[mtype].x*ucl_exp(-gauss1[mtype].y*rsq) -
gauss1[mtype].w); gauss1[mtype].w);
energy+=factor_lj*e; energy+=e; //factor_lj*e;
} }
if (vflag>0) { if (vflag>0) {
virial[0] += delx*delx*force; virial[0] += delx*delx*force;

View File

@ -709,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++) for (int i=0; i<6; i++)
virial[i]=(acctyp)0; virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR]; __local int ijnum_shared[BLOCK_PAIR];
__syncthreads(); __syncthreads();
@ -789,14 +789,14 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK; k &= NEIGHMASK;
if (k == i) { if (k == i) {
ijnum = nbor_k; ijnum = nbor_k;
red_acc[m] = ijnum; ijnum_shared[m] = ijnum;
break; break;
} }
} }
numtyp r1 = ucl_sqrt(rsq1); numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m]; if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum; int idx = ijnum;

View File

@ -719,7 +719,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++) for (int i=0; i<6; i++)
virial[i]=(acctyp)0; virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR]; __local int ijnum_shared[BLOCK_PAIR];
__syncthreads(); __syncthreads();
@ -799,14 +799,14 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK; k &= NEIGHMASK;
if (k == i) { if (k == i) {
ijnum = nbor_k; ijnum = nbor_k;
red_acc[m] = ijnum; ijnum_shared[m] = ijnum;
break; break;
} }
} }
numtyp r1 = ucl_sqrt(rsq1); numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m]; if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum; int idx = ijnum;
@ -957,7 +957,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++) for (int i=0; i<6; i++)
virial[i]=(acctyp)0; virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR]; __local int ijnum_shared[BLOCK_PAIR];
__syncthreads(); __syncthreads();
@ -1037,14 +1037,14 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
k &= NEIGHMASK; k &= NEIGHMASK;
if (k == i) { if (k == i) {
ijnum = nbor_k; ijnum = nbor_k;
red_acc[m] = ijnum; ijnum_shared[m] = ijnum;
break; break;
} }
} }
numtyp r1 = ucl_sqrt(rsq1); numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m]; if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum; int idx = ijnum;

View File

@ -729,7 +729,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++) for (int i=0; i<6; i++)
virial[i]=(acctyp)0; virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR]; __local int ijnum_shared[BLOCK_PAIR];
__syncthreads(); __syncthreads();
@ -809,14 +809,14 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK; k &= NEIGHMASK;
if (k == i) { if (k == i) {
ijnum = nbor_k; ijnum = nbor_k;
red_acc[m] = ijnum; ijnum_shared[m] = ijnum;
break; break;
} }
} }
numtyp r1 = ucl_sqrt(rsq1); numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m]; if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor // idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum; int idx = ijnum;

View File

@ -10,33 +10,45 @@ for C++. Applications heavily leveraging Kokkos are strongly encouraged to use
You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project. You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project.
Modern CMake is exceedingly simple at a high-level (with the devil in the details). Modern CMake is exceedingly simple at a high-level (with the devil in the details).
Once Kokkos is installed In your `CMakeLists.txt` simply use: Once Kokkos is installed In your `CMakeLists.txt` simply use:
```` ````cmake
find_package(Kokkos REQUIRED) find_package(Kokkos REQUIRED)
```` ````
Then for every executable or library in your project: Then for every executable or library in your project:
```` ````cmake
target_link_libraries(myTarget Kokkos::kokkos) target_link_libraries(myTarget Kokkos::kokkos)
```` ````
That's it! There is no checking Kokkos preprocessor, compiler, or linker flags. That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
Kokkos propagates all the necessary flags to your project. Kokkos propagates all the necessary flags to your project.
This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your* This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`. project.
When configuring your project just set:
````bash
> cmake ${srcdir} \
-DKokkos_ROOT=${kokkos_install_prefix} \
-DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
````
Note: You may need the following if using some versions of CMake (e.g. 3.12):
````cmake
cmake_policy(SET CMP0074 NEW)
````
If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(Kokkos::kokkos)`.
The examples in `examples/cmake_build_installed` and `examples/cmake_build_in_tree` can help get you started.
## Configuring CMake ## Configuring CMake
A very basic installation is done with: A very basic installation of Kokkos is done with:
```` ````bash
cmake ${srcdir} \ > cmake ${srcdir} \
-DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_INSTALL_PREFIX=${my_install_folder} -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder}
```` ````
which builds and installs a default Kokkos when you run `make install`. which builds and installs a default Kokkos when you run `make install`.
There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g. There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g.
```` ````bash
cmake ${srcdir} \ > cmake ${srcdir} \
-DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_INSTALL_PREFIX=${my_install_folder} \ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \
-DKokkos_ENABLE_OPENMP=On -DKokkos_ENABLE_OPENMP=ON
```` ````
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below. which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
@ -50,16 +62,16 @@ which activates the OpenMP backend. All of the options controlling device backen
## Spack ## Spack
An alternative to manually building with the CMake is to use the Spack package manager. An alternative to manually building with the CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list: To do so, download the `kokkos-spack` git repo and add to the package list:
```` ````bash
spack repo add $path-to-kokkos-spack > spack repo add $path-to-kokkos-spack
```` ````
A basic installation would be done as: A basic installation would be done as:
```` ````bash
spack install kokkos > spack install kokkos
```` ````
Spack allows options and compilers to be tuned in the install command. Spack allows options and compilers to be tuned in the install command.
```` ````bash
spack install kokkos@3.0 %gcc@7.3.0 +openmp > spack install kokkos@3.0 %gcc@7.3.0 +openmp
```` ````
This example illustrates the three most common parameters to Spack: This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -67,17 +79,17 @@ This example illustrates the three most common parameters to Spack:
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option. * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option.
For a complete list of Kokkos options, run: For a complete list of Kokkos options, run:
````bash
> spack info kokkos
```` ````
spack info kokkos More details can be found in the [Spack README](Spack.md)
````
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
#### Spack Development #### Spack Development
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder. Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
If you must know, you can locate Spack Kokkos installations with: If you must know, you can locate Spack Kokkos installations with:
```` ````bash
spack find -p kokkos ... > spack find -p kokkos ...
```` ````
where `...` is the unique spec identifying the particular Kokkos configuration and version. where `...` is the unique spec identifying the particular Kokkos configuration and version.
@ -104,6 +116,12 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_SERIAL * Kokkos_ENABLE_SERIAL
* Whether to build serial backend * Whether to build serial backend
* BOOL Default: ON * BOOL Default: ON
* Kokkos_ENABLE_HIP (Experimental)
* Whether to build HIP backend
* BOOL Default: OFF
* Kokkos_ENABLE_OPENMPTARGET (Experimental)
* Whether to build the OpenMP target backend
* BOOL Default: OFF
## Enable Options ## Enable Options
Options can be enabled by specifying `-DKokkos_ENABLE_X`. Options can be enabled by specifying `-DKokkos_ENABLE_X`.
@ -138,9 +156,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK * Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
* Debug check on dual views * Debug check on dual views
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ENABLE_DEPRECATED_CODE
* Whether to enable deprecated code
* BOOL Default: OFF
* Kokkos_ENABLE_EXAMPLES * Kokkos_ENABLE_EXAMPLES
* Whether to enable building examples * Whether to enable building examples
* BOOL Default: OFF * BOOL Default: OFF
@ -150,9 +165,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_LARGE_MEM_TESTS * Kokkos_ENABLE_LARGE_MEM_TESTS
* Whether to perform extra large memory tests * Whether to perform extra large memory tests
* BOOL_Default: OFF * BOOL_Default: OFF
* Kokkos_ENABLE_PROFILING
* Whether to create bindings for profiling tools
* BOOL Default: ON
* Kokkos_ENABLE_PROFILING_LOAD_PRINT * Kokkos_ENABLE_PROFILING_LOAD_PRINT
* Whether to print information about which profiling tools got loaded * Whether to print information about which profiling tools got loaded
* BOOL Default: OFF * BOOL Default: OFF
@ -235,8 +247,11 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
* Kokkos_ARCH_BGQ * Kokkos_ARCH_BGQ
* Whether to optimize for the BGQ architecture * Whether to optimize for the BGQ architecture
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ARCH_EPYC * Kokkos_ARCH_ZEN
* Whether to optimize for the EPYC architecture * Whether to optimize for the Zen architecture
* BOOL Default: OFF
* Kokkos_ARCH_ZEN2
* Whether to optimize for the Zen2 architecture
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ARCH_HSW * Kokkos_ARCH_HSW
* Whether to optimize for the HSW architecture * Whether to optimize for the HSW architecture

View File

@ -1,6 +1,113 @@
# Change Log # Change Log
## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14) ## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00)
**Implemented enhancements:**
- HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163)
- HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154)
- HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137)
- HIP:Require HIP 3.5.0 or higher [\#3099](https://github.com/kokkos/kokkos/issues/3099)
- HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096)
- OpenMPTarget: Significant update to the new experimental backend. Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169)
- Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018)
- Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241)
- Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 1: [\#3202](https://github.com/kokkos/kokkos/issues/3202)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 2: [\#3203](https://github.com/kokkos/kokkos/issues/3203)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 3: [\#3196](https://github.com/kokkos/kokkos/issues/3196)
- Annotations for `DefaultExectutionSpace` and `DefaultHostExectutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189)
- Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187)
- Improve support for nvcc\_wrapper with exotic host compiler [\#3186](https://github.com/kokkos/kokkos/issues/3186)
- Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185)
- Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166)
- Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165)
- View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159)
- Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151)
- BuildSystem: Provide an explicit default CMAKE\_BUILD\_TYPE [\#3131](https://github.com/kokkos/kokkos/issues/3131)
- Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124)
- Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123)
- BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122)
- General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106)
- TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098)
- nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092)
- BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091)
- Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087)
- Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084)
- BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082)
- Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076)
- HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067)
- Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061)
- Extract and use get\_gpu [\#3048](https://github.com/kokkos/kokkos/issues/3048)
- Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059)
- Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052)
- Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051)
- Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034)
- BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021)
- HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020)
- Autodetect number of devices [\#3013](https://github.com/kokkos/kokkos/issues/3013)
**Fixed bugs:**
- Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255)
- Fix issue with C++ standard flags when using `nvcc\_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254)
- Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208)
- Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176)
- Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170)
- BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161)
- ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162)
- BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127)
- OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101)
- BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085)
- HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080)
- Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072)
- Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069)
- Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035)
**Incompatibilities:**
- Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157)
- Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147)
- Remove core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146)
- Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115)
- Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066)
**Closed issues:**
- Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
- Remove KOKKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
- Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
- In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
- Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)
- DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040)
- Add installation testing. [\#3037](https://github.com/kokkos/kokkos/issues/3037)
- nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017)
- CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996)
- Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975)
- Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974)
- Add ability to assign kokkos profile function from executable [\#2973](https://github.com/kokkos/kokkos/issues/2973)
- ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967)
- Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237)
- Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252)
- Error configuring as subproject [\#3140](https://github.com/kokkos/kokkos/issues/3140)
- CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207)
- PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125)
- Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167)
- RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192)
- MemorySpace::allocate needs to have memory pool counting. [\#3064](https://github.com/kokkos/kokkos/issues/3064)
- Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038)
- CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026)
- Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014)
- nvcc\_wrapper cant handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993)
- Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990)
- complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
- Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979)
- Rank 1 custom layouts dont work as expected. [\#2840](https://github.com/kokkos/kokkos/issues/2840)
## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)
**Fixed bugs:** **Fixed bugs:**

View File

@ -1,4 +1,9 @@
# Abort configuration when the build tree is the source tree itself:
# generated files would otherwise be written next to (and could clobber)
# the checked-in sources.
if("${CMAKE_BINARY_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
  message(
    FATAL_ERROR
    "FATAL: In-source builds are not allowed. You should create a separate directory for build files."
  )
endif()
# We want to determine if options are given with the wrong case # We want to determine if options are given with the wrong case
# In order to detect which arguments are given to compare against # In order to detect which arguments are given to compare against
# the list of valid arguments, at the beginning here we need to # the list of valid arguments, at the beginning here we need to
@ -34,6 +39,9 @@ IF(COMMAND TRIBITS_PACKAGE_DECL)
ELSE() ELSE()
SET(KOKKOS_HAS_TRILINOS OFF) SET(KOKKOS_HAS_TRILINOS OFF)
ENDIF() ENDIF()
# Detect whether Kokkos is being configured as a subdirectory of another
# project: PARENT_DIRECTORY is empty for the top-level CMakeLists.txt, so
# HAS_PARENT evaluates to false in a standalone build and true otherwise.
# The result is queried further down (e.g. IF(NOT HAS_PARENT) / IF(HAS_PARENT)).
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
@ -75,16 +83,17 @@ IF(NOT KOKKOS_HAS_TRILINOS)
SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
SET(ENV{CXX} ${SPACK_CXX}) SET(ENV{CXX} ${SPACK_CXX})
ENDIF() ENDIF()
ENDif()
IF(NOT DEFINED ${PROJECT_NAME})
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
ENDIF() ENDIF()
# Always call the project command to define Kokkos_ variables
# and to make sure that C++ is an enabled language
PROJECT(Kokkos CXX) PROJECT(Kokkos CXX)
IF(Kokkos_ENABLE_HIP) IF(NOT HAS_PARENT)
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS}) IF (NOT CMAKE_BUILD_TYPE)
SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
"Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
FORCE)
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
@ -102,8 +111,8 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 1) set(Kokkos_VERSION_MINOR 2)
set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -147,6 +156,7 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables # Check the environment and set certain variables
# to allow platform-specific checks # to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
# The build environment setup goes in the following steps # The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES # 1) Check all the enable options. This includes checking Kokkos_DEVICES
# 2) Check the compiler ID (type and version) # 2) Check the compiler ID (type and version)
@ -169,7 +179,6 @@ SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontain
SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES}) SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
IF (KOKKOS_HAS_TRILINOS) IF (KOKKOS_HAS_TRILINOS)
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
@ -203,7 +212,7 @@ IF (KOKKOS_HAS_TRILINOS)
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG}) LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH() ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}") SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA) IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
@ -246,7 +255,7 @@ KOKKOS_PACKAGE_POSTPROCESS()
#We are ready to configure the header #We are ready to configure the header
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
IF (NOT KOKKOS_HAS_TRILINOS) IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
ADD_LIBRARY(kokkos INTERFACE) ADD_LIBRARY(kokkos INTERFACE)
#Make sure in-tree projects can reference this as Kokkos:: #Make sure in-tree projects can reference this as Kokkos::
#to match the installed target names #to match the installed target names
@ -262,8 +271,6 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
# If the argument of DESTINATION is a relative path, CMake computes it # If the argument of DESTINATION is a relative path, CMake computes it
# as relative to ${CMAKE_INSTALL_PATH}. # as relative to ${CMAKE_INSTALL_PATH}.
INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR}) INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR})
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# Finally - if we are a subproject - make sure the enabled devices are visible # Finally - if we are a subproject - make sure the enabled devices are visible
IF (HAS_PARENT) IF (HAS_PARENT)

View File

@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
endif endif
KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 1 KOKKOS_VERSION_MINOR = 2
KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial # Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
@ -20,11 +20,11 @@ KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthread" #KOKKOS_DEVICES ?= "Pthread"
# Options: # Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX # Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9 # IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906 # AMD-GPUS: Vega900,Vega906
# AMD-CPUS: AMDAVX,EPYC # AMD-CPUS: AMDAVX,Zen,Zen2
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
# Options: yes,no # Options: yes,no
KOKKOS_DEBUG ?= "no" KOKKOS_DEBUG ?= "no"
@ -32,10 +32,8 @@ KOKKOS_DEBUG ?= "no"
KOKKOS_USE_TPLS ?= "" KOKKOS_USE_TPLS ?= ""
# Options: c++11,c++14,c++1y,c++17,c++1z,c++2a # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a
KOKKOS_CXX_STANDARD ?= "c++11" KOKKOS_CXX_STANDARD ?= "c++11"
# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align # Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
KOKKOS_OPTIONS ?= "" KOKKOS_OPTIONS ?= ""
# Option for setting ETI path
KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
KOKKOS_CMAKE ?= "no" KOKKOS_CMAKE ?= "no"
KOKKOS_TRIBITS ?= "no" KOKKOS_TRIBITS ?= "no"
KOKKOS_STANDALONE_CMAKE ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no"
@ -74,6 +72,7 @@ KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17) KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z) KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a) KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20)
# Check for external libraries. # Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
@ -83,9 +82,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
# Check for advanced settings. # Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code)
KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
@ -96,7 +93,6 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc) KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
@ -140,6 +136,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_DEVICELIST += OPENMPTARGET KOKKOS_DEVICELIST += OPENMPTARGET
# Gate for the OpenMPTarget backend: add up the c++17 / c++20 / c++2a
# selection flags and stop the build with an error unless the sum is exactly 1.
# NOTE(review): KOKKOS_INTERNAL_ENABLE_CXX1Z is not part of the sum even though
# c++1z also results in KOKKOS_ENABLE_CXX17 being defined further down, so
# KOKKOS_CXX_STANDARD=c++1z is rejected here -- confirm this is intended.
KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX20) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX2A))
ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
$(error OpenMPTarget backend requires C++17 or newer)
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -281,7 +283,7 @@ endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_CXX11_FLAG := --c++11 KOKKOS_INTERNAL_CXX11_FLAG := --c++11
KOKKOS_INTERNAL_CXX14_FLAG := --c++14 KOKKOS_INTERNAL_CXX14_FLAG := --c++14
#KOKKOS_INTERNAL_CXX17_FLAG := --c++17 KOKKOS_INTERNAL_CXX17_FLAG := --c++17
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11 KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
@ -338,35 +340,27 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70) KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72) KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75) KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80))
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
#SEK: This seems like a bug to me #SEK: This seems like a bug to me
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell) KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler) KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
@ -394,19 +388,20 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based. # AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC) KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900) KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
# Any AVX? # Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC)) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX)) KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support. # Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC)) KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC)) KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9)) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7)) KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -430,7 +425,7 @@ endif
KOKKOS_CPPFLAGS = KOKKOS_CPPFLAGS =
KOKKOS_LIBDIRS = KOKKOS_LIBDIRS =
ifneq ($(KOKKOS_CMAKE), yes) ifneq ($(KOKKOS_CMAKE), yes)
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
endif endif
KOKKOS_TPL_INCLUDE_DIRS = KOKKOS_TPL_INCLUDE_DIRS =
KOKKOS_TPL_LIBRARY_DIRS = KOKKOS_TPL_LIBRARY_DIRS =
@ -458,88 +453,91 @@ KOKKOS_CONFIG_HEADER=KokkosCore_config.h
# Functions for generating config header file # Functions for generating config header file
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP)) kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
# assign hash sign to variable for compat. with make 4.3
H := \#
# Do not append first line # Do not append first line
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"Makefile constructed configuration:") tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
tmp := $(call kokkos_append_header,"$(shell date)") tmp := $(call kokkos_append_header,"$(shell date)")
tmp := $(call kokkos_append_header,"----------------------------------------------*/") tmp := $(call kokkos_append_header,"----------------------------------------------*/")
tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)') tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."') tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
tmp := $(call kokkos_append_header,'\#else') tmp := $(call kokkos_append_header,'$H''else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H') tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif') tmp := $(call kokkos_append_header,'$H''endif')
tmp := $(call kokkos_append_header,"") tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)") tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"") tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */") tmp := $(call kokkos_append_header,"/* Execution Spaces */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)") tmp := $(call kokkos_append_header,"$H""define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM') tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1') tmp := $(call kokkos_append_header,'$H''define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP') tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET') tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP') tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"\#endif") tmp := $(call kokkos_append_header,"$H""endif")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64") tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"\#endif") tmp := $(call kokkos_append_header,"$H""endif")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"\#endif") tmp := $(call kokkos_append_header,"$H""endif")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"\#endif") tmp := $(call kokkos_append_header,"$H""endif")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"\#endif") tmp := $(call kokkos_append_header,"$H""endif")
endif endif
#only add the c++ standard flags if this is not CMake #only add the c++ standard flags if this is not CMake
@ -548,34 +546,39 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
endif endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX11")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
endif endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
#I cannot make CMake add this in a good way - so add it here #I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
endif endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
#I cannot make CMake add this in a good way - so add it here #I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
#I cannot make CMake add this in a good way - so add it here #I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@ -585,20 +588,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
KOKKOS_CXXFLAGS += -g KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g KOKKOS_LDFLAGS += -g
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0) ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
endif endif
# Emit the tuning and libdl feature macros into the generated config header.
# The hash sign is spelled via $H (assigned above as "H := \#") instead of
# "\#": GNU make 4.3 and newer keep the backslash inside a function call, so
# "\#define" would be written literally into KokkosCore_config.h and break
# compilation. This matches every other kokkos_append_header call in this file.
ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING")
endif
# libdl support is advertised unconditionally.
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL")
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
ifneq ($(KOKKOS_CMAKE), yes) ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(HWLOC_PATH),) ifneq ($(HWLOC_PATH),)
@ -611,11 +620,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_LIBS += -lhwloc KOKKOS_LIBS += -lhwloc
KOKKOS_TPL_LIBRARY_NAMES += hwloc KOKKOS_TPL_LIBRARY_NAMES += hwloc
endif endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT") tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt KOKKOS_LIBS += -lrt
KOKKOS_TPL_LIBRARY_NAMES += rt KOKKOS_TPL_LIBRARY_NAMES += rt
endif endif
@ -632,50 +641,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_LIBS += -lmemkind -lnuma KOKKOS_LIBS += -lmemkind -lnuma
KOKKOS_TPL_LIBRARY_NAMES += memkind numa KOKKOS_TPL_LIBRARY_NAMES += memkind numa
endif endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
endif
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
endif endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
endif endif
tmp := $(call kokkos_append_header,"/* Optimization Settings */") tmp := $(call kokkos_append_header,"/* Optimization Settings */")
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION") tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
endif endif
tmp := $(call kokkos_append_header,"/* Cuda Settings */") tmp := $(call kokkos_append_header,"/* Cuda Settings */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
endif endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_CXXFLAGS += -fcuda-rdc KOKKOS_CXXFLAGS += -fcuda-rdc
KOKKOS_LDFLAGS += -fcuda-rdc KOKKOS_LDFLAGS += -fcuda-rdc
@ -696,7 +691,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda KOKKOS_CXXFLAGS += -expt-extended-lambda
else else
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
@ -704,14 +699,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
KOKKOS_CXXFLAGS += -expt-relaxed-constexpr KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
else else
$(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.) $(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.)
@ -719,25 +714,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
endif endif
endif endif
# Add Architecture flags. # Add Architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS += KOKKOS_CXXFLAGS +=
@ -754,7 +749,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS += KOKKOS_CXXFLAGS +=
@ -770,9 +765,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx2 KOKKOS_CXXFLAGS += -mavx2
@ -783,9 +778,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx2
KOKKOS_LDFLAGS += -mavx2
else
KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS += KOKKOS_CXXFLAGS +=
@ -802,8 +810,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS += KOKKOS_CXXFLAGS +=
@ -820,7 +828,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSSE4.2 KOKKOS_CXXFLAGS += -xSSE4.2
@ -842,7 +850,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx KOKKOS_CXXFLAGS += -mavx
@ -864,7 +872,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -876,7 +884,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -897,7 +905,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -918,7 +926,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2 KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -940,7 +948,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2 KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -962,7 +970,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xMIC-AVX512 KOKKOS_CXXFLAGS += -xMIC-AVX512
@ -983,7 +991,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX512 KOKKOS_CXXFLAGS += -xCORE-AVX512
@ -1004,7 +1012,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
KOKKOS_CXXFLAGS += -mmic KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic KOKKOS_LDFLAGS += -mmic
endif endif
@ -1039,65 +1047,70 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
endif
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
@ -1121,13 +1134,13 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
# Lets start with adding architecture defines # Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
endif endif
@ -1138,7 +1151,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXFLAGS+=-fgpu-rdc KOKKOS_CXXFLAGS+=-fgpu-rdc
KOKKOS_LDFLAGS+=-fgpu-rdc KOKKOS_LDFLAGS+=-fgpu-rdc
else else
@ -1171,9 +1184,6 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
ifneq ($(CUDA_PATH),) ifneq ($(CUDA_PATH),)
KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include
@ -1211,9 +1221,6 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -1228,9 +1235,6 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
KOKKOS_LIBS += -lpthread KOKKOS_LIBS += -lpthread
KOKKOS_TPL_LIBRARY_NAMES += pthread KOKKOS_TPL_LIBRARY_NAMES += pthread
@ -1279,9 +1283,6 @@ endif
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial # Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
# device to avoid a link warning. # device to avoid a link warning.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
endif
endif endif
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))

View File

@ -26,21 +26,17 @@ Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spi
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,9 +46,6 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
@ -75,9 +68,6 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -85,9 +75,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -95,9 +82,6 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)

View File

@ -151,7 +151,7 @@ Full details are given in the [build instructions](BUILD.md). Basic setups are s
## CMake ## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
```` ````bash
cmake $srcdir \ cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \ -DCMAKE_CXX_COMPILER=$path_to_compiler \
-DCMAKE_INSTALL_PREFIX=$path_to_install \ -DCMAKE_INSTALL_PREFIX=$path_to_install \
@ -170,7 +170,7 @@ and run `make test` after completing the build.
For your CMake project using Kokkos, code such as the following: For your CMake project using Kokkos, code such as the following:
```` ````cmake
find_package(Kokkos) find_package(Kokkos)
... ...
target_link_libraries(myTarget Kokkos::kokkos) target_link_libraries(myTarget Kokkos::kokkos)
@ -187,17 +187,15 @@ for the install location given above.
## Spack ## Spack
An alternative to manually building with the CMake is to use the Spack package manager. An alternative to manually building with the CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list: To get started, download the Spack [repo](https://github.com/spack/spack).
````
spack repo add $path-to-kokkos-spack
```` ````
A basic installation would be done as: A basic installation would be done as:
```` ````bash
spack install kokkos > spack install kokkos
```` ````
Spack allows options and compilers to be tuned in the install command. Spack allows options and compilers to be tuned in the install command.
```` ````bash
spack install kokkos@3.0 %gcc@7.3.0 +openmp > spack install kokkos@3.0 %gcc@7.3.0 +openmp
```` ````
This example illustrates the three most common parameters to Spack: This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -205,33 +203,33 @@ This example illustrates the three most common parameters to Spack:
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option. * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option.
For a complete list of Kokkos options, run: For a complete list of Kokkos options, run:
```` ````bash
spack info kokkos > spack info kokkos
```` ````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder. Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with: More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
```` ````bash
spack find -p kokkos ... > spack find -p kokkos ...
```` ````
where `...` is the unique spec identifying the particular Kokkos configuration and version. where `...` is the unique spec identifying the particular Kokkos configuration and version.
Some more details can be found in the Kokkos Spack [documentation](Spack.md) or on the Spack [website](https://spack.readthedocs.io/en/latest).
## Raw Makefile ## Raw Makefile
A bash script is provided to generate raw makefiles. A bash script is provided to generate raw makefiles.
To install Kokkos as a library create a build directory and run the following To install Kokkos as a library create a build directory and run the following
```` ````bash
$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install > $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
```` ````
Once the Makefile is generated, run: Once the Makefile is generated, run:
```` ````bash
make kokkoslib > make kokkoslib
make install > make install
```` ````
To additionally run the unit tests: To additionally run the unit tests:
```` ````bash
make build-test > make build-test
make test > make test
```` ````
Run `generate_makefile.bash --help` for more detailed options such as Run `generate_makefile.bash --help` for more detailed options such as
changing the device type for which to build. changing the device type for which to build.
@ -274,7 +272,7 @@ more than a single GPU is used by a single process.
If you publish work which mentions Kokkos, please cite the following paper: If you publish work which mentions Kokkos, please cite the following paper:
```` ````BibTeX
@article{CarterEdwards20143202, @article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ", title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ", journal = "Journal of Parallel and Distributed Computing ",

267
lib/kokkos/Spack.md Normal file
View File

@ -0,0 +1,267 @@
![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)
# Kokkos Spack
This gives instructions for using Spack to install Kokkos and for developing packages that depend on Kokkos.
## Getting Started
Make sure you have downloaded [Spack](https://github.com/spack/spack).
The easiest way to configure the Spack environment is:
````bash
> source spack/share/spack/setup-env.sh
````
with other scripts available for other shells.
You can display information about how to install packages with:
````bash
> spack info kokkos
````
This will print all the information about how to install Kokkos with Spack.
For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io).
## Setting Up Spack: Avoiding the Package Cascade
By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA.
The resulting cascade of package builds can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be:
````yaml
packages:
  cuda:
    modules:
      cuda@10.1.243: [cuda/10.1.243]
    paths:
      cuda@10.1.243:
        /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
    buildable: false
  cmake:
    modules:
      cmake: [cmake/3.16.8]
    paths:
      cmake:
        /opt/local/ppc64le/cmake/3.16.8
    buildable: false
````
The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
The `buildable` flag is useful to make sure Spack crashes if there is a path error,
rather than having a typo and Spack rebuilding everything because `cmake` isn't found.
You can verify your environment is set up correctly by running `spack graph` or `spack spec`.
For example:
````bash
> spack graph kokkos +cuda
o kokkos
|\
o | cuda
/
o cmake
````
Without the existing CUDA and CMake being identified in `packages.yaml`, a subset (!) of the output would be:
````bash
o kokkos
|\
| o cmake
| |\
| | | |\
| | | | | |\
| | | | | | | |\
| | | | | | | | | |\
| | | | | | | o | | | libarchive
| | | | | | | |\ \ \ \
| | | | | | | | | |\ \ \ \
| | | | | | | | | | | | |_|/
| | | | | | | | | | | |/| |
| | | | | | | | | | | | | o curl
| | |_|_|_|_|_|_|_|_|_|_|/|
| |/| | | |_|_|_|_|_|_|_|/
| | | | |/| | | | | | | |
| | | | o | | | | | | | | openssl
| |/| | | | | | | | | | |
| | | | | | | | | | o | | libxml2
| | |_|_|_|_|_|_|_|/| | |
| | | | | | | | | | |\ \ \
| o | | | | | | | | | | | | zlib
| / / / / / / / / / / / /
| o | | | | | | | | | | | xz
| / / / / / / / / / / /
| o | | | | | | | | | | rhash
| / / / / / / / / / /
| | | | o | | | | | | nettle
| | | | |\ \ \ \ \ \ \
| | | o | | | | | | | | libuv
| | | | o | | | | | | | autoconf
| | |_|/| | | | | | | |
| | | | |/ / / / / / /
| o | | | | | | | | | perl
| o | | | | | | | | | gdbm
| o | | | | | | | | | readline
````
## Configuring Kokkos as a Project Dependency
Say you have a project "SuperScience" which needs to use Kokkos.
In your `package.py` file, you would generally include something like:
````python
class SuperScience(CMakePackage):
    ...
    depends_on("kokkos")
````
Often projects want to tweak behavior when using certain features, e.g.
````python
depends_on("kokkos+cuda", when="+cuda")
````
if your project needs CUDA-specific logic to configure and build.
This illustrates the general principle in Spack of "flowing-up".
A user requests a feature in the final app:
````bash
> spack install superscience+cuda
````
This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build.
The downstream app (SuperScience) tells the upstream app (Kokkos) how to build.
Because Kokkos is a performance portability library, it somewhat inverts this principle.
Kokkos "flows-down", telling your application how best to configure for performance.
Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build,
a pre-built Kokkos should be telling the downstream app SuperScience what variants to use.
Kokkos works best when there is an "expert" configuration installed on your system.
Your build should simply request `-DKokkos_ROOT=<BEST_KOKKOS_FOR_MY_SYSTEM>` and configure appropriately based on the Kokkos it finds.
Kokkos has many, many build variants.
Where possible, projects should only depend on a general Kokkos, not specific variants.
We recommend instead adding for each system you build on a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users).
For a Xeon + Volta system, this could look like:
````yaml
  kokkos:
    variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70
    compiler: [gcc@7.2.0]
````
which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1.
It also enables support for CUDA Lambdas.
The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below).
Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture.
For a Haswell system, we use
````yaml
  kokkos:
    variants: +openmp std=14 target=haswell
    compiler: [intel@18]
````
which uses the built-in microarchitecture variants of Spack.
Consult the Spack documentation for more details of Spack microarchitectures
and CUDA architectures.
Spack does not currently provide an AMD GPU microarchitecture option.
If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` variant similar to `cuda_arch`.
````yaml
  kokkos:
    variants: +hip amd_gpu_arch=vega900
````
Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want.
For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems).
If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project:
````bash
> spack install superscience
````
you may end up just getting the default Kokkos (i.e. Serial).
Some examples are included in the `config/yaml` folder for common platforms.
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
For example, with Kokkos Kernels:
````bash
kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512
^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512
^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512
^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512
^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512
````
The output can be very verbose, but we can verify the expected `kokkos`:
````bash
kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
````
We see, e.g., that we do have `+volta70` and `+wrapper`.
### Spack Environments
The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)).
Rather than installing packages one-at-a-time, you add packages to an environment.
After adding all packages, you concretize and install them all.
Using environments, one can explicitly add a desired Kokkos for the environment, e.g.
````bash
> spack add kokkos +cuda +cuda_lambda +volta70
> spack add my_project +my_variant
> ...
> spack install
````
All packages within the environment will build against the CUDA-enabled Kokkos,
even if they only request a default Kokkos.
## NVCC Wrapper
Kokkos is a C++ project, but often builds for the CUDA backend.
This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler.
Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler.
`nvcc` itself also uses an underlying host compiler, e.g. GCC.
In Spack, the underlying host compiler is specified as below, e.g.:
````bash
> spack install package %gcc@8.0.0
````
This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant.
````bash
> spack install kokkos +cuda +wrapper %gcc@7.2.0
````
Downstream projects depending on Kokkos need to override their compiler.
Kokkos provides the compiler in a `kokkos_cxx` variable,
which points to either `nvcc_wrapper` when needed or the regular compiler otherwise.
Spack projects already do this to use MPI compiler wrappers.
````python
def cmake_args(self):
    options = []
    ...
    options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx)
    ...
    return options
````
Note: `nvcc_wrapper` works with the MPI compiler wrappers.
If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`.
Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood.
````python
def cmake_args(self):
    options = []
    ...
    options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx)
    ...
    return options
````
To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI).
This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway.
This behavior is necessary for now, but will hopefully be removed later.
When using environments, if MPI is not needed, you can remove the MPI dependency with:
````bash
> spack add kokkos-nvcc-wrapper ~mpi
````
## Developing With Spack
Spack has historically been much more suited to *deployment* of mature packages than active testing or developing.
However, recent features have improved support for development.
Future releases are likely to make this even easier and incorporate Git integration.
The most common commands will do a full build and install of the packages.
If doing development, you may wish to merely set up a build environment.
This allows you to modify the source and re-build.
In this case, you can stop after configuring.
Suppose you have a Kokkos checkout in the folder `kokkos-src`:
````bash
> spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp
````
This sets up a development environment for you in `kokkos-src`, which you can use as shown in the Bash example below.
Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases.
You are usually developing a feature branch that will merge into `develop`,
hence you are making a new `develop` branch.
````bash
> cd kokkos-src
> source spack-build-env.txt
> cd spack-build
> make
````
Before sourcing the Spack development environment, you may wish to save your current environment:
````bash
> declare -px > myenv.sh
````
When done with Spack, you can then restore your original environment:
````bash
> source myenv.sh
````

View File

@ -2,7 +2,9 @@
KOKKOS_SUBPACKAGE(Algorithms) KOKKOS_SUBPACKAGE(Algorithms)
ADD_SUBDIRECTORY(src) IF (NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src)
ENDIF()
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)

View File

@ -7,9 +7,15 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
FILE(GLOB HEADERS *.hpp) FILE(GLOB ALGO_HEADERS *.hpp)
FILE(GLOB SOURCES *.cpp) FILE(GLOB ALGO_SOURCES *.cpp)
LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION ${KOKKOS_HEADER_DIR}
FILES_MATCHING PATTERN "*.hpp"
)
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
@ -17,8 +23,8 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
# These will get ignored for standalone CMake and a true interface library made # These will get ignored for standalone CMake and a true interface library made
KOKKOS_ADD_INTERFACE_LIBRARY( KOKKOS_ADD_INTERFACE_LIBRARY(
kokkosalgorithms kokkosalgorithms
HEADERS ${HEADERS} HEADERS ${ALGO_HEADERS}
SOURCES ${SOURCES} SOURCES ${ALGO_SOURCES}
) )
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${KOKKOS_TOP_BUILD_DIR} ${KOKKOS_TOP_BUILD_DIR}

View File

@ -94,9 +94,9 @@ namespace Kokkos {
class Pool { class Pool {
public: public:
//The Kokkos device type //The Kokkos device type
typedef Device device_type; using device_type = Device;
//The actual generator type //The actual generator type
typedef Generator<Device> generator_type; using generator_type = Generator<Device>;
//Default constructor: does not initialize a pool //Default constructor: does not initialize a pool
Pool(); Pool();
@ -124,7 +124,7 @@ namespace Kokkos {
class Generator { class Generator {
public: public:
//The Kokkos device type //The Kokkos device type
typedef DeviceType device_type; using device_type = DeviceType;
//Max return values of respective [X]rand[S]() functions //Max return values of respective [X]rand[S]() functions
enum {MAX_URAND = 0xffffffffU}; enum {MAX_URAND = 0xffffffffU};
@ -138,75 +138,75 @@ namespace Kokkos {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Generator (STATE_ARGUMENTS, int state_idx = 0); Generator (STATE_ARGUMENTS, int state_idx = 0);
//Draw a equidistributed uint32_t in the range (0,MAX_URAND] //Draw a equidistributed uint32_t in the range [0,MAX_URAND)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint32_t urand(); uint32_t urand();
//Draw a equidistributed uint64_t in the range (0,MAX_URAND64] //Draw a equidistributed uint64_t in the range [0,MAX_URAND64)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint64_t urand64(); uint64_t urand64();
//Draw a equidistributed uint32_t in the range (0,range] //Draw a equidistributed uint32_t in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range); uint32_t urand(const uint32_t& range);
//Draw a equidistributed uint32_t in the range (start,end] //Draw a equidistributed uint32_t in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end ); uint32_t urand(const uint32_t& start, const uint32_t& end );
//Draw a equidistributed uint64_t in the range (0,range] //Draw a equidistributed uint64_t in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range); uint64_t urand64(const uint64_t& range);
//Draw a equidistributed uint64_t in the range (start,end] //Draw a equidistributed uint64_t in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end ); uint64_t urand64(const uint64_t& start, const uint64_t& end );
//Draw a equidistributed int in the range (0,MAX_RAND] //Draw a equidistributed int in the range [0,MAX_RAND)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int rand(); int rand();
//Draw a equidistributed int in the range (0,range] //Draw a equidistributed int in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int rand(const int& range); int rand(const int& range);
//Draw a equidistributed int in the range (start,end] //Draw a equidistributed int in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end ); int rand(const int& start, const int& end );
//Draw a equidistributed int64_t in the range (0,MAX_RAND64] //Draw a equidistributed int64_t in the range [0,MAX_RAND64)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int64_t rand64(); int64_t rand64();
//Draw a equidistributed int64_t in the range (0,range] //Draw a equidistributed int64_t in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range); int64_t rand64(const int64_t& range);
//Draw a equidistributed int64_t in the range (start,end] //Draw a equidistributed int64_t in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end ); int64_t rand64(const int64_t& start, const int64_t& end );
//Draw a equidistributed float in the range (0,1.0] //Draw a equidistributed float in the range [0,1.0)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(); float frand();
//Draw a equidistributed float in the range (0,range] //Draw a equidistributed float in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& range); float frand(const float& range);
//Draw a equidistributed float in the range (start,end] //Draw a equidistributed float in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end ); float frand(const float& start, const float& end );
//Draw a equidistributed double in the range (0,1.0] //Draw a equidistributed double in the range [0,1.0)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(); double drand();
//Draw a equidistributed double in the range (0,range] //Draw a equidistributed double in the range [0,range)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& range); double drand(const double& range);
//Draw a equidistributed double in the range (start,end] //Draw a equidistributed double in the range [start,end)
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end ); double drand(const double& start, const double& end );
@ -221,11 +221,11 @@ namespace Kokkos {
//Additional Functions: //Additional Functions:
//Fills view with random numbers in the range (0,range] //Fills view with random numbers in the range [0,range)
template<class ViewType, class PoolType> template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool, ViewType::value_type range); void fill_random(ViewType view, PoolType pool, ViewType::value_type range);
//Fills view with random numbers in the range (start,end] //Fills view with random numbers in the range [start,end)
template<class ViewType, class PoolType> template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool, void fill_random(ViewType view, PoolType pool,
ViewType::value_type start, ViewType::value_type end); ViewType::value_type start, ViewType::value_type end);
@ -381,7 +381,7 @@ struct rand<Generator, unsigned long> {
// NOTE (mfh 26 oct 2014) This is a partial specialization for long // NOTE (mfh 26 oct 2014) This is a partial specialization for long
// long, a C99 / C++11 signed type which is guaranteed to be at // long, a C99 / C++11 signed type which is guaranteed to be at
// least 64 bits. Do NOT write a partial specialization for // least 64 bits. Do NOT write a partial specialization for
// int64_t!!! This is just a typedef! It could be either long or // int64_t!!! This is just an alias! It could be either long or
// long long. We don't know which a priori, and I've seen both. // long long. We don't know which a priori, and I've seen both.
// The types long and long long are guaranteed to differ, so it's // The types long and long long are guaranteed to differ, so it's
// always safe to specialize for both. // always safe to specialize for both.
@ -413,7 +413,7 @@ struct rand<Generator, long long> {
// NOTE (mfh 26 oct 2014) This is a partial specialization for // NOTE (mfh 26 oct 2014) This is a partial specialization for
// unsigned long long, a C99 / C++11 unsigned type which is // unsigned long long, a C99 / C++11 unsigned type which is
// guaranteed to be at least 64 bits. Do NOT write a partial // guaranteed to be at least 64 bits. Do NOT write a partial
// specialization for uint64_t!!! This is just a typedef! It could // specialization for uint64_t!!! This is just an alias! It could
// be either unsigned long or unsigned long long. We don't know // be either unsigned long or unsigned long long. We don't know
// which a priori, and I've seen both. The types unsigned long and // which a priori, and I've seen both. The types unsigned long and
// unsigned long long are guaranteed to differ, so it's always safe // unsigned long long are guaranteed to differ, so it's always safe
@ -604,11 +604,7 @@ struct Random_UniqueIndex {
KOKKOS_FUNCTION KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) { static int get_state_idx(const locks_view_type) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = ExecutionSpace::hardware_thread_id();
#else
const int i = ExecutionSpace::impl_hardware_thread_id(); const int i = ExecutionSpace::impl_hardware_thread_id();
#endif
return i; return i;
#else #else
return 0; return 0;
@ -652,15 +648,13 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
static int get_state_idx(const locks_view_type& locks_) { static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__ #ifdef __HIP_DEVICE_COMPILE__
const int i_offset = const int i_offset =
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
hipThreadIdx_z; int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z + blockDim.x * blockDim.y * blockDim.z +
hipBlockIdx_z) *
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
i_offset) % i_offset) %
locks_.extent(0); locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) { if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset; i = i_offset;
} }
@ -687,7 +681,7 @@ class Random_XorShift64 {
friend class Random_XorShift64_Pool<DeviceType>; friend class Random_XorShift64_Pool<DeviceType>;
public: public:
typedef DeviceType device_type; using device_type = DeviceType;
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max(); constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max(); constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -805,11 +799,6 @@ class Random_XorShift64 {
// number // number
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double normal() { double normal() {
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0; double S = 2.0;
double U; double U;
while (S >= 1.0) { while (S >= 1.0) {
@ -817,7 +806,7 @@ class Random_XorShift64 {
const double V = 2.0 * drand() - 1.0; const double V = 2.0 * drand() - 1.0;
S = U * U + V * V; S = U * U + V * V;
} }
return U * sqrt(-2.0 * log(S) / S); return U * std::sqrt(-2.0 * log(S) / S);
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -830,15 +819,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool { class Random_XorShift64_Pool {
private: private:
using execution_space = typename DeviceType::execution_space; using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type; using locks_type = View<int*, execution_space>;
typedef View<uint64_t*, DeviceType> state_data_type; using state_data_type = View<uint64_t*, DeviceType>;
locks_type locks_; locks_type locks_;
state_data_type state_; state_data_type state_;
int num_states_; int num_states_;
public: public:
typedef Random_XorShift64<DeviceType> generator_type; using generator_type = Random_XorShift64<DeviceType>;
typedef DeviceType device_type; using device_type = DeviceType;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift64_Pool() { num_states_ = 0; } Random_XorShift64_Pool() { num_states_ = 0; }
@ -923,8 +912,8 @@ class Random_XorShift1024 {
friend class Random_XorShift1024_Pool<DeviceType>; friend class Random_XorShift1024_Pool<DeviceType>;
public: public:
typedef Random_XorShift1024_Pool<DeviceType> pool_type; using pool_type = Random_XorShift1024_Pool<DeviceType>;
typedef DeviceType device_type; using device_type = DeviceType;
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max(); constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max(); constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -1046,11 +1035,6 @@ class Random_XorShift1024 {
// number // number
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double normal() { double normal() {
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0; double S = 2.0;
double U; double U;
while (S >= 1.0) { while (S >= 1.0) {
@ -1058,7 +1042,7 @@ class Random_XorShift1024 {
const double V = 2.0 * drand() - 1.0; const double V = 2.0 * drand() - 1.0;
S = U * U + V * V; S = U * U + V * V;
} }
return U * sqrt(-2.0 * log(S) / S); return U * std::sqrt(-2.0 * log(S) / S);
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -1071,9 +1055,9 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool { class Random_XorShift1024_Pool {
private: private:
using execution_space = typename DeviceType::execution_space; using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type; using locks_type = View<int*, execution_space>;
typedef View<int*, DeviceType> int_view_type; using int_view_type = View<int*, DeviceType>;
typedef View<uint64_t * [16], DeviceType> state_data_type; using state_data_type = View<uint64_t * [16], DeviceType>;
locks_type locks_; locks_type locks_;
state_data_type state_; state_data_type state_;
@ -1082,9 +1066,9 @@ class Random_XorShift1024_Pool {
friend class Random_XorShift1024<DeviceType>; friend class Random_XorShift1024<DeviceType>;
public: public:
typedef Random_XorShift1024<DeviceType> generator_type; using generator_type = Random_XorShift1024<DeviceType>;
typedef DeviceType device_type; using device_type = DeviceType;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift1024_Pool() { num_states_ = 0; } Random_XorShift1024_Pool() { num_states_ = 0; }
@ -1176,14 +1160,13 @@ struct fill_random_functor_begin_end;
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1203,14 +1186,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1232,14 +1214,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1262,14 +1243,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1293,14 +1273,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1326,14 +1305,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1361,14 +1339,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1398,14 +1375,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> { struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type range; typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_, fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_) typename ViewType::const_value_type range_)
@ -1437,14 +1413,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1466,14 +1441,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1497,14 +1471,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1529,14 +1502,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1562,14 +1534,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1597,14 +1568,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1634,14 +1604,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,
@ -1673,14 +1642,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
IndexType> { IndexType> {
typedef typename ViewType::execution_space execution_space; using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type, using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type> typename ViewType::non_const_value_type>;
Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type begin_,

View File

@ -95,9 +95,9 @@ class BinSort {
public: public:
template <class DstViewType, class SrcViewType> template <class DstViewType, class SrcViewType>
struct copy_functor { struct copy_functor {
typedef typename SrcViewType::const_type src_view_type; using src_view_type = typename SrcViewType::const_type;
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op; using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
DstViewType dst_values; DstViewType dst_values;
src_view_type src_values; src_view_type src_values;
@ -120,17 +120,17 @@ class BinSort {
// If a Kokkos::View then can generate constant random access // If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type. // otherwise can only use the constant type.
typedef typename std::conditional< using src_view_type = typename std::conditional<
Kokkos::is_view<SrcViewType>::value, Kokkos::is_view<SrcViewType>::value,
Kokkos::View<typename SrcViewType::const_data_type, Kokkos::View<typename SrcViewType::const_data_type,
typename SrcViewType::array_layout, typename SrcViewType::array_layout,
typename SrcViewType::device_type, typename SrcViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >, Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
typename SrcViewType::const_type>::type src_view_type; typename SrcViewType::const_type>::type;
typedef typename PermuteViewType::const_type perm_view_type; using perm_view_type = typename PermuteViewType::const_type;
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op; using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
DstViewType dst_values; DstViewType dst_values;
perm_view_type sort_order; perm_view_type sort_order;
@ -151,8 +151,8 @@ class BinSort {
} }
}; };
typedef typename Space::execution_space execution_space; using execution_space = typename Space::execution_space;
typedef BinSortOp bin_op_type; using bin_op_type = BinSortOp;
struct bin_count_tag {}; struct bin_count_tag {};
struct bin_offset_tag {}; struct bin_offset_tag {};
@ -160,30 +160,30 @@ class BinSort {
struct bin_sort_bins_tag {}; struct bin_sort_bins_tag {};
public: public:
typedef SizeType size_type; using size_type = SizeType;
typedef size_type value_type; using value_type = size_type;
typedef Kokkos::View<size_type*, Space> offset_type; using offset_type = Kokkos::View<size_type*, Space>;
typedef Kokkos::View<const int*, Space> bin_count_type; using bin_count_type = Kokkos::View<const int*, Space>;
typedef typename KeyViewType::const_type const_key_view_type; using const_key_view_type = typename KeyViewType::const_type;
// If a Kokkos::View then can generate constant random access // If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type. // otherwise can only use the constant type.
typedef typename std::conditional< using const_rnd_key_view_type = typename std::conditional<
Kokkos::is_view<KeyViewType>::value, Kokkos::is_view<KeyViewType>::value,
Kokkos::View<typename KeyViewType::const_data_type, Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout, typename KeyViewType::array_layout,
typename KeyViewType::device_type, typename KeyViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >, Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
const_key_view_type>::type const_rnd_key_view_type; const_key_view_type>::type;
typedef typename KeyViewType::non_const_value_type non_const_key_scalar; using non_const_key_scalar = typename KeyViewType::non_const_value_type;
typedef typename KeyViewType::const_value_type const_key_scalar; using const_key_scalar = typename KeyViewType::const_value_type;
typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> > using bin_count_atomic_type =
bin_count_atomic_type; Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
private: private:
const_key_view_type keys; const_key_view_type keys;
@ -266,10 +266,10 @@ class BinSort {
template <class ValuesViewType> template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin, void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const { int values_range_end) const {
typedef Kokkos::View<typename ValuesViewType::data_type, using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout, typename ValuesViewType::array_layout,
typename ValuesViewType::device_type> typename ValuesViewType::device_type>;
scratch_view_type;
const size_t len = range_end - range_begin; const size_t len = range_end - range_begin;
const size_t values_len = values_range_end - values_range_begin; const size_t values_len = values_range_end - values_range_begin;
@ -278,13 +278,6 @@ class BinSort {
"BinSort::sort: values range length != permutation vector length"); "BinSort::sort: values range length != permutation vector length");
} }
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
scratch_view_type sorted_values(
ViewAllocateWithoutInitializing(
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
len, values.extent(1), values.extent(2), values.extent(3),
values.extent(4), values.extent(5), values.extent(6), values.extent(7));
#else
scratch_view_type sorted_values( scratch_view_type sorted_values(
ViewAllocateWithoutInitializing( ViewAllocateWithoutInitializing(
"Kokkos::SortImpl::BinSortFunctor::sorted_values"), "Kokkos::SortImpl::BinSortFunctor::sorted_values"),
@ -303,7 +296,6 @@ class BinSort {
: KOKKOS_IMPL_CTOR_DEFAULT_ARG, : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
values.rank_dynamic > 7 ? values.extent(7) values.rank_dynamic > 7 ? values.extent(7)
: KOKKOS_IMPL_CTOR_DEFAULT_ARG); : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
#endif
{ {
copy_permute_functor<scratch_view_type /* DstViewType */ copy_permute_functor<scratch_view_type /* DstViewType */
@ -511,8 +503,8 @@ bool try_std_sort(ViewType view) {
template <class ViewType> template <class ViewType>
struct min_max_functor { struct min_max_functor {
typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> using minmax_scalar =
minmax_scalar; Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
ViewType view; ViewType view;
min_max_functor(const ViewType& view_) : view(view_) {} min_max_functor(const ViewType& view_) : view(view_) {}
@ -531,7 +523,7 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) { if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view)) return; if (Impl::try_std_sort(view)) return;
} }
typedef BinOp1D<ViewType> CompType; using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result; Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result); Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
@ -548,8 +540,8 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
template <class ViewType> template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) { void sort(ViewType view, size_t const begin, size_t const end) {
typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy; using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
typedef BinOp1D<ViewType> CompType; using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result; Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result); Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);

View File

@ -20,14 +20,18 @@ KOKKOS_ADD_TEST_LIBRARY(
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
) )
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP) # avoid deprecation warnings from MSVC
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906") TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
ELSE()
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") IF(NOT (Kokkos_ENABLE_CUDA AND WIN32))
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
ENDIF() ENDIF()
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11) # Suppress clang-tidy diagnostics on code that we do not have control over
IF(CMAKE_CXX_CLANG_TIDY)
SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
ENDIF()
SET(SOURCES SET(SOURCES
UnitTestMain.cpp UnitTestMain.cpp

View File

@ -111,10 +111,10 @@ struct RandomProperties {
template <class GeneratorPool, class Scalar> template <class GeneratorPool, class Scalar>
struct test_random_functor { struct test_random_functor {
typedef typename GeneratorPool::generator_type rnd_type; using rnd_type = typename GeneratorPool::generator_type;
typedef RandomProperties value_type; using value_type = RandomProperties;
typedef typename GeneratorPool::device_type device_type; using device_type = typename GeneratorPool::device_type;
GeneratorPool rand_pool; GeneratorPool rand_pool;
const double mean; const double mean;
@ -125,12 +125,12 @@ struct test_random_functor {
// implementations might violate this upper bound, due to rounding // implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each // error. Just in case, we leave an extra space at the end of each
// dimension, in the View types below. // dimension, in the View types below.
typedef Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type> using type_1d =
type_1d; Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>;
type_1d density_1d; type_1d density_1d;
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1], using type_3d =
typename GeneratorPool::device_type> Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
type_3d; typename GeneratorPool::device_type>;
type_3d density_3d; type_3d density_3d;
test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
@ -200,9 +200,9 @@ struct test_random_functor {
template <class DeviceType> template <class DeviceType>
struct test_histogram1d_functor { struct test_histogram1d_functor {
typedef RandomProperties value_type; using value_type = RandomProperties;
typedef typename DeviceType::execution_space execution_space; using execution_space = typename DeviceType::execution_space;
typedef typename DeviceType::memory_space memory_space; using memory_space = typename DeviceType::memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that // an exclusive upper bound on the range of random numbers that
@ -210,7 +210,7 @@ struct test_histogram1d_functor {
// implementations might violate this upper bound, due to rounding // implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each // error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below. // dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM1D + 1], memory_space> type_1d; using type_1d = Kokkos::View<int[HIST_DIM1D + 1], memory_space>;
type_1d density_1d; type_1d density_1d;
double mean; double mean;
@ -219,7 +219,7 @@ struct test_histogram1d_functor {
KOKKOS_INLINE_FUNCTION void operator()( KOKKOS_INLINE_FUNCTION void operator()(
const typename memory_space::size_type i, RandomProperties& prop) const { const typename memory_space::size_type i, RandomProperties& prop) const {
typedef typename memory_space::size_type size_type; using size_type = typename memory_space::size_type;
const double count = density_1d(i); const double count = density_1d(i);
prop.mean += count; prop.mean += count;
prop.variance += 1.0 * (count - mean) * (count - mean); prop.variance += 1.0 * (count - mean) * (count - mean);
@ -234,9 +234,9 @@ struct test_histogram1d_functor {
template <class DeviceType> template <class DeviceType>
struct test_histogram3d_functor { struct test_histogram3d_functor {
typedef RandomProperties value_type; using value_type = RandomProperties;
typedef typename DeviceType::execution_space execution_space; using execution_space = typename DeviceType::execution_space;
typedef typename DeviceType::memory_space memory_space; using memory_space = typename DeviceType::memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that // an exclusive upper bound on the range of random numbers that
@ -244,9 +244,9 @@ struct test_histogram3d_functor {
// implementations might violate this upper bound, due to rounding // implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each // error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below. // dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1], using type_3d =
memory_space> Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
type_3d; memory_space>;
type_3d density_3d; type_3d density_3d;
double mean; double mean;
@ -255,7 +255,7 @@ struct test_histogram3d_functor {
KOKKOS_INLINE_FUNCTION void operator()( KOKKOS_INLINE_FUNCTION void operator()(
const typename memory_space::size_type i, RandomProperties& prop) const { const typename memory_space::size_type i, RandomProperties& prop) const {
typedef typename memory_space::size_type size_type; using size_type = typename memory_space::size_type;
const double count = density_3d( const double count = density_3d(
i / (HIST_DIM3D * HIST_DIM3D), i / (HIST_DIM3D * HIST_DIM3D),
(i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D); (i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D);
@ -276,7 +276,7 @@ struct test_histogram3d_functor {
// //
template <class RandomGenerator, class Scalar> template <class RandomGenerator, class Scalar>
struct test_random_scalar { struct test_random_scalar {
typedef typename RandomGenerator::generator_type rnd_type; using rnd_type = typename RandomGenerator::generator_type;
int pass_mean, pass_var, pass_covar; int pass_mean, pass_var, pass_covar;
int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar; int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
@ -294,7 +294,7 @@ struct test_random_scalar {
cout << " -- Testing randomness properties" << endl; cout << " -- Testing randomness properties" << endl;
RandomProperties result; RandomProperties result;
typedef test_random_functor<RandomGenerator, Scalar> functor_type; using functor_type = test_random_functor<RandomGenerator, Scalar>;
parallel_reduce(num_draws / 1024, parallel_reduce(num_draws / 1024,
functor_type(pool, density_1d, density_3d), result); functor_type(pool, density_1d, density_3d), result);
@ -325,8 +325,8 @@ struct test_random_scalar {
cout << " -- Testing 1-D histogram" << endl; cout << " -- Testing 1-D histogram" << endl;
RandomProperties result; RandomProperties result;
typedef test_histogram1d_functor<typename RandomGenerator::device_type> using functor_type =
functor_type; test_histogram1d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result); parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
@ -357,8 +357,8 @@ struct test_random_scalar {
cout << " -- Testing 3-D histogram" << endl; cout << " -- Testing 3-D histogram" << endl;
RandomProperties result; RandomProperties result;
typedef test_histogram3d_functor<typename RandomGenerator::device_type> using functor_type =
functor_type; test_histogram3d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result); parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);

View File

@ -55,8 +55,8 @@ namespace Impl {
template <class ExecutionSpace, class Scalar> template <class ExecutionSpace, class Scalar>
struct is_sorted_struct { struct is_sorted_struct {
typedef unsigned int value_type; using value_type = unsigned int;
typedef ExecutionSpace execution_space; using execution_space = ExecutionSpace;
Kokkos::View<Scalar*, ExecutionSpace> keys; Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -69,8 +69,8 @@ struct is_sorted_struct {
template <class ExecutionSpace, class Scalar> template <class ExecutionSpace, class Scalar>
struct sum { struct sum {
typedef double value_type; using value_type = double;
typedef ExecutionSpace execution_space; using execution_space = ExecutionSpace;
Kokkos::View<Scalar*, ExecutionSpace> keys; Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -81,8 +81,8 @@ struct sum {
template <class ExecutionSpace, class Scalar> template <class ExecutionSpace, class Scalar>
struct bin3d_is_sorted_struct { struct bin3d_is_sorted_struct {
typedef unsigned int value_type; using value_type = unsigned int;
typedef ExecutionSpace execution_space; using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys; Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -115,8 +115,8 @@ struct bin3d_is_sorted_struct {
template <class ExecutionSpace, class Scalar> template <class ExecutionSpace, class Scalar>
struct sum3D { struct sum3D {
typedef double value_type; using value_type = double;
typedef ExecutionSpace execution_space; using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys; Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -131,7 +131,7 @@ struct sum3D {
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_1D_sort_impl(unsigned int n, bool force_kokkos) { void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType; using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
KeyViewType keys("Keys", n); KeyViewType keys("Keys", n);
// Test sorting array with all numbers equal // Test sorting array with all numbers equal
@ -166,7 +166,7 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_3D_sort_impl(unsigned int n) { void test_3D_sort_impl(unsigned int n) {
typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType; using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>;
KeyViewType keys("Keys", n * n * n); KeyViewType keys("Keys", n * n * n);
@ -186,7 +186,7 @@ void test_3D_sort_impl(unsigned int n) {
typename KeyViewType::value_type min[3] = {0, 0, 0}; typename KeyViewType::value_type min[3] = {0, 0, 0};
typename KeyViewType::value_type max[3] = {100, 100, 100}; typename KeyViewType::value_type max[3] = {100, 100, 100};
typedef Kokkos::BinOp3D<KeyViewType> BinOp; using BinOp = Kokkos::BinOp3D<KeyViewType>;
BinOp bin_op(bin_max, min, max); BinOp bin_op(bin_max, min, max);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false); Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
Sorter.create_permute_vector(); Sorter.create_permute_vector();
@ -215,9 +215,9 @@ void test_3D_sort_impl(unsigned int n) {
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort_impl(unsigned int n) { void test_dynamic_view_sort_impl(unsigned int n) {
typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace> using KeyDynamicViewType =
KeyDynamicViewType; Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType; using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
const size_t upper_bound = 2 * n; const size_t upper_bound = 2 * n;
const size_t min_chunk_size = 1024; const size_t min_chunk_size = 1024;
@ -305,8 +305,8 @@ void test_issue_1160_impl() {
Kokkos::deep_copy(x_, h_x); Kokkos::deep_copy(x_, h_x);
Kokkos::deep_copy(v_, h_v); Kokkos::deep_copy(v_, h_v);
typedef decltype(element_) KeyViewType; using KeyViewType = decltype(element_);
typedef Kokkos::BinOp1D<KeyViewType> BinOp; using BinOp = Kokkos::BinOp1D<KeyViewType>;
int begin = 3; int begin = 3;
int end = 8; int end = 8;

View File

@ -5,6 +5,6 @@ build_script:
- cmd: >- - cmd: >-
mkdir build && mkdir build &&
cd build && cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF && cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON &&
cmake --build . --target install && cmake --build . --target install &&
ctest -C Debug -V ctest -C Debug -V

View File

@ -69,13 +69,13 @@ int main(int argc, char* argv[]) {
return 0; return 0;
} }
int L = atoi(argv[1]); int L = std::stoi(argv[1]);
int N = atoi(argv[2]); int N = std::stoi(argv[2]);
int M = atoi(argv[3]); int M = std::stoi(argv[3]);
int D = atoi(argv[4]); int D = std::stoi(argv[4]);
int K = atoi(argv[5]); int K = std::stoi(argv[5]);
int R = atoi(argv[6]); int R = std::stoi(argv[6]);
int type = atoi(argv[7]); int type = std::stoi(argv[7]);
Kokkos::View<int*> offsets("Offsets", L, M); Kokkos::View<int*> offsets("Offsets", L, M);
Kokkos::Random_XorShift64_Pool<> pool(12371); Kokkos::Random_XorShift64_Pool<> pool(12371);

View File

@ -73,15 +73,15 @@ int main(int argc, char* argv[]) {
return 0; return 0;
} }
int P = atoi(argv[1]); int P = std::stoi(argv[1]);
int N = atoi(argv[2]); int N = std::stoi(argv[2]);
int K = atoi(argv[3]); int K = std::stoi(argv[3]);
int R = atoi(argv[4]); int R = std::stoi(argv[4]);
int D = atoi(argv[5]); int D = std::stoi(argv[5]);
int U = atoi(argv[6]); int U = std::stoi(argv[6]);
int F = atoi(argv[7]); int F = std::stoi(argv[7]);
int T = atoi(argv[8]); int T = std::stoi(argv[8]);
int S = atoi(argv[9]); int S = std::stoi(argv[9]);
if (U > 8) { if (U > 8) {
printf("U must be 1-8\n"); printf("U must be 1-8\n");

View File

@ -72,13 +72,13 @@ int main(int argc, char* argv[]) {
return 0; return 0;
} }
int S = atoi(argv[1]); int S = std::stoi(argv[1]);
int N = atoi(argv[2]); int N = std::stoi(argv[2]);
int K = atoi(argv[3]); int K = std::stoi(argv[3]);
int D = atoi(argv[4]); int D = std::stoi(argv[4]);
int R = atoi(argv[5]); int R = std::stoi(argv[5]);
int U = atoi(argv[6]); int U = std::stoi(argv[6]);
int F = atoi(argv[7]); int F = std::stoi(argv[7]);
if ((S != 1) && (S != 2) && (S != 4)) { if ((S != 1) && (S != 2) && (S != 4)) {
printf("S must be one of 1,2,4\n"); printf("S must be one of 1,2,4\n");

View File

@ -50,58 +50,61 @@
#define HLINE "-------------------------------------------------------------\n" #define HLINE "-------------------------------------------------------------\n"
#if defined(KOKKOS_ENABLE_CUDA) #if defined(KOKKOS_ENABLE_CUDA)
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray; using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray; using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
#else #else
typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray; using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray; using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
#endif #endif
typedef int GUPSIndex; using GUPSIndex = int;
double now() { double now() {
struct timeval now; struct timeval now;
gettimeofday(&now, nullptr); gettimeofday(&now, nullptr);
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
} }
void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) { void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
for( GUPSIndex i = 0; i < indices.extent(0); ++i ) { const int64_t dataCount) {
for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
indices[i] = lrand48() % dataCount; indices[i] = lrand48() % dataCount;
} }
Kokkos::deep_copy(dev_indices, indices); Kokkos::deep_copy(dev_indices, indices);
} }
void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum, void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
const bool performAtomics) { const int64_t datum, const bool performAtomics) {
if (performAtomics) {
if( performAtomics ) { Kokkos::parallel_for(
Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { "bench-gups-atomic", indices.extent(0),
Kokkos::atomic_fetch_xor( &data[indices[i]], datum ); KOKKOS_LAMBDA(const GUPSIndex i) {
Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
}); });
} else { } else {
Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { Kokkos::parallel_for(
data[indices[i]] ^= datum; "bench-gups-non-atomic", indices.extent(0),
}); KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
} }
Kokkos::fence(); Kokkos::fence();
} }
int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats, int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
const bool useAtomics) { const int repeats, const bool useAtomics) {
printf("Reports fastest timing per kernel\n"); printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n"); printf("Creating Views...\n");
printf("Memory Sizes:\n"); printf("Memory Sizes:\n");
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount), printf("- Elements: %15" PRIu64 " (%12.4f MB)\n",
1.0e-6 * ((double) dataCount * (double) sizeof(int64_t))); static_cast<uint64_t>(dataCount),
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount), 1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t))); printf("- Indices: %15" PRIu64 " (%12.4f MB)\n",
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No") ); static_cast<uint64_t>(indicesCount),
1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No"));
printf("Benchmark kernels will be performed for %d iterations.\n", repeats); printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
printf(HLINE); printf(HLINE);
@ -118,24 +121,22 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
printf("Initializing Views...\n"); printf("Initializing Views...\n");
#if defined(KOKKOS_HAVE_OPENMP) #if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount), Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
#else #else
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount), Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
#endif #endif
KOKKOS_LAMBDA(const int i) { KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
data[i] = 10101010101;
});
#if defined(KOKKOS_HAVE_OPENMP) #if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount), Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
#else #else
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount), Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
#endif #endif
KOKKOS_LAMBDA(const int i) { KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
indices[i] = 0;
});
Kokkos::deep_copy(dev_data, data); Kokkos::deep_copy(dev_data, data);
Kokkos::deep_copy(dev_indices, indices); Kokkos::deep_copy(dev_indices, indices);
@ -143,7 +144,7 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
printf("Starting benchmarking...\n"); printf("Starting benchmarking...\n");
for( GUPSIndex k = 0; k < repeats; ++k ) { for (GUPSIndex k = 0; k < repeats; ++k) {
randomize_indices(indices, dev_indices, data.extent(0)); randomize_indices(indices, dev_indices, data.extent(0));
start = now(); start = now();
@ -155,15 +156,15 @@ int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const
Kokkos::deep_copy(data, dev_data); Kokkos::deep_copy(data, dev_data);
printf(HLINE); printf(HLINE);
printf("GUP/s Random: %18.6f\n", printf(
(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime); "GUP/s Random: %18.6f\n",
(1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
printf(HLINE); printf(HLINE);
return 0; return 0;
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
printf(HLINE); printf(HLINE);
printf("Kokkos GUPS Benchmark\n"); printf("Kokkos GUPS Benchmark\n");
printf(HLINE); printf(HLINE);
@ -177,17 +178,17 @@ int main(int argc, char* argv[]) {
int64_t repeats = 10; int64_t repeats = 10;
bool useAtomics = false; bool useAtomics = false;
for( int i = 1; i < argc; ++i ) { for (int i = 1; i < argc; ++i) {
if( strcmp( argv[i], "--indices" ) == 0 ) { if (strcmp(argv[i], "--indices") == 0) {
indices = std::atoll(argv[i+1]); indices = std::atoll(argv[i + 1]);
++i; ++i;
} else if( strcmp( argv[i], "--data" ) == 0 ) { } else if (strcmp(argv[i], "--data") == 0) {
data = std::atoll(argv[i+1]); data = std::atoll(argv[i + 1]);
++i; ++i;
} else if( strcmp( argv[i], "--repeats" ) == 0 ) { } else if (strcmp(argv[i], "--repeats") == 0) {
repeats = std::atoll(argv[i+1]); repeats = std::atoll(argv[i + 1]);
++i; ++i;
} else if( strcmp( argv[i], "--atomics" ) == 0 ) { } else if (strcmp(argv[i], "--atomics") == 0) {
useAtomics = true; useAtomics = true;
} }
} }

Some files were not shown because too many files have changed in this diff Show More