Merge branch 'master' into library-refactor
This commit is contained in:
@ -15,75 +15,93 @@ if(BUILD_DOC)
|
||||
endif()
|
||||
set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE})
|
||||
endif()
|
||||
find_package(Doxygen 1.8.10 REQUIRED)
|
||||
|
||||
file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
|
||||
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT docenv
|
||||
COMMAND ${VIRTUALENV} docenv
|
||||
)
|
||||
|
||||
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
|
||||
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
|
||||
|
||||
set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
|
||||
set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
|
||||
set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static)
|
||||
|
||||
# configuration and static files are copied to binary dir to avoid collisions with parallel builds
|
||||
set(DOC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc)
|
||||
set(DOC_BUILD_CONFIG_FILE ${DOC_BUILD_DIR}/conf.py)
|
||||
set(DOC_BUILD_STATIC_DIR ${DOC_BUILD_DIR}/_static)
|
||||
set(DOXYGEN_BUILD_DIR ${DOC_BUILD_DIR}/doxygen)
|
||||
set(DOXYGEN_XML_DIR ${DOXYGEN_BUILD_DIR}/xml)
|
||||
|
||||
# copy entire configuration folder to doc build directory
|
||||
# files in _static are automatically copied during sphinx-build, so no need to copy them individually
|
||||
file(COPY ${SPHINX_CONFIG_DIR}/ DESTINATION ${DOC_BUILD_DIR})
|
||||
|
||||
# configure paths in conf.py, since relative paths change when file is copied
|
||||
configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT requirements.txt
|
||||
DEPENDS docenv
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
|
||||
OUTPUT ${DOC_BUILD_DIR}/requirements.txt
|
||||
DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r requirements.txt --upgrade
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
|
||||
)
|
||||
|
||||
# download mathjax distribution and unpack to folder "mathjax"
|
||||
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5)
|
||||
if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
|
||||
file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
|
||||
EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${CMAKE_CURRENT_BINARY_DIR}/mathjax)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
|
||||
endif()
|
||||
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax)
|
||||
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/)
|
||||
|
||||
# for increased browser compatibility
|
||||
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js)
|
||||
if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
|
||||
file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js")
|
||||
"${DOC_BUILD_STATIC_DIR}/polyfill.js")
|
||||
endif()
|
||||
|
||||
# note, this may run in parallel with other tasks, so we must not use multiple processes here
|
||||
# set up doxygen and add targets to run it
|
||||
file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
|
||||
file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR}/lammps-logo.png)
|
||||
configure_file(${LAMMPS_DOC_DIR}/doxygen/Doxyfile.in ${DOXYGEN_BUILD_DIR}/Doxyfile)
|
||||
get_target_property(LAMMPS_SOURCES lammps SOURCES)
|
||||
add_custom_command(
|
||||
OUTPUT html
|
||||
DEPENDS ${DOC_SOURCES} docenv requirements.txt
|
||||
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${LAMMPS_DOC_DIR}/src html
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${CMAKE_CURRENT_BINARY_DIR}/html/index.html
|
||||
OUTPUT ${DOXYGEN_XML_DIR}/index.xml
|
||||
DEPENDS ${DOC_SOURCES} ${LAMMPS_SOURCES}
|
||||
COMMAND Doxygen::doxygen ${DOXYGEN_BUILD_DIR}/Doxyfile WORKING_DIRECTORY ${DOXYGEN_BUILD_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_XML_DIR}/run.stamp
|
||||
)
|
||||
|
||||
# copy selected image files to html output tree
|
||||
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/html/JPG)
|
||||
set(HTML_EXTRA_IMAGES balance_nonuniform.jpg balance_rcb.jpg
|
||||
balance_uniform.jpg bow_tutorial_01.png bow_tutorial_02.png
|
||||
bow_tutorial_03.png bow_tutorial_04.png bow_tutorial_05.png
|
||||
dump1.jpg dump2.jpg examples_mdpd.gif gran_funnel.png gran_mixer.png
|
||||
hop1.jpg hop2.jpg saed_ewald_intersect.jpg saed_mesh.jpg
|
||||
screenshot_atomeye.jpg screenshot_gl.jpg screenshot_pymol.jpg
|
||||
screenshot_vmd.jpg sinusoid.jpg xrd_mesh.jpg)
|
||||
set(HTML_IMAGE_TARGETS "")
|
||||
foreach(_IMG ${HTML_EXTRA_IMAGES})
|
||||
string(PREPEND _IMG JPG/)
|
||||
list(APPEND HTML_IMAGE_TARGETS "${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}")
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}
|
||||
DEPENDS ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_CURRENT_BINARY_DIR}/html/JPG
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_BINARY_DIR}/html/${_IMG}
|
||||
)
|
||||
endforeach()
|
||||
if(EXISTS ${DOXYGEN_XML_DIR}/run.stamp)
|
||||
set(SPHINX_EXTRA_OPTS "-E")
|
||||
else()
|
||||
set(SPHINX_EXTRA_OPTS "")
|
||||
endif()
|
||||
add_custom_command(
|
||||
OUTPUT html
|
||||
DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE}
|
||||
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
|
||||
COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
doc ALL
|
||||
DEPENDS html ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/es5 ${HTML_IMAGE_TARGETS}
|
||||
DEPENDS html ${DOC_BUILD_STATIC_DIR}/mathjax/es5
|
||||
SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
|
||||
)
|
||||
|
||||
install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
|
||||
install(DIRECTORY ${DOC_BUILD_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
|
||||
endif()
|
||||
|
||||
@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS)
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(kokkos_build
|
||||
URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
|
||||
URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37
|
||||
URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
|
||||
URL_MD5 81569170fe232e5e64ab074f7cca5e50
|
||||
CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
|
||||
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
|
||||
)
|
||||
@ -50,7 +50,7 @@ if(DOWNLOAD_KOKKOS)
|
||||
target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 3.1.01 REQUIRED CONFIG)
|
||||
find_package(Kokkos 3.2.00 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
|
||||
7
doc/.gitignore
vendored
7
doc/.gitignore
vendored
@ -1,6 +1,7 @@
|
||||
/old
|
||||
/html
|
||||
/html-offline
|
||||
/epub
|
||||
/latex
|
||||
/mathjax
|
||||
/spelling
|
||||
@ -10,3 +11,9 @@
|
||||
/Developer.pdf
|
||||
/doctrees
|
||||
/docenv
|
||||
/doxygen-warn.log
|
||||
/utils/sphinx-config/conf.py
|
||||
/doxygen/Doxyfile
|
||||
*.el
|
||||
/utils/sphinx-config/_static/mathjax
|
||||
/utils/sphinx-config/_static/polyfill.js
|
||||
|
||||
106
doc/Makefile
106
doc/Makefile
@ -1,21 +1,29 @@
|
||||
# Makefile for LAMMPS documentation
|
||||
|
||||
SHELL = /bin/bash
|
||||
BUILDDIR = ${CURDIR}
|
||||
RSTDIR = $(BUILDDIR)/src
|
||||
VENV = $(BUILDDIR)/docenv
|
||||
MATHJAX = $(BUILDDIR)/mathjax
|
||||
TXT2RST = $(VENV)/bin/txt2rst
|
||||
ANCHORCHECK = $(VENV)/bin/rst_anchor_check
|
||||
SHELL = /bin/bash
|
||||
BUILDDIR = ${CURDIR}
|
||||
RSTDIR = $(BUILDDIR)/src
|
||||
VENV = $(BUILDDIR)/docenv
|
||||
TXT2RST = $(VENV)/bin/txt2rst
|
||||
ANCHORCHECK = $(VENV)/bin/rst_anchor_check
|
||||
SPHINXCONFIG = $(BUILDDIR)/utils/sphinx-config
|
||||
MATHJAX = $(SPHINXCONFIG)/_static/mathjax
|
||||
POLYFILL = $(SPHINXCONFIG)/_static/polyfill.js
|
||||
|
||||
PYTHON = $(shell which python3)
|
||||
PYTHON = $(shell which python3)
|
||||
DOXYGEN = $(shell which doxygen)
|
||||
VIRTUALENV = virtualenv
|
||||
HAS_PYTHON3 = NO
|
||||
HAS_VIRTUALENV = NO
|
||||
HAS_DOXYGEN = NO
|
||||
HAS_PDFLATEX = NO
|
||||
|
||||
ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
|
||||
HAS_PYTHON3 = YES
|
||||
HAS_PYTHON3 = YES
|
||||
endif
|
||||
|
||||
ifeq ($(shell which doxygen >/dev/null 2>&1; echo $$?), 0)
|
||||
HAS_DOXYGEN = YES
|
||||
endif
|
||||
|
||||
ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
|
||||
@ -33,9 +41,13 @@ HAS_PDFLATEX = YES
|
||||
endif
|
||||
|
||||
|
||||
SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())')
|
||||
SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")
|
||||
|
||||
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check
|
||||
# grab list of sources from doxygen config file.
|
||||
# we only want to use explicitly listed files.
|
||||
DOXYFILES = $(shell sed -n -e 's/\#.*$$//' -e '/^ *INPUT \+=/,/^[A-Z_]\+ \+=/p' doxygen/Doxyfile.in | sed -e 's/@LAMMPS_SOURCE_DIR@/..\/src/g' -e 's/\\//g' -e 's/ \+/ /' -e 's/[A-Z_]\+ \+= *\(YES\|NO\|\)//')
|
||||
|
||||
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check xmlgen
|
||||
|
||||
# ------------------------------------------
|
||||
|
||||
@ -57,23 +69,32 @@ help:
|
||||
# ------------------------------------------
|
||||
|
||||
clean-all: clean
|
||||
rm -rf $(BUILDDIR)/docenv $(BUILDDIR)/doctrees $(BUILDDIR)/mathjax Manual.pdf Developer.pdf
|
||||
rm -rf $(BUILDDIR)/docenv $(MATHJAX) $(BUILDDIR)/LAMMPS.mobi $(BUILDDIR)/LAMMPS.epub $(BUILDDIR)/Manual.pdf $(BUILDDIR)/Developer.pdf
|
||||
|
||||
clean: clean-spelling
|
||||
rm -rf html epub latex
|
||||
rm -rf $(BUILDDIR)/html $(BUILDDIR)/epub $(BUILDDIR)/latex $(BUILDDIR)/doctrees $(BUILDDIR)/doxygen/xml $(BUILDDIR)/doxygen-warn.log $(BUILDDIR)/doxygen/Doxyfile $(SPHINXCONFIG)/conf.py
|
||||
|
||||
clean-spelling:
|
||||
rm -rf spelling
|
||||
rm -rf $(BUILDDIR)/spelling
|
||||
|
||||
html: $(ANCHORCHECK) $(MATHJAX)
|
||||
$(SPHINXCONFIG)/conf.py: $(SPHINXCONFIG)/conf.py.in
|
||||
sed -e 's,@DOXYGEN_XML_DIR@,$(BUILDDIR)/doxygen/xml,g' \
|
||||
-e 's,@LAMMPS_SOURCE_DIR@,$(BUILDDIR)/../src,g' \
|
||||
-e 's,@LAMMPS_PYTHON_DIR@,$(BUILDDIR)/../python,g' \
|
||||
-e 's,@LAMMPS_DOC_DIR@,$(BUILDDIR),g' $< > $@
|
||||
|
||||
html: xmlgen $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX) $(POLYFILL)
|
||||
@$(MAKE) $(MFLAGS) -C graphviz all
|
||||
@(\
|
||||
. $(VENV)/bin/activate ;\
|
||||
sphinx-build $(SPHINXEXTRA) -b html -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
|
||||
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
|
||||
sphinx-build $(SPHINXEXTRA) -b html -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
|
||||
ln -sf Manual.html html/index.html;\
|
||||
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
|
||||
echo "############################################" ;\
|
||||
rst_anchor_check src/*.rst ;\
|
||||
python utils/check-packages.py -s ../src -d src ;\
|
||||
python $(BUILDDIR)/utils/check-packages.py -s ../src -d src ;\
|
||||
env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\
|
||||
python utils/check-styles.py -s ../src -d src ;\
|
||||
python $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
|
||||
echo "############################################" ;\
|
||||
deactivate ;\
|
||||
)
|
||||
@ -82,30 +103,28 @@ html: $(ANCHORCHECK) $(MATHJAX)
|
||||
@rm -rf html/USER
|
||||
@rm -rf html/JPG
|
||||
@cp -r src/PDF html/PDF
|
||||
@mkdir -p html/JPG
|
||||
@cp `grep -A2 '\.\. .*\(image\|figure\)::' src/*.rst | grep ':target: JPG' | sed -e 's,.*:target: JPG/,src/JPG/,' | sort | uniq` html/JPG/
|
||||
@rm -rf html/PDF/.[sg]*
|
||||
@mkdir -p html/_static/mathjax
|
||||
@cp -r $(MATHJAX)/es5 html/_static/mathjax/
|
||||
@echo "Build finished. The HTML pages are in doc/html."
|
||||
|
||||
spelling: $(VENV) utils/sphinx-config/false_positives.txt
|
||||
spelling: xmlgen $(VENV) $(SPHINXCONFIG)/false_positives.txt
|
||||
@(\
|
||||
. $(VENV)/bin/activate ;\
|
||||
cp utils/sphinx-config/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
|
||||
sphinx-build -b spelling -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
|
||||
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
|
||||
cp $(SPHINXCONFIG)/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
|
||||
sphinx-build -b spelling -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
|
||||
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
|
||||
deactivate ;\
|
||||
)
|
||||
@echo "Spell check finished."
|
||||
|
||||
epub: $(VENV)
|
||||
epub: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
|
||||
@$(MAKE) $(MFLAGS) -C graphviz all
|
||||
@mkdir -p epub/JPG
|
||||
@rm -f LAMMPS.epub
|
||||
@cp src/JPG/lammps-logo.png epub/
|
||||
@cp src/JPG/*.* epub/JPG
|
||||
@(\
|
||||
. $(VENV)/bin/activate ;\
|
||||
sphinx-build $(SPHINXEXTRA) -b epub -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
|
||||
sphinx-build $(SPHINXEXTRA) -b epub -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
|
||||
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
|
||||
deactivate ;\
|
||||
)
|
||||
@mv epub/LAMMPS.epub .
|
||||
@ -117,7 +136,8 @@ mobi: epub
|
||||
@ebook-convert LAMMPS.epub LAMMPS.mobi
|
||||
@echo "Conversion finished. The MOBI manual file is created."
|
||||
|
||||
pdf: $(ANCHORCHECK)
|
||||
pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
|
||||
@$(MAKE) $(MFLAGS) -C graphviz all
|
||||
@if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
|
||||
@(\
|
||||
cd src/Developer; \
|
||||
@ -127,8 +147,9 @@ pdf: $(ANCHORCHECK)
|
||||
cd ../../; \
|
||||
)
|
||||
@(\
|
||||
. $(VENV)/bin/activate ;\
|
||||
sphinx-build $(SPHINXEXTRA) -b latex -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
|
||||
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
|
||||
sphinx-build $(SPHINXEXTRA) -b latex -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
|
||||
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
|
||||
echo "############################################" ;\
|
||||
rst_anchor_check src/*.rst ;\
|
||||
python utils/check-packages.py -s ../src -d src ;\
|
||||
@ -185,21 +206,32 @@ package_check : $(VENV)
|
||||
deactivate ;\
|
||||
)
|
||||
|
||||
xmlgen : doxygen/xml/index.xml
|
||||
|
||||
doxygen/Doxyfile: doxygen/Doxyfile.in
|
||||
sed -e 's/@LAMMPS_SOURCE_DIR@/..\/..\/src/g' $< > $@
|
||||
|
||||
doxygen/xml/index.xml : $(VENV) doxygen/Doxyfile $(DOXYFILES)
|
||||
@(cd doxygen; $(DOXYGEN) Doxyfile && touch xml/run.stamp)
|
||||
# ------------------------------------------
|
||||
|
||||
$(VENV):
|
||||
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
|
||||
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
|
||||
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "python3 was not found! Please see README for further instructions" 1>&2; exit 1; fi
|
||||
@if [ "$(HAS_DOXYGEN)" == "NO" ] ; then echo "doxygen was not found! Please see README for further instructions" 1>&2; exit 1; fi
|
||||
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please see README for further instructions" 1>&2; exit 1; fi
|
||||
@( \
|
||||
$(VIRTUALENV) -p $(PYTHON) $(VENV); \
|
||||
. $(VENV)/bin/activate; \
|
||||
pip install --upgrade pip; \
|
||||
pip install --use-feature=2020-resolver -r requirements.txt; \
|
||||
pip install --use-feature=2020-resolver -r $(BUILDDIR)/utils/requirements.txt; \
|
||||
deactivate;\
|
||||
)
|
||||
|
||||
$(MATHJAX):
|
||||
@git clone --depth 1 https://github.com/mathjax/MathJax.git mathjax
|
||||
@git clone --depth 1 https://github.com/mathjax/MathJax.git $@
|
||||
|
||||
$(POLYFILL): $(MATHJAX)
|
||||
@curl -s -o $@ "https://polyfill.io/v3/polyfill.min.js?features=es6"
|
||||
|
||||
$(TXT2RST) $(ANCHORCHECK): $(VENV)
|
||||
@( \
|
||||
|
||||
148
doc/README
148
doc/README
@ -1,97 +1,60 @@
|
||||
LAMMPS Documentation
|
||||
|
||||
Depending on how you obtained LAMMPS, this directory has 2 or 3
|
||||
sub-directories and optionally 2 PDF files and an ePUB file:
|
||||
Depending on how you obtained LAMMPS and whether you have built
|
||||
the manual yourself, this directory has a varying number of
|
||||
sub-directories and files. Here is a list with descriptions:
|
||||
|
||||
src content files for LAMMPS documentation
|
||||
html HTML version of the LAMMPS manual (see html/Manual.html)
|
||||
utils utilities and settings for building the documentation
|
||||
Manual.pdf large PDF version of entire manual
|
||||
Developer.pdf small PDF with info about how LAMMPS is structured
|
||||
LAMMPS.epub Manual in ePUB format
|
||||
README this file
|
||||
src content files for LAMMPS documentation
|
||||
html HTML version of the LAMMPS manual (see html/Manual.html)
|
||||
utils utilities and settings for building the documentation
|
||||
Manual.pdf PDF version of entire manual
|
||||
Developer.pdf PDF with info about how LAMMPS is structured
|
||||
LAMMPS.epub Manual in ePUB format
|
||||
LAMMPS.mobi Manual in MOBI (Kindle) format
|
||||
lammps.1 man page for the lammps command
|
||||
msi2lmp.1 man page for the msi2lmp command
|
||||
mathjax code and fonts for rendering math in html
|
||||
doctrees temporary data
|
||||
docenv python virtual environment for generating the manual
|
||||
doxygen Doxygen configuration and output
|
||||
.gitignore list of files and folders to be ignored by git
|
||||
doxygen-warn.log logfile with warnings from running doxygen
|
||||
|
||||
If you downloaded LAMMPS as a tarball from the web site, all these
|
||||
directories and files should be included.
|
||||
and:
|
||||
|
||||
If you downloaded LAMMPS from the public SVN or Git repositories, then
|
||||
the HTML and PDF files are not included. Instead you need to create
|
||||
them, in one of three ways:
|
||||
github-development-workflow.md notes on the LAMMPS development workflow
|
||||
include-file-conventions.md notes on LAMMPS' include file conventions
|
||||
documentation_conventions.md notes on writing documentation for LAMMPS
|
||||
|
||||
If you downloaded a LAMMPS tarball from lammps.sandia.gov, then the html
|
||||
folder and the PDF manual should be included. If you downloaded LAMMPS
|
||||
from GitHub then you either need to download them or build them.
|
||||
|
||||
(a) You can "fetch" the current HTML and PDF files from the LAMMPS web
|
||||
site. Just type "make fetch". This should create a html_www dir and
|
||||
Manual_www.pdf/Developer_www.pdf files. Note that if new LAMMPS
|
||||
features have been added more recently than the date of your version,
|
||||
the fetched documentation will include those changes (but your source
|
||||
code will not, unless you update your local repository).
|
||||
Manual_www.pdf/Developer_www.pdf files. These files will always
|
||||
represent the latest published patch/development version of LAMMPS.
|
||||
|
||||
(b) You can build the HTML and PDF files yourself, by typing "make
|
||||
html" or by "make pdf", respectively. This requires various tools
|
||||
including the Python documentation processing tool Sphinx, which the
|
||||
build process will attempt to download and install on your system into
|
||||
a python virtual environment, if not already available. The PDF file
|
||||
will require a working LaTeX installation with several add-on packages
|
||||
in addition to the Python/Sphinx setup. See more details below.
|
||||
(b) You can build the HTML and PDF files yourself, by typing "make html"
|
||||
or by "make pdf", respectively. This requires various tools and files.
|
||||
Some of them have to be installed (more on that below). For the rest the
|
||||
build process will attempt to download and install into a python virtual
|
||||
environment and local folders.
|
||||
|
||||
----------------
|
||||
|
||||
The generation of all documentation is managed by the Makefile in this
|
||||
dir.
|
||||
Installing prerequisites for the documentation build
|
||||
|
||||
Options:
|
||||
To run the HTML documentation build toolchain, python 3.x, doxygen, git,
|
||||
and virtualenv have to be installed. Also internet access is initially
|
||||
required to download external files and tools.
|
||||
|
||||
make html # generate HTML in html dir using Sphinx
|
||||
make pdf # generate 2 PDF files (Manual.pdf,Developer.pdf)
|
||||
# in this dir via Sphinx and PDFLaTeX
|
||||
make fetch # fetch HTML doc pages and 2 PDF files from web site
|
||||
# as a tarball and unpack into html dir and 2 PDFs
|
||||
make epub # generate LAMMPS.epub in ePUB format using Sphinx
|
||||
make clean # remove intermediate RST files created by HTML build
|
||||
make clean-all # remove entire build folder and any cached data
|
||||
|
||||
----------------
|
||||
|
||||
Installing prerequisites for HTML build
|
||||
|
||||
To run the HTML documentation build toolchain, Python 3 and virtualenv
|
||||
have to be installed. Here are instructions for common setups:
|
||||
|
||||
# Ubuntu
|
||||
|
||||
sudo apt-get install python-virtualenv
|
||||
|
||||
# Fedora (up to version 21)
|
||||
# Red Hat Enterprise Linux or CentOS (up to version 7.x)
|
||||
|
||||
sudo yum install python3-virtualenv
|
||||
|
||||
# Fedora (since version 22)
|
||||
|
||||
sudo dnf install python3-virtualenv
|
||||
|
||||
# MacOS X
|
||||
|
||||
## Python 3
|
||||
|
||||
Download the latest Python 3 MacOS X package from
|
||||
https://www.python.org and install it. This will install both Python
|
||||
3 and pip3.
|
||||
|
||||
## virtualenv
|
||||
|
||||
Once Python 3 is installed, open a Terminal and type
|
||||
|
||||
pip3 install virtualenv
|
||||
|
||||
This will install virtualenv from the Python Package Index.
|
||||
|
||||
----------------
|
||||
|
||||
Installing prerequisites for PDF build
|
||||
|
||||
Same as for HTML plus a compatible LaTeX installation with
|
||||
support for PDFLaTeX. Also the following LaTeX packages need
|
||||
to be installed (e.g. from texlive):
|
||||
Building the PDF format manual requires in addition a compatible LaTeX
|
||||
installation with support for PDFLaTeX and several add-on LaTeX packages
|
||||
installed. This includes:
|
||||
- amsmath
|
||||
- anysize
|
||||
- babel
|
||||
- capt-of
|
||||
- cmap
|
||||
@ -105,24 +68,13 @@ to be installed (e.g. from texlive):
|
||||
- tabulary
|
||||
- upquote
|
||||
- wrapfig
|
||||
|
||||
Building the EPUB format requires LaTeX installation with the same packages
|
||||
as for the PDF format plus the 'dvipng' command to convert the embedded math
|
||||
into images. The MOBI format is generated from the EPUB format file by using
|
||||
the tool 'ebook-convert' from the 'calibre' e-book management software
|
||||
(https://calibre-ebook.com).
|
||||
----------------
|
||||
|
||||
Installing prerequisites for epub build
|
||||
|
||||
## ePUB
|
||||
|
||||
Same as for HTML. This uses the same tools and configuration
|
||||
files as the HTML tree. The ePUB format conversion currently
|
||||
does not support processing mathematical expressions via MathJAX,
|
||||
so there will be limitations on some pages. For the time being
|
||||
until this is resolved, building and using the PDF format file
|
||||
is recommended instead.
|
||||
|
||||
For converting the generated ePUB file to a mobi format file
|
||||
(for e-book readers like Kindle, that cannot read ePUB), you
|
||||
also need to have the 'ebook-convert' tool from the "calibre"
|
||||
software installed. http://calibre-ebook.com/
|
||||
You first create the ePUB file with 'make epub' and then do:
|
||||
|
||||
ebook-convert LAMMPS.epub LAMMPS.mobi
|
||||
|
||||
More details on this can be found in the manual itself. The online
|
||||
version is at: https://lammps.sandia.gov/doc/Manual_build.html
|
||||
|
||||
93
doc/documentation_conventions.md
Normal file
93
doc/documentation_conventions.md
Normal file
@ -0,0 +1,93 @@
|
||||
# Outline of LAMMPS documentation file conventions
|
||||
|
||||
The purpose of this document is to provide a point of reference
|
||||
for LAMMPS developers and contributors as to what conventions
|
||||
should be used to structure and format files in the LAMMPS manual.
|
||||
|
||||
Last change: 2020-04-23
|
||||
|
||||
## File format and tools
|
||||
|
||||
In fall 2019, the LAMMPS documentation file format has changed from
|
||||
a home grown minimal markup designed to generate HTML format files
|
||||
from a mostly plain text format to using the reStructuredText file
|
||||
format. For a transition period all files in the old .txt format
|
||||
were transparently converted to .rst and then processed. The txt2rst
|
||||
tool is still included in the distribution to obtain an initial .rst
|
||||
file for integration into the manual. Since the transition to
|
||||
reStructuredText as source format, many of the artifacts of the
|
||||
translation have been removed though and parts of the documentation
|
||||
refactored and expanded to take advantage of the capabilities
|
||||
of reStructuredText and associated tools. The conversion from the
|
||||
source to the final formats (HTML, PDF, and optionally e-book
|
||||
reader formats ePUB and MOBI) is mostly automated and controlled
|
||||
by a Makefile in the `doc` folder. This makefile assumes that the
|
||||
processing is done on a Unix-like machine and Python 3.5 or later
|
||||
and a matching virtualenv module are available. Additional Python
|
||||
packages (like the Sphinx tool and several extensions) are
|
||||
transparently installed into a virtual environment over the
|
||||
internet using the `pip` package manager. Further requirements
|
||||
and details are discussed in the manual.
|
||||
|
||||
## Work in progress
|
||||
|
||||
The refactoring and improving of the documentation is an ongoing
|
||||
process, so statements in this document may not always be fully
|
||||
up-to-date. If in doubt, contact the LAMMPS developers.
|
||||
|
||||
## General structure
|
||||
|
||||
The layout and formatting of added files should follow the example
|
||||
of the existing files. Since those are directly derived from their
|
||||
former .txt format versions and the manual has been maintained in
|
||||
that format for many years, there is a large degree of consistency
|
||||
already, so comparison with similar files should give you a good
|
||||
idea what kind of information and sections are needed.
|
||||
|
||||
## Formatting conventions
|
||||
|
||||
Filenames, folders, paths, (shell) commands, definitions, makefile
|
||||
settings and similar should be formatted as "literals" with
|
||||
double backward quotes bracketing the item: \`\`path/to/some/file\`\`
|
||||
|
||||
Keywords and options are formatted in italics: \*option\*
|
||||
|
||||
Mathematical expressions, equations, symbols are typeset using
|
||||
either a `.. math::` block or the `:math:` role.
|
||||
|
||||
Groups of shell commands or LAMMPS input script or C/C++ source
|
||||
code should be typeset into a `.. code-block::` section. A syntax
|
||||
highlighting extension for LAMMPS input scripts is provided, so
|
||||
`LAMMPS` can be used to indicate the language in the code block
|
||||
in addition to `bash`, `c`, or `python`. When no syntax style
|
||||
is indicated, no syntax highlighting is performed.
|
||||
|
||||
As an alternative, e.g. to typeset the syntax of file formats
|
||||
a `.. parsed-literal::` block can be used, which allows some
|
||||
formatting directives, which means that related characters need
|
||||
to be escaped with a preceding backslash: `\*`.
|
||||
|
||||
Special remarks can be highlighted with a `.. note::` block and
|
||||
strong warnings can be put into a `.. warning::` block.
|
||||
|
||||
## Required steps when adding a custom style to LAMMPS
|
||||
|
||||
When adding a new style (e.g. pair style or a compute or a fix)
|
||||
or a new command, it is **required** to include the corresponding
|
||||
documentation. Those are often new files that need to be added.
|
||||
In order to be included in the documentation, those new files
|
||||
need to be referenced in a `.. toctree::` block. Most of those
|
||||
use patterns with wildcards, so the addition will be automatic.
|
||||
However, those additions also need to be added to some lists of
|
||||
styles or commands. The `make style\_check` command will perform
|
||||
a test and report any missing entries and list the affected files.
|
||||
Any references defined with `.. \_refname:` have to be unique
|
||||
across all documentation files and this can be checked for with
|
||||
`make anchor\_check`. Finally, a spell-check should be done,
|
||||
which is triggered via `make spelling`. Any offenses need to
|
||||
be corrected and false positives should be added to the file
|
||||
`utils/sphinx-config/false\_positives.txt`.
|
||||
|
||||
## Required additional steps when adding a new package to LAMMPS
|
||||
|
||||
TODO
|
||||
1
doc/doxygen/.gitignore
vendored
Normal file
1
doc/doxygen/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/xml
|
||||
522
doc/doxygen/Doxyfile.in
Normal file
522
doc/doxygen/Doxyfile.in
Normal file
@ -0,0 +1,522 @@
|
||||
# Doxyfile 1.8.15 -*- makefile -*-
|
||||
|
||||
DOXYFILE_ENCODING = UTF-8
|
||||
PROJECT_NAME = "LAMMPS Programmer's Guide"
|
||||
PROJECT_NUMBER = "24 August 2020"
|
||||
PROJECT_BRIEF = "Documentation of the LAMMPS library interface and Python wrapper"
|
||||
PROJECT_LOGO = lammps-logo.png
|
||||
CREATE_SUBDIRS = NO
|
||||
ALLOW_UNICODE_NAMES = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
OUTPUT_TEXT_DIRECTION = LTR
|
||||
|
||||
BRIEF_MEMBER_DESC = YES
|
||||
REPEAT_BRIEF = YES
|
||||
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = NO
|
||||
FULL_PATH_NAMES = NO
|
||||
INHERIT_DOCS = YES
|
||||
TAB_SIZE = 2
|
||||
|
||||
# When enabled doxygen tries to link words that correspond to documented
|
||||
# classes, or namespaces to their corresponding documentation. Such a link can
|
||||
# be prevented in individual cases by putting a % sign in front of the word or
|
||||
# globally by setting AUTOLINK_SUPPORT to NO.
|
||||
# The default value is: YES.
|
||||
AUTOLINK_SUPPORT = YES
|
||||
|
||||
# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
|
||||
# to include (a tag file for) the STL sources as input, then you should set this
|
||||
# tag to YES in order to let doxygen match functions declarations and
|
||||
# definitions whose arguments contain STL classes (e.g. func(std::string);
|
||||
# versus func(std::string) {}). This also makes the inheritance and collaboration
|
||||
# diagrams that involve STL classes more complete and accurate.
|
||||
# The default value is: NO.
|
||||
|
||||
BUILTIN_STL_SUPPORT = YES
|
||||
IDL_PROPERTY_SUPPORT = NO
|
||||
|
||||
# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
|
||||
# cache is used to resolve symbols given their name and scope. Since this can be
|
||||
# an expensive process and often the same symbol appears multiple times in the
|
||||
# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
|
||||
# doxygen will become slower. If the cache is too large, memory is wasted. The
|
||||
# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
|
||||
# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
|
||||
# symbols. At the end of a run doxygen will report the cache usage and suggest
|
||||
# the optimal cache size from a speed point of view.
|
||||
# Minimum value: 0, maximum value: 9, default value: 0.
|
||||
|
||||
LOOKUP_CACHE_SIZE = 2
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
|
||||
# documentation are documented, even if no documentation was available. Private
|
||||
# class members and static file members will be hidden unless the
|
||||
# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
|
||||
# Note: This will also disable the warnings about undocumented members that are
|
||||
# normally produced when WARNINGS is set to YES.
|
||||
# The default value is: NO.
|
||||
|
||||
EXTRACT_ALL = NO
|
||||
|
||||
# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
|
||||
# be included in the documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
EXTRACT_PRIVATE = YES
|
||||
|
||||
# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
|
||||
# scope will be included in the documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
EXTRACT_PACKAGE = YES
|
||||
|
||||
# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
|
||||
# included in the documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
EXTRACT_STATIC = YES
|
||||
|
||||
# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
|
||||
# locally in source files will be included in the documentation. If set to NO,
|
||||
# only classes defined in header files are included. Does not have any effect
|
||||
# for Java sources.
|
||||
# The default value is: YES.
|
||||
|
||||
EXTRACT_LOCAL_CLASSES = YES
|
||||
|
||||
# If this flag is set to YES, the members of anonymous namespaces will be
|
||||
# extracted and appear in the documentation as a namespace called
|
||||
# 'anonymous_namespace{file}', where file will be replaced with the base name of
|
||||
# the file that contains the anonymous namespace. By default anonymous namespaces
|
||||
# are hidden.
|
||||
# The default value is: NO.
|
||||
|
||||
EXTRACT_ANON_NSPACES = YES
|
||||
|
||||
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
|
||||
# undocumented members inside documented classes or files. If set to NO these
|
||||
# members will be included in the various overviews, but no documentation
|
||||
# section is generated. This option has no effect if EXTRACT_ALL is enabled.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_UNDOC_MEMBERS = YES
|
||||
|
||||
# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
|
||||
# undocumented classes that are normally visible in the class hierarchy. If set
|
||||
# to NO, these classes will be included in the various overviews. This option
|
||||
# has no effect if EXTRACT_ALL is enabled.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_UNDOC_CLASSES = YES
|
||||
|
||||
# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
|
||||
# (class|struct|union) declarations. If set to NO, these declarations will be
|
||||
# included in the documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_FRIEND_COMPOUNDS = NO
|
||||
|
||||
# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
|
||||
# documentation blocks found inside the body of a function. If set to NO, these
|
||||
# blocks will be appended to the function's detailed documentation block.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
|
||||
# The INTERNAL_DOCS tag determines if documentation that is typed after a
|
||||
# \internal command is included. If the tag is set to NO then the documentation
|
||||
# will be excluded. Set it to YES to include the internal documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
INTERNAL_DOCS = NO
|
||||
|
||||
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
|
||||
# names in lower-case letters. If set to YES, upper-case letters are also
|
||||
# allowed. This is useful if you have classes or files whose names only differ
|
||||
# in case and if your file system supports case sensitive file names. Windows
|
||||
# and Mac users are advised to set this option to NO.
|
||||
# The default value is: system dependent.
|
||||
|
||||
CASE_SENSE_NAMES = YES
|
||||
|
||||
# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
|
||||
# their full class and namespace scopes in the documentation. If set to YES, the
|
||||
# scope will be hidden.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_SCOPE_NAMES = YES
|
||||
|
||||
# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
|
||||
# append additional text to a page's title, such as Class Reference. If set to
|
||||
# YES the compound reference will be hidden.
|
||||
# The default value is: NO.
|
||||
|
||||
HIDE_COMPOUND_REFERENCE= NO
|
||||
|
||||
# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
|
||||
# the files that are included by a file in the documentation of that file.
|
||||
# The default value is: YES.
|
||||
|
||||
SHOW_INCLUDE_FILES = NO
|
||||
|
||||
# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
|
||||
# grouped member an include statement to the documentation, telling the reader
|
||||
# which file to include in order to use the member.
|
||||
# The default value is: NO.
|
||||
|
||||
SHOW_GROUPED_MEMB_INC = NO
|
||||
|
||||
# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
|
||||
# files with double quotes in the documentation rather than with sharp brackets.
|
||||
# The default value is: NO.
|
||||
|
||||
FORCE_LOCAL_INCLUDES = NO
|
||||
|
||||
# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
|
||||
# documentation for inline members.
|
||||
# The default value is: YES.
|
||||
|
||||
INLINE_INFO = YES
|
||||
|
||||
# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
|
||||
# (detailed) documentation of file and class members alphabetically by member
|
||||
# name. If set to NO, the members will appear in declaration order.
|
||||
# The default value is: YES.
|
||||
|
||||
SORT_MEMBER_DOCS = NO
|
||||
|
||||
# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
|
||||
# descriptions of file, namespace and class members alphabetically by member
|
||||
# name. If set to NO, the members will appear in declaration order. Note that
|
||||
# this will also influence the order of the classes in the class list.
|
||||
# The default value is: NO.
|
||||
|
||||
SORT_BRIEF_DOCS = NO
|
||||
|
||||
# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
|
||||
# (brief and detailed) documentation of class members so that constructors and
|
||||
# destructors are listed first. If set to NO the constructors will appear in the
|
||||
# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
|
||||
# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
|
||||
# member documentation.
|
||||
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
|
||||
# detailed member documentation.
|
||||
# The default value is: NO.
|
||||
|
||||
SORT_MEMBERS_CTORS_1ST = NO
|
||||
|
||||
# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
|
||||
# of group names into alphabetical order. If set to NO the group names will
|
||||
# appear in their defined order.
|
||||
# The default value is: NO.
|
||||
|
||||
SORT_GROUP_NAMES = NO
|
||||
|
||||
# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
|
||||
# fully-qualified names, including namespaces. If set to NO, the class list will
|
||||
# be sorted only by class name, not including the namespace part.
|
||||
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
|
||||
# Note: This option applies only to the class list, not to the alphabetical
|
||||
# list.
|
||||
# The default value is: NO.
|
||||
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
|
||||
# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
|
||||
# type resolution of all parameters of a function it will reject a match between
|
||||
# the prototype and the implementation of a member function even if there is
|
||||
# only one candidate or it is obvious which candidate to choose by doing a
|
||||
# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
|
||||
# accept a match between prototype and implementation in such cases.
|
||||
# The default value is: NO.
|
||||
|
||||
STRICT_PROTO_MATCHING = NO
|
||||
|
||||
# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
|
||||
# list. This list is created by putting \todo commands in the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
GENERATE_TODOLIST = YES
|
||||
|
||||
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
|
||||
# list. This list is created by putting \test commands in the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
GENERATE_TESTLIST = YES
|
||||
|
||||
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
|
||||
# list. This list is created by putting \bug commands in the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
GENERATE_BUGLIST = YES
|
||||
|
||||
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
|
||||
# the deprecated list. This list is created by putting \deprecated commands in
|
||||
# the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
|
||||
# The ENABLED_SECTIONS tag can be used to enable conditional documentation
|
||||
# sections, marked by \if <section_label> ... \endif and \cond <section_label>
|
||||
# ... \endcond blocks.
|
||||
|
||||
ENABLED_SECTIONS =
|
||||
|
||||
# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
|
||||
# initial value of a variable or macro / define can have for it to appear in the
|
||||
# documentation. If the initializer consists of more lines than specified here
|
||||
# it will be hidden. Use a value of 0 to hide initializers completely. The
|
||||
# appearance of the value of individual variables and macros / defines can be
|
||||
# controlled using \showinitializer or \hideinitializer command in the
|
||||
# documentation regardless of this setting.
|
||||
# Minimum value: 0, maximum value: 10000, default value: 30.
|
||||
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
|
||||
# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
|
||||
# the bottom of the documentation of classes and structs. If set to YES, the
|
||||
# list will mention the files that were used to generate the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
SHOW_USED_FILES = YES
|
||||
|
||||
# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
|
||||
# will remove the Files entry from the Quick Index and from the Folder Tree View
|
||||
# (if specified).
|
||||
# The default value is: YES.
|
||||
|
||||
SHOW_FILES = NO
|
||||
|
||||
# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
|
||||
# page. This will remove the Namespaces entry from the Quick Index and from the
|
||||
# Folder Tree View (if specified).
|
||||
# The default value is: YES.
|
||||
|
||||
SHOW_NAMESPACES = YES
|
||||
|
||||
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
|
||||
# doxygen should invoke to get the current version for each file (typically from
|
||||
# the version control system). Doxygen will invoke the program by executing (via
|
||||
# popen()) the command command input-file, where command is the value of the
|
||||
# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
|
||||
# by doxygen. Whatever the program writes to standard output is used as the file
|
||||
# version. For an example see the documentation.
|
||||
|
||||
FILE_VERSION_FILTER =
|
||||
|
||||
# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
|
||||
# by doxygen. The layout file controls the global structure of the generated
|
||||
# output files in an output format independent way. To create the layout file
|
||||
# that represents doxygen's defaults, run doxygen with the -l option. You can
|
||||
# optionally specify a file name after the option, if omitted DoxygenLayout.xml
|
||||
# will be used as the name of the layout file.
|
||||
#
|
||||
# Note that if you run doxygen from a directory containing a file called
|
||||
# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
|
||||
# tag is left empty.
|
||||
|
||||
LAYOUT_FILE =
|
||||
|
||||
# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
|
||||
# the reference definitions. This must be a list of .bib files. The .bib
|
||||
# extension is automatically appended if omitted. This requires the bibtex tool
|
||||
# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
|
||||
# For LaTeX the style of the bibliography can be controlled using
|
||||
# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
|
||||
# search path. See also \cite for info how to create references.
|
||||
|
||||
CITE_BIB_FILES =
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to warning and progress messages
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# The QUIET tag can be used to turn on/off the messages that are generated to
|
||||
# standard output by doxygen. If QUIET is set to YES this implies that the
|
||||
# messages are off.
|
||||
# The default value is: NO.
|
||||
|
||||
QUIET = NO
|
||||
|
||||
# The WARNINGS tag can be used to turn on/off the warning messages that are
|
||||
# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
|
||||
# this implies that the warnings are on.
|
||||
#
|
||||
# Tip: Turn warnings on while writing the documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
WARNINGS = YES
|
||||
|
||||
# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
|
||||
# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
|
||||
# will automatically be disabled.
|
||||
# The default value is: YES.
|
||||
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
|
||||
# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
|
||||
# potential errors in the documentation, such as not documenting some parameters
|
||||
# in a documented function, or documenting parameters that don't exist or using
|
||||
# markup commands wrongly.
|
||||
# The default value is: YES.
|
||||
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
|
||||
# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
|
||||
# are documented, but have no documentation for their parameters or return
|
||||
# value. If set to NO, doxygen will only warn about wrong or incomplete
|
||||
# parameter documentation, but not about the absence of documentation. If
|
||||
# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
|
||||
# The default value is: NO.
|
||||
|
||||
WARN_NO_PARAMDOC = YES
|
||||
|
||||
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
|
||||
# a warning is encountered.
|
||||
# The default value is: NO.
|
||||
|
||||
WARN_AS_ERROR = NO
|
||||
|
||||
# The WARN_FORMAT tag determines the format of the warning messages that doxygen
|
||||
# can produce. The string should contain the $file, $line, and $text tags, which
|
||||
# will be replaced by the file and line number from which the warning originated
|
||||
# and the warning text. Optionally the format may contain $version, which will
|
||||
# be replaced by the version of the file (if it could be obtained via
|
||||
# FILE_VERSION_FILTER)
|
||||
# The default value is: $file:$line: $text.
|
||||
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
|
||||
# The WARN_LOGFILE tag can be used to specify a file to which warning and error
|
||||
# messages should be written. If left blank the output is written to standard
|
||||
# error (stderr).
|
||||
|
||||
WARN_LOGFILE = "../doxygen-warn.log"
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# The INPUT tag is used to specify the files and/or directories that contain
|
||||
# documented source files. You may enter file names like myfile.cpp or
|
||||
# directories like /usr/src/myproject. Separate the files or directories with
|
||||
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
|
||||
# Note: If this tag is empty the current directory is searched.
|
||||
|
||||
INPUT = @LAMMPS_SOURCE_DIR@/utils.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/utils.h \
|
||||
@LAMMPS_SOURCE_DIR@/library.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/library.h \
|
||||
@LAMMPS_SOURCE_DIR@/lammps.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/lammps.h \
|
||||
@LAMMPS_SOURCE_DIR@/lmptype.h \
|
||||
@LAMMPS_SOURCE_DIR@/pointers.h \
|
||||
@LAMMPS_SOURCE_DIR@/atom.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/atom.h \
|
||||
@LAMMPS_SOURCE_DIR@/input.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/input.h \
|
||||
|
||||
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
||||
# directories that are symbolic links (a Unix file system feature) are excluded
|
||||
# from the input.
|
||||
# The default value is: NO.
|
||||
|
||||
EXCLUDE_SYMLINKS = YES
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to output
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
GENERATE_HTML = NO
|
||||
GENERATE_LATEX = NO
|
||||
GENERATE_XML = YES
|
||||
XML_OUTPUT = xml
|
||||
XML_PROGRAMLISTING = YES
|
||||
XML_NS_MEMB_FILE_SCOPE = NO
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the preprocessor
|
||||
#---------------------------------------------------------------------------
|
||||
|
||||
# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
|
||||
# C-preprocessor directives found in the sources and include files.
|
||||
# The default value is: YES.
|
||||
|
||||
#ENABLE_PREPROCESSING = YES
|
||||
ENABLE_PREPROCESSING = NO
|
||||
|
||||
# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
|
||||
# in the source code. If set to NO, only conditional compilation will be
|
||||
# performed. Macro expansion can be done in a controlled way by setting
|
||||
# EXPAND_ONLY_PREDEF to YES.
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
MACRO_EXPANSION = NO
|
||||
|
||||
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
|
||||
# the macro expansion is limited to the macros specified with the PREDEFINED and
|
||||
# EXPAND_AS_DEFINED tags.
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
|
||||
# If the SEARCH_INCLUDES tag is set to YES, the include files in the
|
||||
# INCLUDE_PATH will be searched if a #include is found.
|
||||
# The default value is: YES.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
SEARCH_INCLUDES = YES
|
||||
|
||||
# The INCLUDE_PATH tag can be used to specify one or more directories that
|
||||
# contain include files that are not input files but should be processed by the
|
||||
# preprocessor.
|
||||
# This tag requires that the tag SEARCH_INCLUDES is set to YES.
|
||||
|
||||
INCLUDE_PATH =
|
||||
|
||||
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
|
||||
# patterns (like *.h and *.hpp) to filter out the header-files in the
|
||||
# directories. If left blank, the patterns specified with FILE_PATTERNS will be
|
||||
# used.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
|
||||
# The PREDEFINED tag can be used to specify one or more macro names that are
|
||||
# defined before the preprocessor is started (similar to the -D option of e.g.
|
||||
# gcc). The argument of the tag is a list of macros of the form: name or
|
||||
# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
|
||||
# is assumed. To prevent a macro definition from being undefined via #undef or
|
||||
# recursively expanded use the := operator instead of the = operator.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
PREDEFINED =
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
|
||||
# tag can be used to specify a list of macro names that should be expanded. The
|
||||
# macro definition that is found in the sources will be used. Use the PREDEFINED
|
||||
# tag if you want to use a different macro definition that overrules the
|
||||
# definition found in the source code.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
EXPAND_AS_DEFINED =
|
||||
|
||||
# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
|
||||
# remove all references to function-like macros that are alone on a line, have
|
||||
# an all uppercase name, and do not end with a semicolon. Such function macros
|
||||
# are typically used for boiler-plate code, and will confuse the parser if not
|
||||
# removed.
|
||||
# The default value is: YES.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
|
||||
BIN
doc/doxygen/lammps-logo.png
Normal file
BIN
doc/doxygen/lammps-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 13 KiB |
3
doc/graphviz/.gitignore
vendored
Normal file
3
doc/graphviz/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
/*.png
|
||||
/*.svg
|
||||
/*.pdf
|
||||
30
doc/graphviz/Makefile
Normal file
30
doc/graphviz/Makefile
Normal file
@ -0,0 +1,30 @@
|
||||
# Makefile for generating images with graphviz
|
||||
#
|
||||
SHELL = /bin/bash
|
||||
BUILDDIR = ${CURDIR}/..
|
||||
IMGDIR = $(BUILDDIR)/src/JPG
|
||||
IMGSRC = $(wildcard *.dot)
|
||||
IMGPNG = $(IMGSRC:%.dot=$(IMGDIR)/%.png)
|
||||
|
||||
HAS_DOT = NO
|
||||
ifeq ($(shell which dot >/dev/null 2>&1; echo $$?), 0)
|
||||
HAS_DOT = YES
|
||||
endif
|
||||
|
||||
all: $(IMGPNG)
|
||||
|
||||
clean:
|
||||
rm -f $(IMGSVG) $(IMGPDF) $(IMGPNG) *~
|
||||
|
||||
ifeq ($(HAS_DOT),YES)
|
||||
$(IMGDIR)/%.png: %.dot
|
||||
dot -Tpng -o $@ $<
|
||||
endif
|
||||
|
||||
ifeq ($(HAS_DOT),NO)
|
||||
$(IMGDIR)/%.png: %.dot
|
||||
@echo '###################################################'
|
||||
@echo '# Need to install "graphviz" to regenerate graphs #'
|
||||
@echo '###################################################'
|
||||
endif
|
||||
|
||||
90
doc/graphviz/lammps-classes.dot
Normal file
90
doc/graphviz/lammps-classes.dot
Normal file
@ -0,0 +1,90 @@
|
||||
// LAMMPS Class topology
|
||||
digraph lammps {
|
||||
rankdir="LR"
|
||||
La [shape=circle label="LAMMPS"]
|
||||
At [shape=box label="Atom" color=blue]
|
||||
Ci [shape=box label="CiteMe"]
|
||||
Co [shape=box label="Comm" color=blue]
|
||||
Do [shape=box label="Domain" color=blue]
|
||||
Er [shape=box label="Error" color=blue]
|
||||
Fo [shape=box label="Force" color=blue]
|
||||
Gr [shape=box label="Group" color=blue]
|
||||
In [shape=box label="Input" color=blue]
|
||||
Ko [shape=box label="KokkosLMP"]
|
||||
Ak [shape=box label="AtomKK" color=blue]
|
||||
Mk [shape=box label="MemoryKK" color=blue]
|
||||
Me [shape=box label="Memory" color=blue]
|
||||
Mo [shape=box label="Modify" color=blue]
|
||||
Ne [shape=box label="Neighbor" color=blue]
|
||||
Ou [shape=box label="Output" color=blue]
|
||||
Py [shape=box label="Python" color=blue]
|
||||
Up [shape=box label="Update" color=blue]
|
||||
Un [shape=box label="Universe" color=blue]
|
||||
Ti [shape=box label="Timer" color=blue]
|
||||
Rg [label="Region" color=red]
|
||||
Rb [shape=box label="RegionBlock"]
|
||||
Rs [shape=box label="RegionSphere"]
|
||||
Av [label="AtomVec" color=red]
|
||||
It [label="Integrate" color=red]
|
||||
Mi [label="Min" color=red]
|
||||
Pa [label="Pair" color=red]
|
||||
Bo [label="Bond" color=red]
|
||||
An [label="Angle" color=red]
|
||||
Di [label="Dihedral" color=red]
|
||||
Im [label="Improper" color=red]
|
||||
Ks [label="Kspace" color=red]
|
||||
Du [label="Dump" color=red]
|
||||
Fi [label="Fix" color=red]
|
||||
Cp [label="Compute" color=red]
|
||||
Th [label="Thermo"]
|
||||
Va [label="Variable"]
|
||||
Ew [shape=box label="Ewald"]
|
||||
Pp [shape=box label="PPPM"]
|
||||
Ff [label="FFT3d"]
|
||||
Re [label="Remap"]
|
||||
Gc [label="GridComm"]
|
||||
Cb [shape=box label="CommBrick"]
|
||||
Ct [shape=box label="CommTiled"]
|
||||
Aa [shape=box label="AtomVecAtomic"]
|
||||
Am [shape=box label="AtomVecMolecular"]
|
||||
Lj [shape=box label="PairLJCut"]
|
||||
Lo [shape=box label="PairLJCutOMP"]
|
||||
Lg [shape=box label="PairLJCutGPU"]
|
||||
Te [shape=box label="PairTersoff"]
|
||||
Bh [shape=box label="BondHarmonic"]
|
||||
Bf [shape=box label="BondFENE"]
|
||||
Fa [shape=box label="FixAveTime"]
|
||||
Fn [shape=box label="FixNVE"]
|
||||
Fh [shape=box label="FixNH"]
|
||||
Fp [shape=box label="FixNPT"]
|
||||
Ft [shape=box label="FixNVT"]
|
||||
Da [shape=box label="DumpAtom"]
|
||||
Dc [shape=box label="DumpCustom"]
|
||||
Dg [shape=box label="DumpCFG"]
|
||||
Ve [shape=box label="Verlet"]
|
||||
Rr [shape=box label="Respa"]
|
||||
Po [shape=box label="PPPMOmp"]
|
||||
La -> {At Ci Co Do Er Fo Gr In Ko Ak Mk Me Mo Ne Ou Py Ti Up Un} [penwidth=2]
|
||||
Do -> {Rg} [penwidth=2]
|
||||
Co -> {Cb Ct} [style=dashed penwidth=2]
|
||||
Rg -> {Rb Rs} [style=dashed penwidth=2]
|
||||
In -> Va [penwidth=2]
|
||||
Mo -> {Fi Cp} [penwidth=2]
|
||||
Fo -> {Pa Bo An Di Im Ks} [penwidth=2]
|
||||
Ks -> {Ew Pp} [style=dashed penwidth=2]
|
||||
Pp -> {Ff Re Gc} [penwidth=2]
|
||||
Pp -> {Po} [style=dashed penwidth=2]
|
||||
Up -> {It Mi} [penwidth=2]
|
||||
It -> {Ve Rr} [style=dashed penwidth=2]
|
||||
Ou -> {Du Th} [penwidth=2]
|
||||
Du -> {Da Dc} [style=dashed penwidth=2]
|
||||
Dc -> {Dg} [style=dashed penwidth=2]
|
||||
At -> Av [penwidth=2]
|
||||
Av -> {Aa Am} [style=dashed penwidth=2]
|
||||
Pa -> {Lj Te} [style=dashed penwidth=2]
|
||||
Lj -> {Lo Lg} [style=dashed penwidth=2]
|
||||
Bo -> {Bh Bf} [style=dashed penwidth=2]
|
||||
Fi -> {Fa Fn Fh} [style=dashed penwidth=2]
|
||||
Fh -> {Fp Ft} [style=dashed penwidth=2]
|
||||
}
|
||||
|
||||
@ -1,4 +0,0 @@
|
||||
Sphinx
|
||||
sphinxcontrib-spelling
|
||||
breathe
|
||||
Pygments
|
||||
@ -378,22 +378,22 @@ The images below illustrate how the data is presented.
|
||||
.. list-table::
|
||||
|
||||
* - .. figure:: JPG/coverage-overview-top.png
|
||||
:target: JPG/coverage-overview-top.png
|
||||
:scale: 25%
|
||||
|
||||
Top of the overview page
|
||||
|
||||
- .. figure:: JPG/coverage-overview-manybody.png
|
||||
:target: JPG/coverage-overview-manybody.png
|
||||
:scale: 25%
|
||||
|
||||
Styles with good coverage
|
||||
|
||||
- .. figure:: JPG/coverage-file-top.png
|
||||
:target: JPG/coverage-file-top.png
|
||||
:scale: 25%
|
||||
|
||||
Top of individual source page
|
||||
|
||||
- .. figure:: JPG/coverage-file-branches.png
|
||||
:target: JPG/coverage-file-branches.png
|
||||
:scale: 25%
|
||||
|
||||
Source page with branches
|
||||
|
||||
|
||||
@ -361,9 +361,12 @@ be specified in uppercase.
|
||||
* - AMDAVX
|
||||
- HOST
|
||||
- AMD 64-bit x86 CPU (AVX 1)
|
||||
* - EPYC
|
||||
* - ZEN
|
||||
- HOST
|
||||
- AMD EPYC Zen class CPU (AVX 2)
|
||||
- AMD Zen class CPU (AVX 2)
|
||||
* - ZEN2
|
||||
- HOST
|
||||
- AMD Zen2 class CPU (AVX 2)
|
||||
* - ARMV80
|
||||
- HOST
|
||||
- ARMv8.0 Compatible CPU
|
||||
@ -445,12 +448,18 @@ be specified in uppercase.
|
||||
* - TURING75
|
||||
- GPU
|
||||
- NVIDIA Turing generation CC 7.5 GPU
|
||||
* - AMPERE80
|
||||
- GPU
|
||||
- NVIDIA Ampere generation CC 8.0 GPU
|
||||
* - VEGA900
|
||||
- GPU
|
||||
- AMD GPU MI25 GFX900
|
||||
* - VEGA906
|
||||
- GPU
|
||||
- AMD GPU MI50/MI60 GFX906
|
||||
* - INTEL_GEN
|
||||
- GPU
|
||||
- Intel GPUs Gen9+
|
||||
|
||||
Basic CMake build settings:
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@ -502,10 +502,10 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
|
||||
*Bond/react: Unknown section in map file*
|
||||
Please ensure reaction map files are properly formatted.
|
||||
|
||||
*Bond/react: Atom affected by reaction too close to template edge*
|
||||
*Bond/react: Atom/Bond type affected by reaction too close to template edge*
|
||||
This means an atom which changes type or connectivity during the
|
||||
reaction is too close to an 'edge' atom defined in the map
|
||||
file. This could cause incorrect assignment of bonds, angle, etc.
|
||||
file. This could cause incorrect assignment of bonds, angle, etc.
|
||||
Generally, this means you must include more atoms in your templates,
|
||||
such that there are at least two atoms between each atom involved in
|
||||
the reaction and an edge atom.
|
||||
|
||||
@ -191,19 +191,19 @@ You start the command ``ccmake ../cmake`` in the ``build`` folder.
|
||||
.. list-table::
|
||||
|
||||
* - .. figure:: JPG/ccmake-initial.png
|
||||
:target: JPG/ccmake-initial.png
|
||||
:scale: 33%
|
||||
:align: center
|
||||
|
||||
Initial ``ccmake`` screen
|
||||
|
||||
- .. figure:: JPG/ccmake-config.png
|
||||
:target: JPG/ccmake-config.png
|
||||
:scale: 33%
|
||||
:align: center
|
||||
|
||||
Configure output of ``ccmake``
|
||||
|
||||
- .. figure:: JPG/ccmake-options.png
|
||||
:target: JPG/ccmake-options.png
|
||||
:scale: 33%
|
||||
:align: center
|
||||
|
||||
Options screen of ``ccmake``
|
||||
@ -236,19 +236,19 @@ not required, it can also be entered from the GUI.
|
||||
.. list-table::
|
||||
|
||||
* - .. figure:: JPG/cmake-gui-initial.png
|
||||
:target: JPG/cmake-gui-initial.png
|
||||
:scale: 40%
|
||||
:align: center
|
||||
|
||||
Initial ``cmake-gui`` screen
|
||||
|
||||
- .. figure:: JPG/cmake-gui-popup.png
|
||||
:target: JPG/cmake-gui-popup.png
|
||||
:scale: 60%
|
||||
:align: center
|
||||
|
||||
Generator selection in ``cmake-gui``
|
||||
|
||||
- .. figure:: JPG/cmake-gui-options.png
|
||||
:target: JPG/cmake-gui-options.png
|
||||
:scale: 40%
|
||||
:align: center
|
||||
|
||||
Options screen of ``cmake-gui``
|
||||
|
||||
BIN
doc/src/JPG/lammps-classes.png
Normal file
BIN
doc/src/JPG/lammps-classes.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 245 KiB |
@ -60,6 +60,19 @@ every LAMMPS command.
|
||||
Errors
|
||||
Manual_build
|
||||
|
||||
.. _programmer_documentation:
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:numbered: 3
|
||||
:caption: Programmer Documentation
|
||||
:name: progdoc
|
||||
:includehidden:
|
||||
|
||||
pg_developer
|
||||
.. pg_library
|
||||
.. pg_modify
|
||||
.. pg_base
|
||||
|
||||
.. toctree::
|
||||
:caption: Index
|
||||
:name: index
|
||||
|
||||
@ -14,19 +14,22 @@ Syntax
|
||||
react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ...
|
||||
...
|
||||
|
||||
* ID, group-ID are documented in :doc:`fix <fix>` command. Group-ID is ignored.
|
||||
* ID, group-ID are documented in :doc:`fix <fix>` command.
|
||||
* bond/react = style name of this fix command
|
||||
* the common keyword/values may be appended directly after 'bond/react'
|
||||
* this applies to all reaction specifications (below)
|
||||
* common_keyword = *stabilization*
|
||||
* common_keyword = *stabilization* or *reset_mol_ids*
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
*stabilization* values = *no* or *yes* *group-ID* *xmax*
|
||||
*no* = no reaction site stabilization
|
||||
*no* = no reaction site stabilization (default)
|
||||
*yes* = perform reaction site stabilization
|
||||
*group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction
|
||||
*xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator
|
||||
*reset_mol_ids* values = *yes* or *no*
|
||||
*yes* = update molecule IDs based on new global topology (default)
|
||||
*no* = do not update molecule IDs
|
||||
|
||||
* react = mandatory argument indicating new reaction specification
|
||||
* react-ID = user-assigned name for the reaction
|
||||
@ -50,9 +53,9 @@ Syntax
|
||||
*stabilize_steps* value = timesteps
|
||||
timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms
|
||||
*update_edges* value = *none* or *charges* or *custom*
|
||||
none = do not update topology near the edges of reaction templates
|
||||
charges = update atomic charges of all atoms in reaction templates
|
||||
custom = force the update of user-specified atomic charges
|
||||
*none* = do not update topology near the edges of reaction templates
|
||||
*charges* = update atomic charges of all atoms in reaction templates
|
||||
*custom* = force the update of user-specified atomic charges
|
||||
|
||||
Examples
|
||||
""""""""
|
||||
@ -154,6 +157,13 @@ due to the internal dynamic grouping performed by fix bond/react.
|
||||
If the group-ID is an existing static group, react-group-IDs
|
||||
should also be specified as this static group, or a subset.
|
||||
|
||||
The *reset_mol_ids* keyword invokes the :doc:`reset_mol_ids <reset_mol_ids>`
|
||||
command after a reaction occurs, to ensure that molecule IDs are
|
||||
consistent with the new bond topology. The group-ID used for
|
||||
:doc:`reset_mol_ids <reset_mol_ids>` is the group-ID for this fix.
|
||||
Resetting molecule IDs is necessarily a global operation, and so can
|
||||
be slow for very large systems.
|
||||
|
||||
The following comments pertain to each *react* argument (in other
|
||||
words, can be customized for each reaction, or reaction step):
|
||||
|
||||
@ -203,9 +213,10 @@ surrounding topology. As described below, the bonding atom pairs of
|
||||
the pre-reacted template are specified by atom ID in the map file. The
|
||||
pre-reacted molecule template should contain as few atoms as possible
|
||||
while still completely describing the topology of all atoms affected
|
||||
by the reaction. For example, if the force field contains dihedrals,
|
||||
the pre-reacted template should contain any atom within three bonds of
|
||||
reacting atoms.
|
||||
by the reaction (which includes all atoms that change atom type or
|
||||
connectivity, and all bonds that change bond type). For example, if
|
||||
the force field contains dihedrals, the pre-reacted template should
|
||||
contain any atom within three bonds of reacting atoms.
|
||||
|
||||
Some atoms in the pre-reacted template that are not reacting may have
|
||||
missing topology with respect to the simulation. For example, the
|
||||
@ -554,7 +565,7 @@ Default
|
||||
"""""""
|
||||
|
||||
The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60,
|
||||
update_edges = none
|
||||
reset_mol_ids = yes, update_edges = none
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -93,7 +93,7 @@ on particle *i* due to contact with particle *j* is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_{ne, Hooke} = k_N \delta_{ij} \mathbf{n}
|
||||
\mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}
|
||||
|
||||
Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle
|
||||
overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two
|
||||
@ -106,7 +106,7 @@ For the *hertz* model, the normal component of force is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_{ne, Hertz} = k_N R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
|
||||
\mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
|
||||
|
||||
Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective
|
||||
radius, denoted for simplicity as *R* from here on. For *hertz*\ , the
|
||||
@ -123,7 +123,7 @@ Here, :math:`E_{eff} = E = \left(\frac{1-\nu_i^2}{E_i} + \frac{1-\nu_j^2}{E_j}\r
|
||||
modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of
|
||||
types *i* and *j*\ . Note that if the elastic modulus and the shear
|
||||
modulus of the two particles are the same, the *hertz/material* model
|
||||
is equivalent to the *hertz* model with :math:`k_N = 4/3 E_{eff}`
|
||||
is equivalent to the *hertz* model with :math:`k_n = 4/3 E_{eff}`
|
||||
|
||||
The *dmt* model corresponds to the
|
||||
:ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force
|
||||
@ -140,7 +140,7 @@ where the force is computed as:
|
||||
|
||||
\mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n}
|
||||
|
||||
Here, *a* is the radius of the contact zone, related to the overlap
|
||||
Here, :math:`a` is the radius of the contact zone, related to the overlap
|
||||
:math:`\delta` according to:
|
||||
|
||||
.. math::
|
||||
@ -167,7 +167,7 @@ following general form:
|
||||
|
||||
\mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}
|
||||
|
||||
Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n} \mathbf{n}` is the component of relative velocity along
|
||||
Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n}\ \mathbf{n}` is the component of relative velocity along
|
||||
:math:`\mathbf{n}`.
|
||||
|
||||
The optional *damping* keyword to the *pair_coeff* command followed by
|
||||
@ -259,7 +259,9 @@ tangential model choices and their expected parameters are as follows:
|
||||
1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
4. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
4. *mindlin/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
5. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
6. *mindlin_rescale/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
|
||||
|
||||
Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
|
||||
damping :math:`\eta_n` that determines the magnitude of the tangential
|
||||
@ -268,11 +270,11 @@ coefficient, and :math:`k_t` is the tangential stiffness coefficient.
|
||||
|
||||
For *tangential linear_nohistory*, a simple velocity-dependent Coulomb
|
||||
friction criterion is used, which mimics the behavior of the *pair
|
||||
gran/hooke* style. The tangential force (\mathbf{F}_t\) is given by:
|
||||
gran/hooke* style. The tangential force :math:`\mathbf{F}_t` is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_t = -min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
|
||||
The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:
|
||||
|
||||
@ -294,8 +296,8 @@ keyword also affects the tangential damping. The parameter
|
||||
literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
|
||||
:ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`). The relative
|
||||
tangential velocity at the point of contact is given by
|
||||
:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\Omega_i + R_j\Omega_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}{n}`,
|
||||
:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i`.
|
||||
:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\mathbf{\Omega}_i + R_j\mathbf{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
|
||||
:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
|
||||
The direction of the applied force is :math:`\mathbf{t} = \mathbf{v}_{t,rel}/\|\mathbf{v}_{t,rel}\|` .
|
||||
|
||||
The normal force value :math:`F_{n0}` used to compute the critical force
|
||||
@ -314,21 +316,24 @@ form:
|
||||
|
||||
.. math::
|
||||
|
||||
F_{n0} = \|\mathbf{F}_ne + 2 F_{pulloff}\|
|
||||
F_{n0} = \|\mathbf{F}_{ne} + 2 F_{pulloff}\|
|
||||
|
||||
Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and
|
||||
:math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ .
|
||||
|
||||
The remaining tangential options all use accumulated tangential
|
||||
displacement (i.e. contact history). This is discussed below in the
|
||||
context of the *linear_history* option, but the same treatment of the
|
||||
accumulated displacement applies to the other options as well.
|
||||
displacement (i.e. contact history), except for the options
|
||||
*mindlin/force* and *mindlin_rescale/force*, that use accumulated
|
||||
tangential force instead, and are discussed further below.
|
||||
The accumulated tangential displacement is discussed in detail below
|
||||
in the context of the *linear_history* option. The same treatment of
|
||||
the accumulated displacement applies to the other options as well.
|
||||
|
||||
For *tangential linear_history*, the tangential force is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
|
||||
Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
|
||||
during the entire duration of the contact:
|
||||
@ -356,7 +361,7 @@ work:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'}\| - \mathbf{n}\cdot\mathbf{\xi'}}
|
||||
\mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}
|
||||
|
||||
Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
|
||||
current time step and :math:`\mathbf{\xi}` is the corrected
|
||||
@ -372,7 +377,7 @@ discussion):
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}\right)
|
||||
\mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)
|
||||
|
||||
The tangential force is added to the total normal force (elastic plus
|
||||
damping) to produce the total force on the particle. The tangential
|
||||
@ -387,27 +392,68 @@ overlap region) to induce a torque on each particle according to:
|
||||
|
||||
\mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t
|
||||
|
||||
For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution is used, which differs from the *linear_history*
|
||||
option by an additional factor of *a*\ , the radius of the contact region. The tangential force is given by:
|
||||
For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution
|
||||
is used which differs from the *linear_history* option by an additional factor
|
||||
of :math:`a`, the radius of the contact region. The tangential force is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
|
||||
Here, *a* is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
|
||||
|
||||
Here, :math:`a` is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
|
||||
for all normal contact models, except for *jkr*\ , where it is given
|
||||
implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see
|
||||
discussion above. To match the Mindlin solution, one should set :math:`k_t = 4G/(2-\nu)`, where :math:`G` is the shear modulus, related to Young's modulus
|
||||
:math:`E` by :math:`G = E/(2(1+\nu))`, where :math:`\nu` is Poisson's ratio. This
|
||||
can also be achieved by specifying *NULL* for :math:`k_t`, in which case a
|
||||
discussion above. To match the Mindlin solution, one should set
|
||||
:math:`k_t = 8G_{eff}`, where :math:`G_{eff}` is the effective shear modulus given by:
|
||||
|
||||
.. math::
|
||||
|
||||
G_{eff} = \left(\frac{2-\nu_i}{G_i} + \frac{2-\nu_j}{G_j}\right)^{-1}
|
||||
|
||||
where :math:`G` is the shear modulus, related to Young's modulus :math:`E`
|
||||
and Poisson's ratio :math:`\nu` by :math:`G = E/(2(1+\nu))`. This can also be
|
||||
achieved by specifying *NULL* for :math:`k_t`, in which case a
|
||||
normal contact model that specifies material parameters :math:`E` and
|
||||
:math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this
|
||||
case, mixing of the shear modulus for different particle types *i* and
|
||||
*j* is done according to:
|
||||
*j* is done according to the formula above.
|
||||
|
||||
.. note::
|
||||
|
||||
The radius of the contact region :math:`a` depends on the normal overlap.
|
||||
As a result, the tangential force for *mindlin* can change due to
|
||||
a variation in normal overlap, even with no change in tangential displacement.
|
||||
|
||||
For *tangential mindlin/force*, the accumulated elastic tangential force
|
||||
characterizes the contact history, instead of the accumulated tangential
|
||||
displacement. This prevents the dependence of the tangential force on the
|
||||
normal overlap as noted above. The tangential force is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
1/G = 2(2-\nu_i)(1+\nu_i)/E_i + 2(2-\nu_j)(1+\nu_j)/E_j
|
||||
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_{te} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
|
||||
|
||||
The increment of the elastic component of the tangential force
|
||||
:math:`\mathbf{F}_{te}` is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathrm{d}\mathbf{F}_{te} = -k_t a \mathbf{v}_{t,rel} \mathrm{d}\tau
|
||||
|
||||
The changes in frame of reference of the contacting pair of particles during
|
||||
contact are accounted for by the same formula as above, replacing the
|
||||
accumulated tangential displacement :math:`\xi`, by the accumulated tangential
|
||||
elastic force :math:`F_{te}`. When the tangential force exceeds the critical
|
||||
force, the tangential force is directly re-scaled to match the value for
|
||||
the critical force:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_{te} = - \mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}
|
||||
|
||||
The same rules as those described for *mindlin* apply regarding the tangential
|
||||
stiffness and mixing of the shear modulus for different particle types.
|
||||
|
||||
The *mindlin_rescale* option uses the same form as *mindlin*\ , but the
|
||||
magnitude of the tangential displacement is re-scaled as the contact
|
||||
@ -421,9 +467,32 @@ Here, :math:`t_{n-1}` indicates the value at the previous time
|
||||
step. This rescaling accounts for the fact that a decrease in the
|
||||
contact area upon unloading leads to the contact being unable to
|
||||
support the previous tangential loading, and spurious energy is
|
||||
created without the rescaling above (:ref:`Walton <WaltonPC>` ). See also
|
||||
discussion in :ref:`Thornton et al, 2013 <Thornton2013>` , particularly
|
||||
equation 18(b) of that work and associated discussion.
|
||||
created without the rescaling above (:ref:`Walton <WaltonPC>` ).
|
||||
|
||||
.. note::
|
||||
|
||||
For *mindlin*, a decrease in the tangential force already occurs as the
|
||||
contact unloads, due to the dependence of the tangential force on the normal
|
||||
force described above. By re-scaling :math:`\xi`, *mindlin_rescale*
|
||||
effectively re-scales the tangential force twice, i.e., proportionally to
|
||||
:math:`a^2`. This peculiar behavior results from use of the accumulated
|
||||
tangential displacement to characterize the contact history. Although
|
||||
*mindlin_rescale* remains available for historic reasons and backward
|
||||
compatibility purposes, it should be avoided in favor of *mindlin_rescale/force*.
|
||||
|
||||
The *mindlin_rescale/force* option uses the same form as *mindlin/force*,
|
||||
but the magnitude of the tangential elastic force is re-scaled as the contact
|
||||
unloads, i.e. if :math:`a < a_{t_{n-1}}`:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_{te} = \mathbf{F}_{te, t_{n-1}} \frac{a}{a_{t_{n-1}}}
|
||||
|
||||
This approach provides a better approximation of the :ref:`Mindlin-Deresiewicz <Mindlin1953>`
|
||||
laws and is more consistent than *mindlin_rescale*. See discussions in
|
||||
:ref:`Thornton et al, 2013 <Thornton2013>`, particularly equation 18(b) of that
|
||||
work and associated discussion, and :ref:`Agnolin and Roux, 2007 <AgnolinRoux2007>`,
|
||||
particularly Appendix A.
|
||||
|
||||
----------
|
||||
|
||||
@ -460,7 +529,7 @@ exceeds a critical value:
|
||||
|
||||
.. math::
|
||||
|
||||
\mathbf{F}_{roll} = min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
|
||||
\mathbf{F}_{roll} = \min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
|
||||
|
||||
Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of
|
||||
the pseudo-force. As with tangential displacement, the rolling
|
||||
@ -512,7 +581,7 @@ is then truncated according to:
|
||||
|
||||
.. math::
|
||||
|
||||
\tau_{twist} = min(\mu_{twist} F_{n,0}, \tau_{twist,0})
|
||||
\tau_{twist} = \min(\mu_{twist} F_{n,0}, \tau_{twist,0})
|
||||
|
||||
Similar to the sliding and rolling displacement, the angular
|
||||
displacement is rescaled so that it corresponds to the critical value
|
||||
@ -763,3 +832,15 @@ Technology, 233, 30-46.
|
||||
.. _WaltonPC:
|
||||
|
||||
**(Otis R. Walton)** Walton, O.R., Personal Communication
|
||||
|
||||
.. _Mindlin1953:
|
||||
|
||||
**(Mindlin and Deresiewicz, 1953)** Mindlin, R.D., & Deresiewicz, H. (1953).
|
||||
Elastic Spheres in Contact under Varying Oblique Force.
|
||||
J. Appl. Mech., ASME 20, 327-344.
|
||||
|
||||
.. _AgnolinRoux2007:
|
||||
|
||||
**(Agnolin and Roux 2007)** Agnolin, I. & Roux, J-N. (2007).
|
||||
Internal states of model isotropic granular packings.
|
||||
I. Assembling process, geometry, and contact networks. Phys. Rev. E, 76, 061302.
|
||||
|
||||
@ -250,8 +250,12 @@ from :ref:`(Li2013_POF) <Li2013_POF>`. The short mDPD run (about 2 minutes
|
||||
on a single core) generates a particle trajectory which can
|
||||
be visualized as follows.
|
||||
|
||||
.. only:: html
|
||||
|
||||
.. image:: JPG/examples_mdpd.gif
|
||||
:align: center
|
||||
|
||||
.. image:: JPG/examples_mdpd_first.jpg
|
||||
:target: JPG/examples_mdpd.gif
|
||||
:align: center
|
||||
|
||||
.. image:: JPG/examples_mdpd_last.jpg
|
||||
|
||||
120
doc/src/pg_developer.rst
Normal file
120
doc/src/pg_developer.rst
Normal file
@ -0,0 +1,120 @@
|
||||
LAMMPS Developer Guide
|
||||
**********************
|
||||
|
||||
This section describes the internal structure and basic algorithms
|
||||
of the LAMMPS code. This is a work in progress and additional
|
||||
information will be added incrementally depending on availability
|
||||
of time and requests from the LAMMPS user community.
|
||||
|
||||
|
||||
LAMMPS source files
|
||||
===================
|
||||
|
||||
The source files of the LAMMPS code are distributed across two
|
||||
directories of the distribution. The core of the code is located in the
|
||||
``src`` folder and its sub-directories. Almost all of those are C++ files
|
||||
(implementation files have a ``.cpp`` extension and headers a
|
||||
``.h``). A sizable number of these files are in the ``src`` directory
|
||||
itself, but there are plenty of :doc:`packages <Packages>`, which can be
|
||||
included or excluded when LAMMPS is built. See the :doc:`Include
|
||||
packages in build <Build_package>` section of the manual for more
|
||||
information about that part of the build process. LAMMPS currently
|
||||
supports building with :doc:`conventional makefiles <Build_make>` and
|
||||
through :doc:`CMake <Build_cmake>` which differ in how packages are
|
||||
enabled or disabled for a LAMMPS binary. The source files for each
|
||||
package are in all-uppercase sub-directories of the ``src`` folder, for
|
||||
example ``src/MOLECULE`` or ``src/USER-MISC``. The ``src/STUBS``
|
||||
sub-directory is not a package but contains a dummy MPI library, that is
|
||||
used when building a serial version of the code. The ``src/MAKE``
|
||||
directory contains makefiles with settings and flags for a variety of
|
||||
configurations and machines for the build process with traditional
|
||||
makefiles.
|
||||
|
||||
The ``lib`` directory contains the source code for several supporting
|
||||
libraries or files with configuration settings to use globally installed
|
||||
libraries, that are required by some of the optional packages.
|
||||
Each sub-directory, like ``lib/poems`` or ``lib/gpu``, contains the
|
||||
source files, some of which are in different languages such as Fortran
|
||||
or CUDA. These libraries are linked to during a LAMMPS build, if the
|
||||
corresponding package is installed.
|
||||
|
||||
LAMMPS C++ source files almost always come in pairs, such as
|
||||
``src/run.cpp`` and ``src/run.h``. The pair of files defines a C++
|
||||
class, for example the :cpp:class:`LAMMPS_NS::Run` class which contains
|
||||
the code invoked by the :doc:`run <run>` command in a LAMMPS input script.
|
||||
As this example illustrates, source file and class names often have a
|
||||
one-to-one correspondence with a command used in a LAMMPS input script.
|
||||
Some source files and classes do not have a corresponding input script
|
||||
command, e.g. ``src/force.cpp`` and the :cpp:class:`LAMMPS_NS::Force`
|
||||
class. They are discussed in the next section.
|
||||
|
||||
Overview of LAMMPS class topology
|
||||
=================================
|
||||
|
||||
Though LAMMPS has a lot of source files and classes, its class topology
|
||||
is relatively flat, as outlined in the :ref:`class-topology` figure. Each
|
||||
name refers to a class and has a pair of associated source files in the
|
||||
``src`` folder, for example the class :cpp:class:`LAMMPS_NS::Memory`
|
||||
corresponds to the files ``memory.cpp`` and ``memory.h``, or the class
|
||||
:cpp:class:`LAMMPS_NS::AtomVec` corresponds to the files
|
||||
``atom_vec.cpp`` and ``atom_vec.h``. Full lines in the figure represent
|
||||
composition: that is, the class to the left holds a pointer to an
|
||||
instance of the class to the right. Dashed lines instead represent
|
||||
inheritance: the class to the right is derived from the class on the
|
||||
left. Classes with a red boundary are not instantiated directly, but
|
||||
they represent the base classes for "styles". Those "styles" make up
|
||||
the bulk of the LAMMPS code and only a few typical examples are included
|
||||
in the figure for demonstration purposes.
|
||||
|
||||
.. _class-topology:
|
||||
.. figure:: JPG/lammps-classes.png
|
||||
|
||||
LAMMPS class topology
|
||||
|
||||
This figure shows some of the relations of the base classes of the
|
||||
LAMMPS simulation package. Full lines indicate that a class holds an
|
||||
instance of the class it is pointing to; dashed lines point to
|
||||
derived classes that are given as examples of what classes may be
|
||||
instantiated during a LAMMPS run based on the input commands and
|
||||
accessed through the API defined by their respective base classes. At
|
||||
the core is the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class, which
|
||||
holds pointers to class instances with specific purposes. Those may
|
||||
hold instances of other classes, sometimes directly, or only
|
||||
temporarily, sometimes as derived classes or derived classes of
|
||||
derived classes, which may also hold instances of other classes.
|
||||
|
||||
The :cpp:class:`LAMMPS_NS::LAMMPS` class is the topmost class and
|
||||
represents what is referred to as an "instance" of LAMMPS. It is a
|
||||
composite holding references to instances of other core classes
|
||||
providing the core functionality of the MD engine in LAMMPS and through
|
||||
them abstractions of the required operations. The constructor of the
|
||||
LAMMPS class will instantiate those instances, process the command line
|
||||
flags, initialize MPI (if not already done) and set up file pointers for
|
||||
input and output. The destructor will shut everything down and free all
|
||||
associated memory. Thus code for the standalone LAMMPS executable in
|
||||
``main.cpp`` simply initializes MPI, instantiates a single instance of
|
||||
LAMMPS, and passes it the command line flags and input script. It
|
||||
deletes the LAMMPS instance after the method reading the input returns
|
||||
and shuts down the MPI environment before it exits the executable.
|
||||
|
||||
The :cpp:class:`LAMMPS_NS::Pointers` is not shown in the
|
||||
:ref:`class-topology` figure, it holds references to members of the
|
||||
:cpp:class:`LAMMPS_NS::LAMMPS` class, so that all classes derived from
|
||||
:cpp:class:`LAMMPS_NS::Pointers` have direct access to those references.
|
||||
From the class topology all classes with blue boundary are referenced in
|
||||
this class and all classes in the second and third columns, that are not
|
||||
listed as derived classes are instead derived from
|
||||
:cpp:class:`LAMMPS_NS::Pointers`.
|
||||
|
||||
Since all storage is encapsulated, the LAMMPS class can also be
|
||||
instantiated multiple times by a calling code, and that can be either
|
||||
simultaneously or consecutively. When running in parallel with MPI,
|
||||
care has to be taken, that suitable communicators are used to not
|
||||
create conflicts between different instances.
|
||||
|
||||
The LAMMPS class currently holds instances of 19 classes representing
|
||||
different core functionalities.
|
||||
There are a handful of virtual parent classes in LAMMPS that define
|
||||
what LAMMPS calls ``styles``. They are shaded red in the
|
||||
:ref:`class-topology` figure. Each of these is the parent of a number of child
|
||||
classes that implement the interface defined by the parent class.
|
||||
@ -1 +1,5 @@
|
||||
Sphinx
|
||||
sphinxcontrib-spelling
|
||||
sphinx-fortran
|
||||
breathe
|
||||
Pygments
|
||||
|
||||
@ -7,3 +7,10 @@
|
||||
display: block;
|
||||
margin-bottom: 0.809em;
|
||||
}
|
||||
|
||||
.lammps_release {
|
||||
text-align: center;
|
||||
font-size: 11px;
|
||||
display: block;
|
||||
margin-bottom: 0.405em;
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@ -103,6 +103,12 @@
|
||||
{%- endif %}
|
||||
{%- endblock %}
|
||||
{%- block extrahead %} {% endblock %}
|
||||
|
||||
{# Keep modernizr in head - http://modernizr.com/docs/#installing #}
|
||||
<script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
|
||||
|
||||
{# for improved browser compatibility #}
|
||||
<script src="{{ pathto('_static/polyfill.js', 1) }}"></script>
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
@ -135,9 +141,8 @@
|
||||
{%- set nav_version = current_version %}
|
||||
{% endif %}
|
||||
{% if nav_version %}
|
||||
<div class="version">
|
||||
{{ nav_version }}
|
||||
</div>
|
||||
<div class="lammps_version">Version: <b>{{ nav_version }}</b></div>
|
||||
<div class="lammps_release">git info: {{ release }}</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
@ -23,11 +23,16 @@ try:
|
||||
except:
|
||||
pass
|
||||
|
||||
LAMMPS_DOC_DIR = '@LAMMPS_DOC_DIR@'
|
||||
LAMMPS_SOURCE_DIR = '@LAMMPS_SOURCE_DIR@'
|
||||
LAMMPS_PYTHON_DIR = '@LAMMPS_PYTHON_DIR@'
|
||||
LAMMPS_DOXYGEN_XML_DIR = '@DOXYGEN_XML_DIR@'
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, os.path.abspath('.'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../../src/_ext'))
|
||||
sys.path.append(os.path.join(LAMMPS_DOC_DIR, 'src', '_ext'))
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
@ -41,7 +46,9 @@ extensions = [
|
||||
'sphinx.ext.mathjax',
|
||||
'sphinx.ext.imgmath',
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinxfortran.fortran_domain',
|
||||
'table_from_list',
|
||||
'breathe',
|
||||
]
|
||||
# 2017-12-07: commented out, since this package is broken with Sphinx 16.x
|
||||
# yet we can no longer use Sphinx 15.x, since that breaks with
|
||||
@ -72,12 +79,24 @@ copyright = '2003-2020 Sandia Corporation'
|
||||
def get_lammps_version():
|
||||
import os
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
with open(os.path.join(script_dir, '../../../src/version.h'), 'r') as f:
|
||||
with open(os.path.join(LAMMPS_SOURCE_DIR, 'version.h'), 'r') as f:
|
||||
line = f.readline()
|
||||
start_pos = line.find('"')+1
|
||||
end_pos = line.find('"', start_pos)
|
||||
return line[start_pos:end_pos]
|
||||
|
||||
# Return the output of `git describe` with every underscore replaced by a
# space. Returns an empty string when running git raises an exception or
# when git exits with a non-zero return code.
def get_git_info():
|
||||
import subprocess,time
|
||||
|
||||
git_n_date = ''
|
||||
try:
|
||||
# stdout and stderr are both captured through pipes instead of being inherited
gitinfo = subprocess.run(['git','describe'],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
|
||||
if gitinfo.returncode == 0:
|
||||
git_n_date = gitinfo.stdout.decode().replace('_',' ')
|
||||
# deliberate best-effort: any failure leaves the empty default in place
except:
|
||||
pass
|
||||
return git_n_date
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
@ -85,7 +104,7 @@ def get_lammps_version():
|
||||
# The short X.Y version.
|
||||
version = get_lammps_version()
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = ''
|
||||
release = get_git_info()
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
@ -153,7 +172,7 @@ html_title = "LAMMPS documentation"
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
html_logo = 'lammps-logo.png'
|
||||
html_logo = '_static/lammps-logo.png'
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
@ -314,7 +333,7 @@ texinfo_documents = [
|
||||
|
||||
epub_title = 'LAMMPS Documentation - ' + get_lammps_version()
|
||||
|
||||
epub_cover = ('lammps-logo.png', '')
|
||||
epub_cover = ('_static/lammps-logo.png', '')
|
||||
|
||||
epub_description = """
|
||||
This is the Manual for the LAMMPS software package.
|
||||
@ -342,13 +361,29 @@ if spelling_spec and has_enchant:
|
||||
spelling_lang='en_US'
|
||||
spelling_word_list_filename='false_positives.txt'
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '.'))
|
||||
conf_script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.append(os.path.join(conf_script_dir, '.'))
|
||||
import LAMMPSLexer
|
||||
from sphinx.highlighting import lexers
|
||||
|
||||
lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True)
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../python'))
|
||||
sys.path.append(LAMMPS_PYTHON_DIR)
|
||||
|
||||
# avoid syntax highlighting in blocks that don't specify language
|
||||
highlight_language = 'none'
|
||||
|
||||
# autodoc configuration
|
||||
|
||||
autodoc_member_order = 'bysource'
|
||||
#autoclass_content = 'both'
|
||||
|
||||
# breathe configuration
|
||||
|
||||
breathe_projects = { 'progguide' : LAMMPS_DOXYGEN_XML_DIR }
|
||||
breathe_default_project = 'progguide'
|
||||
breathe_show_define_initializer = True
|
||||
breathe_domain_by_extension = { 'h' : 'cpp',
|
||||
'cpp' : 'cpp',
|
||||
'c' : 'c',
|
||||
}
|
||||
@ -43,6 +43,7 @@ Afshar
|
||||
agilio
|
||||
Agilio
|
||||
agni
|
||||
Agnolin
|
||||
Ai
|
||||
Aidan
|
||||
aij
|
||||
@ -436,6 +437,7 @@ Colvars
|
||||
COLVARS
|
||||
comID
|
||||
Commun
|
||||
compositing
|
||||
compressibility
|
||||
compressive
|
||||
Comput
|
||||
@ -599,6 +601,7 @@ Dequidt
|
||||
der
|
||||
dereference
|
||||
derekt
|
||||
Deresiewicz
|
||||
Derjagin
|
||||
Derjaguin
|
||||
Derlet
|
||||
@ -2219,6 +2222,7 @@ oxdna
|
||||
oxrna
|
||||
oxDNA
|
||||
oxRNA
|
||||
packings
|
||||
padua
|
||||
Padua
|
||||
palegoldenrod
|
||||
|
||||
@ -1 +0,0 @@
|
||||
../../src/JPG/lammps-logo.png
|
||||
@ -709,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
for (int i=0; i<6; i++)
|
||||
virial[i]=(acctyp)0;
|
||||
|
||||
__local int red_acc[BLOCK_PAIR];
|
||||
__local int ijnum_shared[BLOCK_PAIR];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -789,14 +789,14 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
k &= NEIGHMASK;
|
||||
if (k == i) {
|
||||
ijnum = nbor_k;
|
||||
red_acc[m] = ijnum;
|
||||
ijnum_shared[m] = ijnum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numtyp r1 = ucl_sqrt(rsq1);
|
||||
numtyp r1inv = ucl_rsqrt(rsq1);
|
||||
if (ijnum < 0) ijnum = red_acc[m];
|
||||
if (ijnum < 0) ijnum = ijnum_shared[m];
|
||||
|
||||
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
|
||||
int idx = ijnum;
|
||||
|
||||
@ -719,7 +719,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
for (int i=0; i<6; i++)
|
||||
virial[i]=(acctyp)0;
|
||||
|
||||
__local int red_acc[BLOCK_PAIR];
|
||||
__local int ijnum_shared[BLOCK_PAIR];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -799,14 +799,14 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
k &= NEIGHMASK;
|
||||
if (k == i) {
|
||||
ijnum = nbor_k;
|
||||
red_acc[m] = ijnum;
|
||||
ijnum_shared[m] = ijnum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numtyp r1 = ucl_sqrt(rsq1);
|
||||
numtyp r1inv = ucl_rsqrt(rsq1);
|
||||
if (ijnum < 0) ijnum = red_acc[m];
|
||||
if (ijnum < 0) ijnum = ijnum_shared[m];
|
||||
|
||||
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
|
||||
int idx = ijnum;
|
||||
@ -957,7 +957,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
for (int i=0; i<6; i++)
|
||||
virial[i]=(acctyp)0;
|
||||
|
||||
__local int red_acc[BLOCK_PAIR];
|
||||
__local int ijnum_shared[BLOCK_PAIR];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -1037,14 +1037,14 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
k &= NEIGHMASK;
|
||||
if (k == i) {
|
||||
ijnum = nbor_k;
|
||||
red_acc[m] = ijnum;
|
||||
ijnum_shared[m] = ijnum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numtyp r1 = ucl_sqrt(rsq1);
|
||||
numtyp r1inv = ucl_rsqrt(rsq1);
|
||||
if (ijnum < 0) ijnum = red_acc[m];
|
||||
if (ijnum < 0) ijnum = ijnum_shared[m];
|
||||
|
||||
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
|
||||
int idx = ijnum;
|
||||
|
||||
@ -729,7 +729,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
for (int i=0; i<6; i++)
|
||||
virial[i]=(acctyp)0;
|
||||
|
||||
__local int red_acc[BLOCK_PAIR];
|
||||
__local int ijnum_shared[BLOCK_PAIR];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -809,14 +809,14 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
k &= NEIGHMASK;
|
||||
if (k == i) {
|
||||
ijnum = nbor_k;
|
||||
red_acc[m] = ijnum;
|
||||
ijnum_shared[m] = ijnum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
numtyp r1 = ucl_sqrt(rsq1);
|
||||
numtyp r1inv = ucl_rsqrt(rsq1);
|
||||
if (ijnum < 0) ijnum = red_acc[m];
|
||||
if (ijnum < 0) ijnum = ijnum_shared[m];
|
||||
|
||||
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
|
||||
int idx = ijnum;
|
||||
|
||||
@ -10,33 +10,45 @@ for C++. Applications heavily leveraging Kokkos are strongly encouraged to use
|
||||
You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project.
|
||||
Modern CMake is exceedingly simple at a high-level (with the devil in the details).
|
||||
Once Kokkos is installed, in your `CMakeLists.txt` simply use:
|
||||
````
|
||||
````cmake
|
||||
find_package(Kokkos REQUIRED)
|
||||
````
|
||||
Then for every executable or library in your project:
|
||||
````
|
||||
````cmake
|
||||
target_link_libraries(myTarget Kokkos::kokkos)
|
||||
````
|
||||
That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
|
||||
Kokkos propagates all the necessary flags to your project.
|
||||
This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
|
||||
project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`.
|
||||
project.
|
||||
When configuring your project just set:
|
||||
````bash
|
||||
> cmake ${srcdir} \
|
||||
-DKokkos_ROOT=${kokkos_install_prefix} \
|
||||
-DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
|
||||
````
|
||||
Note: You may need the following if using some versions of CMake (e.g. 3.12):
|
||||
````cmake
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
````
|
||||
If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(Kokkos::kokkos)`.
|
||||
The examples in `examples/cmake_build_installed` and `examples/cmake_build_in_tree` can help get you started.
|
||||
|
||||
|
||||
## Configuring CMake
|
||||
A very basic installation is done with:
|
||||
````
|
||||
cmake ${srcdir} \
|
||||
A very basic installation of Kokkos is done with:
|
||||
````bash
|
||||
> cmake ${srcdir} \
|
||||
-DCMAKE_CXX_COMPILER=g++ \
|
||||
-DCMAKE_INSTALL_PREFIX=${my_install_folder}
|
||||
-DCMAKE_INSTALL_PREFIX=${kokkos_install_folder}
|
||||
````
|
||||
which builds and installs a default Kokkos when you run `make install`.
|
||||
There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g.
|
||||
````
|
||||
cmake ${srcdir} \
|
||||
````bash
|
||||
> cmake ${srcdir} \
|
||||
-DCMAKE_CXX_COMPILER=g++ \
|
||||
-DCMAKE_INSTALL_PREFIX=${my_install_folder} \
|
||||
-DKokkos_ENABLE_OPENMP=On
|
||||
-DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \
|
||||
-DKokkos_ENABLE_OPENMP=ON
|
||||
````
|
||||
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
|
||||
|
||||
@ -50,16 +62,16 @@ which activates the OpenMP backend. All of the options controlling device backen
|
||||
## Spack
|
||||
An alternative to manually building with CMake is to use the Spack package manager.
|
||||
To do so, download the `kokkos-spack` git repo and add to the package list:
|
||||
````
|
||||
spack repo add $path-to-kokkos-spack
|
||||
````bash
|
||||
> spack repo add $path-to-kokkos-spack
|
||||
````
|
||||
A basic installation would be done as:
|
||||
````
|
||||
spack install kokkos
|
||||
````bash
|
||||
> spack install kokkos
|
||||
````
|
||||
Spack allows options and compilers to be tuned in the install command.
|
||||
````
|
||||
spack install kokkos@3.0 %gcc@7.3.0 +openmp
|
||||
````bash
|
||||
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
|
||||
````
|
||||
This example illustrates the three most common parameters to Spack:
|
||||
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
|
||||
@ -67,17 +79,17 @@ This example illustrates the three most common parameters to Spack:
|
||||
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
|
||||
|
||||
For a complete list of Kokkos options, run:
|
||||
````bash
|
||||
> spack info kokkos
|
||||
````
|
||||
spack info kokkos
|
||||
````
|
||||
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
|
||||
More details can be found in the [Spack README](Spack.md).
|
||||
|
||||
#### Spack Development
|
||||
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
|
||||
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
|
||||
If you must know, you can locate Spack Kokkos installations with:
|
||||
````
|
||||
spack find -p kokkos ...
|
||||
````bash
|
||||
> spack find -p kokkos ...
|
||||
````
|
||||
where `...` is the unique spec identifying the particular Kokkos configuration and version.
|
||||
|
||||
@ -102,8 +114,14 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
|
||||
* Whether to build Pthread backend
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ENABLE_SERIAL
|
||||
* Whether to build serial backend
|
||||
* Whether to build serial backend
|
||||
* BOOL Default: ON
|
||||
* Kokkos_ENABLE_HIP (Experimental)
|
||||
* Whether to build HIP backend
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ENABLE_OPENMPTARGET (Experimental)
|
||||
* Whether to build the OpenMP target backend
|
||||
* BOOL Default: OFF
|
||||
|
||||
## Enable Options
|
||||
Options can be enabled by specifying `-DKokkos_ENABLE_X`.
|
||||
@ -138,9 +156,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
|
||||
* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
|
||||
* Debug check on dual views
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ENABLE_DEPRECATED_CODE
|
||||
* Whether to enable deprecated code
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ENABLE_EXAMPLES
|
||||
* Whether to enable building examples
|
||||
* BOOL Default: OFF
|
||||
@ -150,9 +165,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
|
||||
* Kokkos_ENABLE_LARGE_MEM_TESTS
|
||||
* Whether to perform extra large memory tests
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ENABLE_PROFILING
|
||||
* Whether to create bindings for profiling tools
|
||||
* BOOL Default: ON
|
||||
* Kokkos_ENABLE_PROFILING_LOAD_PRINT
|
||||
* Whether to print information about which profiling tools got loaded
|
||||
* BOOL Default: OFF
|
||||
@ -235,8 +247,11 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
|
||||
* Kokkos_ARCH_BGQ
|
||||
* Whether to optimize for the BGQ architecture
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ARCH_EPYC
|
||||
* Whether to optimize for the EPYC architecture
|
||||
* Kokkos_ARCH_ZEN
|
||||
* Whether to optimize for the Zen architecture
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ARCH_ZEN2
|
||||
* Whether to optimize for the Zen2 architecture
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ARCH_HSW
|
||||
* Whether to optimize for the HSW architecture
|
||||
|
||||
@ -1,6 +1,113 @@
|
||||
# Change Log
|
||||
|
||||
## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
|
||||
## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163)
|
||||
- HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154)
|
||||
- HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137)
|
||||
- HIP:Require HIP 3.5.0 or higher [\#3099](https://github.com/kokkos/kokkos/issues/3099)
|
||||
- HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096)
|
||||
- OpenMPTarget: Significant update to the new experimental backend. Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169)
|
||||
- Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018)
|
||||
- Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241)
|
||||
- Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234)
|
||||
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 1: [\#3202](https://github.com/kokkos/kokkos/issues/3202)
|
||||
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 2: [\#3203](https://github.com/kokkos/kokkos/issues/3203)
|
||||
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 3: [\#3196](https://github.com/kokkos/kokkos/issues/3196)
|
||||
- Annotations for `DefaultExecutionSpace` and `DefaultHostExecutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189)
|
||||
- Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187)
|
||||
- Improve support for nvcc\_wrapper with exotic host compiler [\#3186](https://github.com/kokkos/kokkos/issues/3186)
|
||||
- Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185)
|
||||
- Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166)
|
||||
- Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165)
|
||||
- View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159)
|
||||
- Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151)
|
||||
- BuildSystem: Provide an explicit default CMAKE\_BUILD\_TYPE [\#3131](https://github.com/kokkos/kokkos/issues/3131)
|
||||
- Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124)
|
||||
- Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123)
|
||||
- BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122)
|
||||
- General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106)
|
||||
- TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098)
|
||||
- nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092)
|
||||
- BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091)
|
||||
- Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087)
|
||||
- Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084)
|
||||
- BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082)
|
||||
- Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076)
|
||||
- HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067)
|
||||
- Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061)
|
||||
- Extract and use get\_gpu [\#3048](https://github.com/kokkos/kokkos/issues/3048)
|
||||
- Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059)
|
||||
- Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052)
|
||||
- Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051)
|
||||
- Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034)
|
||||
- BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021)
|
||||
- HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020)
|
||||
- Autodetect number of devices [\#3013](https://github.com/kokkos/kokkos/issues/3013)
|
||||
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255)
|
||||
- Fix issue with C++ standard flags when using `nvcc\_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254)
|
||||
- Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208)
|
||||
- Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176)
|
||||
- Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170)
|
||||
- BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161)
|
||||
- ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162)
|
||||
- BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127)
|
||||
- OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101)
|
||||
- BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085)
|
||||
- HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080)
|
||||
- Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072)
|
||||
- Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069)
|
||||
- Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035)
|
||||
|
||||
**Incompatibilities:**
|
||||
|
||||
- Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157)
|
||||
- Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147)
|
||||
- Remove core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146)
|
||||
- Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115)
|
||||
- Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066)
|
||||
|
||||
**Closed issues:**
|
||||
|
||||
- Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
|
||||
- Remove KOKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
|
||||
- Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
|
||||
- In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
|
||||
- Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)
|
||||
- DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040)
|
||||
- Add installation testing. [\#3037](https://github.com/kokkos/kokkos/issues/3037)
|
||||
- nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017)
|
||||
- CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996)
|
||||
- Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975)
|
||||
- Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974)
|
||||
- Add ability to assign kokkos profile function from executable [\#2973](https://github.com/kokkos/kokkos/issues/2973)
|
||||
- ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967)
|
||||
|
||||
- Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237)
|
||||
- Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252)
|
||||
- Error configuring as subproject [\#3140](https://github.com/kokkos/kokkos/issues/3140)
|
||||
- CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207)
|
||||
- PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125)
|
||||
- Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167)
|
||||
- RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192)
|
||||
- MemorySpace::allocate needs to have memory pool counting. [\#3064](https://github.com/kokkos/kokkos/issues/3064)
|
||||
- Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038)
|
||||
- CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026)
|
||||
- Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014)
|
||||
- nvcc\_wrapper cant handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993)
|
||||
- Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990)
|
||||
- complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
|
||||
- Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979)
|
||||
- Rank 1 custom layouts dont work as expected. [\#2840](https://github.com/kokkos/kokkos/issues/2840)
|
||||
|
||||
## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
@ -1,4 +1,9 @@
|
||||
|
||||
# Disable in-source builds to prevent source tree corruption.
|
||||
if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
|
||||
message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files." )
|
||||
endif()
|
||||
|
||||
# We want to determine if options are given with the wrong case
|
||||
# In order to detect which arguments are given to compare against
|
||||
# the list of valid arguments, at the beginning here we need to
|
||||
@ -34,6 +39,9 @@ IF(COMMAND TRIBITS_PACKAGE_DECL)
|
||||
ELSE()
|
||||
SET(KOKKOS_HAS_TRILINOS OFF)
|
||||
ENDIF()
|
||||
# Is this build a subdirectory of another project
|
||||
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
|
||||
|
||||
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
|
||||
@ -75,16 +83,17 @@ IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
|
||||
SET(ENV{CXX} ${SPACK_CXX})
|
||||
ENDIF()
|
||||
ENDif()
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
# WORKAROUND FOR HIPCC
|
||||
IF(Kokkos_ENABLE_HIP)
|
||||
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
|
||||
ENDIF()
|
||||
PROJECT(Kokkos CXX)
|
||||
IF(Kokkos_ENABLE_HIP)
|
||||
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
|
||||
ENDIF()
|
||||
# Always call the project command to define Kokkos_ variables
|
||||
# and to make sure that C++ is an enabled language
|
||||
PROJECT(Kokkos CXX)
|
||||
IF(NOT HAS_PARENT)
|
||||
IF (NOT CMAKE_BUILD_TYPE)
|
||||
SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
|
||||
MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
|
||||
SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
|
||||
"Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
|
||||
FORCE)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
@ -102,8 +111,8 @@ ENDIF()
|
||||
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 3)
|
||||
set(Kokkos_VERSION_MINOR 1)
|
||||
set(Kokkos_VERSION_PATCH 1)
|
||||
set(Kokkos_VERSION_MINOR 2)
|
||||
set(Kokkos_VERSION_PATCH 0)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
@ -147,6 +156,7 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
|
||||
# Check the environment and set certain variables
|
||||
# to allow platform-specific checks
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
|
||||
|
||||
# The build environment setup goes in the following steps
|
||||
# 1) Check all the enable options. This includes checking Kokkos_DEVICES
|
||||
# 2) Check the compiler ID (type and version)
|
||||
@ -169,7 +179,6 @@ SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontain
|
||||
SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
|
||||
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
|
||||
|
||||
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
|
||||
IF (KOKKOS_HAS_TRILINOS)
|
||||
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
|
||||
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
|
||||
@ -203,7 +212,7 @@ IF (KOKKOS_HAS_TRILINOS)
|
||||
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
|
||||
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
|
||||
ENDFOREACH()
|
||||
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
|
||||
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
|
||||
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
|
||||
@ -246,7 +255,7 @@ KOKKOS_PACKAGE_POSTPROCESS()
|
||||
#We are ready to configure the header
|
||||
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
|
||||
|
||||
IF (NOT KOKKOS_HAS_TRILINOS)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
|
||||
ADD_LIBRARY(kokkos INTERFACE)
|
||||
#Make sure in-tree projects can reference this as Kokkos::
|
||||
#to match the installed target names
|
||||
@ -262,8 +271,6 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
|
||||
# If the argument of DESTINATION is a relative path, CMake computes it
|
||||
# as relative to ${CMAKE_INSTALL_PATH}.
|
||||
INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
|
||||
# Finally - if we are a subproject - make sure the enabled devices are visible
|
||||
IF (HAS_PARENT)
|
||||
|
||||
@ -11,20 +11,20 @@ CXXFLAGS += $(SHFLAGS)
|
||||
endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 3
|
||||
KOKKOS_VERSION_MINOR = 1
|
||||
KOKKOS_VERSION_PATCH = 1
|
||||
KOKKOS_VERSION_MINOR = 2
|
||||
KOKKOS_VERSION_PATCH = 0
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
|
||||
KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthread"
|
||||
# Options:
|
||||
# Options:
|
||||
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: Vega900,Vega906
|
||||
# AMD-CPUS: AMDAVX,EPYC
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2
|
||||
KOKKOS_ARCH ?= ""
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
@ -32,10 +32,8 @@ KOKKOS_DEBUG ?= "no"
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++11,c++14,c++1y,c++17,c++1z,c++2a
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align
|
||||
# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
# Option for setting ETI path
|
||||
KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
|
||||
KOKKOS_CMAKE ?= "no"
|
||||
KOKKOS_TRIBITS ?= "no"
|
||||
KOKKOS_STANDALONE_CMAKE ?= "no"
|
||||
@ -74,6 +72,7 @@ KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
|
||||
KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20)
|
||||
|
||||
# Check for external libraries.
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
|
||||
@ -83,9 +82,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
|
||||
# Check for advanced settings.
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
|
||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
|
||||
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
|
||||
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
|
||||
KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code)
|
||||
KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
|
||||
KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
|
||||
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
|
||||
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
|
||||
@ -96,7 +93,6 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
|
||||
KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
|
||||
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
|
||||
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
|
||||
|
||||
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
|
||||
|
||||
@ -140,6 +136,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
KOKKOS_DEVICELIST += OPENMPTARGET
|
||||
KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
|
||||
+ $(KOKKOS_INTERNAL_ENABLE_CXX20) \
|
||||
+ $(KOKKOS_INTERNAL_ENABLE_CXX2A))
|
||||
ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
|
||||
$(error OpenMPTarget backend requires C++17 or newer)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
@ -281,7 +283,7 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
|
||||
KOKKOS_INTERNAL_CXX14_FLAG := --c++14
|
||||
#KOKKOS_INTERNAL_CXX17_FLAG := --c++17
|
||||
KOKKOS_INTERNAL_CXX17_FLAG := --c++17
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
|
||||
@ -338,35 +340,27 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
|
||||
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
|
||||
KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80))
|
||||
|
||||
#SEK: This seems like a bug to me
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
|
||||
@ -394,19 +388,20 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
|
||||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Decide what ISA level we are able to support.
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
|
||||
@ -430,7 +425,7 @@ endif
|
||||
KOKKOS_CPPFLAGS =
|
||||
KOKKOS_LIBDIRS =
|
||||
ifneq ($(KOKKOS_CMAKE), yes)
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||
endif
|
||||
KOKKOS_TPL_INCLUDE_DIRS =
|
||||
KOKKOS_TPL_LIBRARY_DIRS =
|
||||
@ -458,88 +453,91 @@ KOKKOS_CONFIG_HEADER=KokkosCore_config.h
|
||||
# Functions for generating config header file
|
||||
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
|
||||
|
||||
# assign hash sign to variable for compat. with make 4.3
|
||||
H := \#
|
||||
|
||||
# Write the first line with '>' rather than appending, so the temporary header starts fresh
|
||||
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
|
||||
tmp := $(call kokkos_append_header,"$(shell date)")
|
||||
tmp := $(call kokkos_append_header,"----------------------------------------------*/")
|
||||
|
||||
tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
|
||||
tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
|
||||
tmp := $(call kokkos_append_header,'\#else')
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
|
||||
tmp := $(call kokkos_append_header,'\#endif')
|
||||
tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
|
||||
tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
|
||||
tmp := $(call kokkos_append_header,'$H''else')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
|
||||
tmp := $(call kokkos_append_header,'$H''endif')
|
||||
|
||||
tmp := $(call kokkos_append_header,"")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
|
||||
tmp := $(call kokkos_append_header,"")
|
||||
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_ROCM')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP')
|
||||
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
|
||||
tmp := $(call kokkos_append_header,"$H""endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
|
||||
tmp := $(call kokkos_append_header,"$H""endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
|
||||
tmp := $(call kokkos_append_header,"$H""endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
|
||||
tmp := $(call kokkos_append_header,"$H""endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
|
||||
tmp := $(call kokkos_append_header,"$H""endif")
|
||||
endif
|
||||
|
||||
#only add the c++ standard flags if this is not CMake
|
||||
@ -548,34 +546,39 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX11")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
|
||||
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
|
||||
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
@ -585,20 +588,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
|
||||
endif
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TUNING")
|
||||
endif
|
||||
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LIBDL")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
ifneq ($(KOKKOS_CMAKE), yes)
|
||||
ifneq ($(HWLOC_PATH),)
|
||||
@ -611,11 +620,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hwloc
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
|
||||
KOKKOS_LIBS += -lrt
|
||||
KOKKOS_TPL_LIBRARY_NAMES += rt
|
||||
endif
|
||||
@ -632,50 +641,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
|
||||
endif
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
|
||||
endif
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Cuda Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_CXXFLAGS += -fcuda-rdc
|
||||
KOKKOS_LDFLAGS += -fcuda-rdc
|
||||
@ -696,7 +691,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
@ -704,14 +699,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
|
||||
KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
|
||||
else
|
||||
$(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.)
|
||||
@ -719,25 +714,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
|
||||
endif
|
||||
endif
|
||||
|
||||
# Add Architecture flags.
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -754,7 +749,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -770,9 +765,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx2
|
||||
@ -783,9 +778,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx2
|
||||
KOKKOS_LDFLAGS += -mavx2
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
|
||||
KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -802,8 +810,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -820,7 +828,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xSSE4.2
|
||||
@ -842,7 +850,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
@ -864,7 +872,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -876,7 +884,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -897,7 +905,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -918,7 +926,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
@ -940,7 +948,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
@ -962,7 +970,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xMIC-AVX512
|
||||
@ -983,7 +991,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||
@ -1004,7 +1012,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
|
||||
KOKKOS_CXXFLAGS += -mmic
|
||||
KOKKOS_LDFLAGS += -mmic
|
||||
endif
|
||||
@ -1022,8 +1030,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
|
||||
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
|
||||
KOKKOS_CXXFLAGS += -x cuda
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
|
||||
KOKKOS_CXXFLAGS += -x cuda
|
||||
else
|
||||
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
|
||||
endif
|
||||
@ -1039,65 +1047,70 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
|
||||
endif
|
||||
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
|
||||
@ -1121,13 +1134,13 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
# Lets start with adding architecture defines
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
|
||||
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
|
||||
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
|
||||
endif
|
||||
|
||||
@ -1138,7 +1151,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
|
||||
KOKKOS_CXXFLAGS+=-fgpu-rdc
|
||||
KOKKOS_LDFLAGS+=-fgpu-rdc
|
||||
else
|
||||
@ -1171,9 +1184,6 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
ifneq ($(CUDA_PATH),)
|
||||
KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include
|
||||
@ -1211,9 +1221,6 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
@ -1228,9 +1235,6 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
KOKKOS_LIBS += -lpthread
|
||||
KOKKOS_TPL_LIBRARY_NAMES += pthread
|
||||
@ -1279,9 +1283,6 @@ endif
|
||||
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
|
||||
# device to avoid a link warning.
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
|
||||
endif
|
||||
endif
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
|
||||
|
||||
@ -26,21 +26,17 @@ Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spi
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
|
||||
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
|
||||
@ -50,9 +46,6 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
|
||||
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
@ -75,9 +68,6 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
|
||||
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
@ -85,9 +75,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
@ -95,9 +82,6 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
|
||||
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
|
||||
|
||||
@ -151,7 +151,7 @@ Full details are given in the [build instructions](BUILD.md). Basic setups are s
|
||||
## CMake
|
||||
|
||||
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
|
||||
````
|
||||
````bash
|
||||
cmake $srcdir \
|
||||
-DCMAKE_CXX_COMPILER=$path_to_compiler \
|
||||
-DCMAKE_INSTALL_PREFIX=$path_to_install \
|
||||
@ -170,7 +170,7 @@ and run `make test` after completing the build.
|
||||
|
||||
For your CMake project using Kokkos, code such as the following:
|
||||
|
||||
````
|
||||
````cmake
|
||||
find_package(Kokkos)
|
||||
...
|
||||
target_link_libraries(myTarget Kokkos::kokkos)
|
||||
@ -187,17 +187,15 @@ for the install location given above.
|
||||
|
||||
## Spack
|
||||
An alternative to manually building with CMake is to use the Spack package manager.
|
||||
To do so, download the `kokkos-spack` git repo and add to the package list:
|
||||
````
|
||||
spack repo add $path-to-kokkos-spack
|
||||
To get started, download the Spack [repo](https://github.com/spack/spack).
|
||||
````
|
||||
A basic installation would be done as:
|
||||
````
|
||||
spack install kokkos
|
||||
````bash
|
||||
> spack install kokkos
|
||||
````
|
||||
Spack allows options and compilers to be tuned in the install command.
|
||||
````
|
||||
spack install kokkos@3.0 %gcc@7.3.0 +openmp
|
||||
````bash
|
||||
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
|
||||
````
|
||||
This example illustrates the three most common parameters to Spack:
|
||||
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
|
||||
@ -205,33 +203,33 @@ This example illustrates the three most common parameters to Spack:
|
||||
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
|
||||
|
||||
For a complete list of Kokkos options, run:
|
||||
````
|
||||
spack info kokkos
|
||||
````bash
|
||||
> spack info kokkos
|
||||
````
|
||||
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
|
||||
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
|
||||
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
|
||||
````
|
||||
spack find -p kokkos ...
|
||||
````bash
|
||||
> spack find -p kokkos ...
|
||||
````
|
||||
where `...` is the unique spec identifying the particular Kokkos configuration and version.
|
||||
|
||||
Some more details can be found in the Kokkos Spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest).
|
||||
|
||||
## Raw Makefile
|
||||
A bash script is provided to generate raw makefiles.
|
||||
To install Kokkos as a library create a build directory and run the following
|
||||
````
|
||||
$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
|
||||
````bash
|
||||
> $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
|
||||
````
|
||||
Once the Makefile is generated, run:
|
||||
````
|
||||
make kokkoslib
|
||||
make install
|
||||
````bash
|
||||
> make kokkoslib
|
||||
> make install
|
||||
````
|
||||
To additionally run the unit tests:
|
||||
````
|
||||
make build-test
|
||||
make test
|
||||
````bash
|
||||
> make build-test
|
||||
> make test
|
||||
````
|
||||
Run `generate_makefile.bash --help` for more detailed options such as
|
||||
changing the device type for which to build.
|
||||
@ -274,7 +272,7 @@ more than a single GPU is used by a single process.
|
||||
|
||||
If you publish work which mentions Kokkos, please cite the following paper:
|
||||
|
||||
````
|
||||
````BibTeX
|
||||
@article{CarterEdwards20143202,
|
||||
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
|
||||
journal = "Journal of Parallel and Distributed Computing ",
|
||||
|
||||
267
lib/kokkos/Spack.md
Normal file
267
lib/kokkos/Spack.md
Normal file
@ -0,0 +1,267 @@
|
||||

|
||||
|
||||
# Kokkos Spack
|
||||
|
||||
This gives instructions for using Spack to install Kokkos and developing packages that depend on Kokkos.
|
||||
|
||||
## Getting Started
|
||||
|
||||
Make sure you have downloaded [Spack](https://github.com/spack/spack).
|
||||
The easiest way to configure the Spack environment is:
|
||||
````bash
|
||||
> source spack/share/spack/setup-env.sh
|
||||
````
|
||||
with other scripts available for other shells.
|
||||
You can display information about how to install packages with:
|
||||
````bash
|
||||
> spack info kokkos
|
||||
````
|
||||
This will print all the information about how to install Kokkos with Spack.
|
||||
For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io).
|
||||
|
||||
## Setting Up Spack: Avoiding the Package Cascade
|
||||
By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA.
|
||||
This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be:
|
||||
````yaml
|
||||
packages:
|
||||
cuda:
|
||||
modules:
|
||||
cuda@10.1.243: [cuda/10.1.243]
|
||||
paths:
|
||||
cuda@10.1.243:
|
||||
/opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
|
||||
buildable: false
|
||||
cmake:
|
||||
modules:
|
||||
cmake: [cmake/3.16.8]
|
||||
paths:
|
||||
cmake:
|
||||
/opt/local/ppc64le/cmake/3.16.8
|
||||
buildable: false
|
||||
````
|
||||
The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
|
||||
The `buildable` flag is useful to make sure Spack crashes if there is a path error,
|
||||
rather than having a typo and Spack rebuilding everything because `cmake` isn't found.
|
||||
You can verify your environment is set up correctly by running `spack graph` or `spack spec`.
|
||||
For example:
|
||||
````bash
|
||||
> spack graph kokkos +cuda
|
||||
o kokkos
|
||||
|\
|
||||
o | cuda
|
||||
/
|
||||
o cmake
|
||||
````
|
||||
Without the existing CUDA and CMake being identified in `packages.yaml`, a (subset!) of the output would be:
|
||||
````bash
|
||||
o kokkos
|
||||
|\
|
||||
| o cmake
|
||||
| |\
|
||||
| | | |\
|
||||
| | | | | |\
|
||||
| | | | | | | |\
|
||||
| | | | | | | | | |\
|
||||
| | | | | | | o | | | libarchive
|
||||
| | | | | | | |\ \ \ \
|
||||
| | | | | | | | | |\ \ \ \
|
||||
| | | | | | | | | | | | |_|/
|
||||
| | | | | | | | | | | |/| |
|
||||
| | | | | | | | | | | | | o curl
|
||||
| | |_|_|_|_|_|_|_|_|_|_|/|
|
||||
| |/| | | |_|_|_|_|_|_|_|/
|
||||
| | | | |/| | | | | | | |
|
||||
| | | | o | | | | | | | | openssl
|
||||
| |/| | | | | | | | | | |
|
||||
| | | | | | | | | | o | | libxml2
|
||||
| | |_|_|_|_|_|_|_|/| | |
|
||||
| | | | | | | | | | |\ \ \
|
||||
| o | | | | | | | | | | | | zlib
|
||||
| / / / / / / / / / / / /
|
||||
| o | | | | | | | | | | | xz
|
||||
| / / / / / / / / / / /
|
||||
| o | | | | | | | | | | rhash
|
||||
| / / / / / / / / / /
|
||||
| | | | o | | | | | | nettle
|
||||
| | | | |\ \ \ \ \ \ \
|
||||
| | | o | | | | | | | | libuv
|
||||
| | | | o | | | | | | | autoconf
|
||||
| | |_|/| | | | | | | |
|
||||
| | | | |/ / / / / / /
|
||||
| o | | | | | | | | | perl
|
||||
| o | | | | | | | | | gdbm
|
||||
| o | | | | | | | | | readline
|
||||
````
|
||||
|
||||
## Configuring Kokkos as a Project Dependency
|
||||
Say you have a project "SuperScience" which needs to use Kokkos.
|
||||
In your `package.py` file, you would generally include something like:
|
||||
````python
|
||||
class SuperScience(CMakePackage):
|
||||
...
|
||||
depends_on("kokkos")
|
||||
````
|
||||
Often projects want to tweak behavior when using certain features, e.g.
|
||||
````python
|
||||
depends_on("kokkos+cuda", when="+cuda")
|
||||
````
|
||||
if your project needs CUDA-specific logic to configure and build.
|
||||
This illustrates the general principle in Spack of "flowing-up".
|
||||
A user requests a feature in the final app:
|
||||
````bash
|
||||
> spack install superscience+cuda
|
||||
````
|
||||
This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build.
|
||||
The downstream app (SuperScience) tells the upstream app (Kokkos) how to build.
|
||||
|
||||
Because Kokkos is a performance portability library, it somewhat inverts this principle.
|
||||
Kokkos "flows-down", telling your application how best to configure for performance.
|
||||
Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build,
|
||||
a pre-built Kokkos should be telling the downstream app SuperScience what variants to use.
|
||||
Kokkos works best when there is an "expert" configuration installed on your system.
|
||||
Your build should simply request `-DKokkos_ROOT=<BEST_KOKKOS_FOR_MY_SYSTEM>` and configure appropriately based on the Kokkos it finds.
|
||||
|
||||
Kokkos has many, many build variants.
|
||||
Where possible, projects should only depend on a general Kokkos, not specific variants.
|
||||
We recommend instead adding, for each system you build on, a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users).
|
||||
For a Xeon + Volta system, this could look like:
|
||||
````yaml
|
||||
kokkos:
|
||||
variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70
|
||||
compiler: [gcc@7.2.0]
|
||||
````
|
||||
which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1.
|
||||
It also enables support for CUDA Lambdas.
|
||||
The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below).
|
||||
Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture.
|
||||
For a Haswell system, we use
|
||||
````yaml
|
||||
kokkos:
|
||||
variants: +openmp std=14 target=haswell
|
||||
compiler: [intel@18]
|
||||
````
|
||||
which uses the built-in microarchitecture variants of Spack.
|
||||
Consult the Spack documentation for more details of Spack microarchitectures
|
||||
and CUDA architectures.
|
||||
Spack does not currently provide an AMD GPU microarchitecture option.
|
||||
If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`.
|
||||
````yaml
|
||||
kokkos:
|
||||
variants: +hip amd_gpu_arch=vega900
|
||||
````
|
||||
|
||||
Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want.
|
||||
For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems).
|
||||
If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project:
|
||||
````bash
|
||||
> spack install superscience
|
||||
````
|
||||
you may end up just getting the default Kokkos (i.e. Serial).
|
||||
Some examples are included in the `config/yaml` folder for common platforms.
|
||||
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
|
||||
For example, with Kokkos Kernels:
|
||||
````bash
|
||||
kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512
|
||||
^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512
|
||||
^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
|
||||
^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512
|
||||
^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512
|
||||
^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512
|
||||
````
|
||||
The output can be very verbose, but we can verify the expected `kokkos`:
|
||||
````bash
|
||||
kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
|
||||
````
|
||||
We see that we do have, e.g., `+volta70` and `+wrapper`.
|
||||
|
||||
### Spack Environments
|
||||
The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)).
|
||||
Rather than installing packages one-at-a-time, you add packages to an environment.
|
||||
After adding all packages, you concretize and install them all.
|
||||
Using environments, one can explicitly add a desired Kokkos for the environment, e.g.
|
||||
````bash
|
||||
> spack add kokkos +cuda +cuda_lambda +volta70
|
||||
> spack add my_project +my_variant
|
||||
> ...
|
||||
> spack install
|
||||
````
|
||||
All packages within the environment will build against the CUDA-enabled Kokkos,
|
||||
even if they only request a default Kokkos.
|
||||
|
||||
## NVCC Wrapper
|
||||
Kokkos is a C++ project, but often builds for the CUDA backend.
|
||||
This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler.
|
||||
Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler.
|
||||
`nvcc` itself also uses an underlying host compiler, e.g. GCC.
|
||||
|
||||
In Spack, the underlying host compiler is specified as below, e.g.:
|
||||
````bash
|
||||
> spack install package %gcc@8.0.0
|
||||
````
|
||||
This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant.
|
||||
````bash
|
||||
> spack install kokkos +cuda +wrapper %gcc@7.2.0
|
||||
````
|
||||
Downstream projects depending on Kokkos need to override their compiler.
|
||||
Kokkos provides the compiler in a `kokkos_cxx` variable,
|
||||
which points to either `nvcc_wrapper` when needed or the regular compiler otherwise.
|
||||
Spack projects already do this to use MPI compiler wrappers.
|
||||
````python
|
||||
def cmake_args(self):
|
||||
options = []
|
||||
...
|
||||
options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx)
|
||||
...
|
||||
return options
|
||||
````
|
||||
Note: `nvcc_wrapper` works with the MPI compiler wrappers.
|
||||
If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`.
|
||||
Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood.
|
||||
````python
|
||||
def cmake_args(self):
|
||||
options = []
|
||||
...
|
||||
options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx)
|
||||
...
|
||||
return options
|
||||
````
|
||||
To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI).
|
||||
This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway.
|
||||
This behavior is necessary for now, but will hopefully be removed later.
|
||||
When using environments, if MPI is not needed, you can remove the MPI dependency with:
|
||||
````bash
|
||||
> spack add kokkos-nvcc-wrapper ~mpi
|
||||
````
|
||||
|
||||
## Developing With Spack
|
||||
|
||||
Spack has historically been much more suited to *deployment* of mature packages than active testing or developing.
|
||||
However, recent features have improved support for development.
|
||||
Future releases are likely to make this even easier and incorporate Git integration.
|
||||
The most common commands will do a full build and install of the packages.
|
||||
If doing development, you may wish to merely set up a build environment.
|
||||
This allows you to modify the source and re-build.
|
||||
In this case, you can stop after configuring.
|
||||
Suppose you have a Kokkos checkout in the folder `kokkos-src`:
|
||||
````bash
|
||||
> spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp
|
||||
````
|
||||
This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown):
|
||||
Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases.
|
||||
You are usually developing a feature branch that will merge into `develop`,
|
||||
hence you are making a new `develop` branch.
|
||||
|
||||
````bash
|
||||
> cd kokkos-src
|
||||
> source spack-build-env.txt
|
||||
> cd spack-build
|
||||
> make
|
||||
````
|
||||
Before sourcing the Spack development environment, you may wish to save your current environment:
|
||||
````bash
|
||||
> declare -px > myenv.sh
|
||||
````
|
||||
When done with Spack, you can then restore your original environment:
|
||||
````bash
|
||||
> source myenv.sh
|
||||
````
|
||||
@ -2,7 +2,9 @@
|
||||
|
||||
KOKKOS_SUBPACKAGE(Algorithms)
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
IF (NOT Kokkos_INSTALL_TESTING)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
|
||||
|
||||
@ -7,9 +7,15 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS *.hpp)
|
||||
FILE(GLOB SOURCES *.cpp)
|
||||
LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
|
||||
FILE(GLOB ALGO_HEADERS *.hpp)
|
||||
FILE(GLOB ALGO_SOURCES *.cpp)
|
||||
LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
|
||||
|
||||
INSTALL (
|
||||
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
|
||||
DESTINATION ${KOKKOS_HEADER_DIR}
|
||||
FILES_MATCHING PATTERN "*.hpp"
|
||||
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -17,8 +23,8 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
|
||||
# These will get ignored for standalone CMake and a true interface library made
|
||||
KOKKOS_ADD_INTERFACE_LIBRARY(
|
||||
kokkosalgorithms
|
||||
HEADERS ${HEADERS}
|
||||
SOURCES ${SOURCES}
|
||||
HEADERS ${ALGO_HEADERS}
|
||||
SOURCES ${ALGO_SOURCES}
|
||||
)
|
||||
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
|
||||
${KOKKOS_TOP_BUILD_DIR}
|
||||
|
||||
@ -94,9 +94,9 @@ namespace Kokkos {
|
||||
class Pool {
|
||||
public:
|
||||
//The Kokkos device type
|
||||
typedef Device device_type;
|
||||
using device_type = Device;
|
||||
//The actual generator type
|
||||
typedef Generator<Device> generator_type;
|
||||
using generator_type = Generator<Device>;
|
||||
|
||||
//Default constructor: does not initialize a pool
|
||||
Pool();
|
||||
@ -124,7 +124,7 @@ namespace Kokkos {
|
||||
class Generator {
|
||||
public:
|
||||
//The Kokkos device type
|
||||
typedef DeviceType device_type;
|
||||
using device_type = DeviceType;
|
||||
|
||||
//Max return values of respective [X]rand[S]() functions
|
||||
enum {MAX_URAND = 0xffffffffU};
|
||||
@ -138,75 +138,75 @@ namespace Kokkos {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Generator (STATE_ARGUMENTS, int state_idx = 0);
|
||||
|
||||
//Draw a equidistributed uint32_t in the range (0,MAX_URAND]
|
||||
//Draw a equidistributed uint32_t in the range [0,MAX_URAND)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand();
|
||||
|
||||
//Draw a equidistributed uint64_t in the range (0,MAX_URAND64]
|
||||
//Draw a equidistributed uint64_t in the range [0,MAX_URAND64)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64();
|
||||
|
||||
//Draw a equidistributed uint32_t in the range (0,range]
|
||||
//Draw a equidistributed uint32_t in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand(const uint32_t& range);
|
||||
|
||||
//Draw a equidistributed uint32_t in the range (start,end]
|
||||
//Draw a equidistributed uint32_t in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand(const uint32_t& start, const uint32_t& end );
|
||||
|
||||
//Draw a equidistributed uint64_t in the range (0,range]
|
||||
//Draw a equidistributed uint64_t in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64(const uint64_t& range);
|
||||
|
||||
//Draw a equidistributed uint64_t in the range (start,end]
|
||||
//Draw a equidistributed uint64_t in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64(const uint64_t& start, const uint64_t& end );
|
||||
|
||||
//Draw a equidistributed int in the range (0,MAX_RAND]
|
||||
//Draw a equidistributed int in the range [0,MAX_RAND)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand();
|
||||
|
||||
//Draw a equidistributed int in the range (0,range]
|
||||
//Draw a equidistributed int in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand(const int& range);
|
||||
|
||||
//Draw a equidistributed int in the range (start,end]
|
||||
//Draw a equidistributed int in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand(const int& start, const int& end );
|
||||
|
||||
//Draw a equidistributed int64_t in the range (0,MAX_RAND64]
|
||||
//Draw a equidistributed int64_t in the range [0,MAX_RAND64)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64();
|
||||
|
||||
//Draw a equidistributed int64_t in the range (0,range]
|
||||
//Draw a equidistributed int64_t in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64(const int64_t& range);
|
||||
|
||||
//Draw a equidistributed int64_t in the range (start,end]
|
||||
//Draw a equidistributed int64_t in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64(const int64_t& start, const int64_t& end );
|
||||
|
||||
//Draw a equidistributed float in the range (0,1.0]
|
||||
//Draw a equidistributed float in the range [0,1.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand();
|
||||
|
||||
//Draw a equidistributed float in the range (0,range]
|
||||
//Draw a equidistributed float in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand(const float& range);
|
||||
|
||||
//Draw a equidistributed float in the range (start,end]
|
||||
//Draw a equidistributed float in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand(const float& start, const float& end );
|
||||
|
||||
//Draw a equidistributed double in the range (0,1.0]
|
||||
//Draw a equidistributed double in the range [0,1.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand();
|
||||
|
||||
//Draw a equidistributed double in the range (0,range]
|
||||
//Draw a equidistributed double in the range [0,range)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand(const double& range);
|
||||
|
||||
//Draw a equidistributed double in the range (start,end]
|
||||
//Draw a equidistributed double in the range [start,end)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand(const double& start, const double& end );
|
||||
|
||||
@ -221,11 +221,11 @@ namespace Kokkos {
|
||||
|
||||
//Additional Functions:
|
||||
|
||||
//Fills view with random numbers in the range (0,range]
|
||||
//Fills view with random numbers in the range [0,range)
|
||||
template<class ViewType, class PoolType>
|
||||
void fill_random(ViewType view, PoolType pool, ViewType::value_type range);
|
||||
|
||||
//Fills view with random numbers in the range (start,end]
|
||||
//Fills view with random numbers in the range [start,end)
|
||||
template<class ViewType, class PoolType>
|
||||
void fill_random(ViewType view, PoolType pool,
|
||||
ViewType::value_type start, ViewType::value_type end);
|
||||
@ -381,7 +381,7 @@ struct rand<Generator, unsigned long> {
|
||||
// NOTE (mfh 26 oct 2014) This is a partial specialization for long
|
||||
// long, a C99 / C++11 signed type which is guaranteed to be at
|
||||
// least 64 bits. Do NOT write a partial specialization for
|
||||
// int64_t!!! This is just a typedef! It could be either long or
|
||||
// int64_t!!! This is just an alias! It could be either long or
|
||||
// long long. We don't know which a priori, and I've seen both.
|
||||
// The types long and long long are guaranteed to differ, so it's
|
||||
// always safe to specialize for both.
|
||||
@ -413,7 +413,7 @@ struct rand<Generator, long long> {
|
||||
// NOTE (mfh 26 oct 2014) This is a partial specialization for
|
||||
// unsigned long long, a C99 / C++11 unsigned type which is
|
||||
// guaranteed to be at least 64 bits. Do NOT write a partial
|
||||
// specialization for uint64_t!!! This is just a typedef! It could
|
||||
// specialization for uint64_t!!! This is just an alias! It could
|
||||
// be either unsigned long or unsigned long long. We don't know
|
||||
// which a priori, and I've seen both. The types unsigned long and
|
||||
// unsigned long long are guaranteed to differ, so it's always safe
|
||||
@ -604,11 +604,7 @@ struct Random_UniqueIndex {
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type) {
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
const int i = ExecutionSpace::hardware_thread_id();
|
||||
#else
|
||||
const int i = ExecutionSpace::impl_hardware_thread_id();
|
||||
#endif
|
||||
return i;
|
||||
#else
|
||||
return 0;
|
||||
@ -652,15 +648,13 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
|
||||
static int get_state_idx(const locks_view_type& locks_) {
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
const int i_offset =
|
||||
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
|
||||
hipThreadIdx_z;
|
||||
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
|
||||
hipBlockIdx_z) *
|
||||
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
|
||||
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
|
||||
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
|
||||
blockDim.x * blockDim.y * blockDim.z +
|
||||
i_offset) %
|
||||
locks_.extent(0);
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
|
||||
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
|
||||
i += blockDim.x * blockDim.y * blockDim.z;
|
||||
if (i >= static_cast<int>(locks_.extent(0))) {
|
||||
i = i_offset;
|
||||
}
|
||||
@ -687,7 +681,7 @@ class Random_XorShift64 {
|
||||
friend class Random_XorShift64_Pool<DeviceType>;
|
||||
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
using device_type = DeviceType;
|
||||
|
||||
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
|
||||
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
|
||||
@ -805,11 +799,6 @@ class Random_XorShift64 {
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
|
||||
using std::sqrt;
|
||||
#else
|
||||
using ::sqrt;
|
||||
#endif
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
@ -817,7 +806,7 @@ class Random_XorShift64 {
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * sqrt(-2.0 * log(S) / S);
|
||||
return U * std::sqrt(-2.0 * log(S) / S);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -830,15 +819,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
|
||||
class Random_XorShift64_Pool {
|
||||
private:
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
typedef View<int*, execution_space> locks_type;
|
||||
typedef View<uint64_t*, DeviceType> state_data_type;
|
||||
using locks_type = View<int*, execution_space>;
|
||||
using state_data_type = View<uint64_t*, DeviceType>;
|
||||
locks_type locks_;
|
||||
state_data_type state_;
|
||||
int num_states_;
|
||||
|
||||
public:
|
||||
typedef Random_XorShift64<DeviceType> generator_type;
|
||||
typedef DeviceType device_type;
|
||||
using generator_type = Random_XorShift64<DeviceType>;
|
||||
using device_type = DeviceType;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64_Pool() { num_states_ = 0; }
|
||||
@ -923,8 +912,8 @@ class Random_XorShift1024 {
|
||||
friend class Random_XorShift1024_Pool<DeviceType>;
|
||||
|
||||
public:
|
||||
typedef Random_XorShift1024_Pool<DeviceType> pool_type;
|
||||
typedef DeviceType device_type;
|
||||
using pool_type = Random_XorShift1024_Pool<DeviceType>;
|
||||
using device_type = DeviceType;
|
||||
|
||||
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
|
||||
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
|
||||
@ -1046,11 +1035,6 @@ class Random_XorShift1024 {
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
|
||||
using std::sqrt;
|
||||
#else
|
||||
using ::sqrt;
|
||||
#endif
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
@ -1058,7 +1042,7 @@ class Random_XorShift1024 {
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * sqrt(-2.0 * log(S) / S);
|
||||
return U * std::sqrt(-2.0 * log(S) / S);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1071,9 +1055,9 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
|
||||
class Random_XorShift1024_Pool {
|
||||
private:
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
typedef View<int*, execution_space> locks_type;
|
||||
typedef View<int*, DeviceType> int_view_type;
|
||||
typedef View<uint64_t * [16], DeviceType> state_data_type;
|
||||
using locks_type = View<int*, execution_space>;
|
||||
using int_view_type = View<int*, DeviceType>;
|
||||
using state_data_type = View<uint64_t * [16], DeviceType>;
|
||||
|
||||
locks_type locks_;
|
||||
state_data_type state_;
|
||||
@ -1082,9 +1066,9 @@ class Random_XorShift1024_Pool {
|
||||
friend class Random_XorShift1024<DeviceType>;
|
||||
|
||||
public:
|
||||
typedef Random_XorShift1024<DeviceType> generator_type;
|
||||
using generator_type = Random_XorShift1024<DeviceType>;
|
||||
|
||||
typedef DeviceType device_type;
|
||||
using device_type = DeviceType;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift1024_Pool() { num_states_ = 0; }
|
||||
@ -1176,14 +1160,13 @@ struct fill_random_functor_begin_end;
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1203,14 +1186,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1232,14 +1214,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1262,14 +1243,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1293,14 +1273,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1326,14 +1305,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1361,14 +1339,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1398,14 +1375,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
|
||||
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type range;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type range_)
|
||||
@ -1437,14 +1413,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1466,14 +1441,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1497,14 +1471,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1529,14 +1502,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1562,14 +1534,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1597,14 +1568,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1634,14 +1604,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
@ -1673,14 +1642,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
|
||||
template <class ViewType, class RandomPool, int loops, class IndexType>
|
||||
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
|
||||
IndexType> {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
using execution_space = typename ViewType::execution_space;
|
||||
ViewType a;
|
||||
RandomPool rand_pool;
|
||||
typename ViewType::const_value_type begin, end;
|
||||
|
||||
typedef rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>
|
||||
Rand;
|
||||
using Rand = rand<typename RandomPool::generator_type,
|
||||
typename ViewType::non_const_value_type>;
|
||||
|
||||
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
|
||||
typename ViewType::const_value_type begin_,
|
||||
|
||||
@ -95,9 +95,9 @@ class BinSort {
|
||||
public:
|
||||
template <class DstViewType, class SrcViewType>
|
||||
struct copy_functor {
|
||||
typedef typename SrcViewType::const_type src_view_type;
|
||||
using src_view_type = typename SrcViewType::const_type;
|
||||
|
||||
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
|
||||
using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
|
||||
|
||||
DstViewType dst_values;
|
||||
src_view_type src_values;
|
||||
@ -120,17 +120,17 @@ class BinSort {
|
||||
// If a Kokkos::View then can generate constant random access
|
||||
// otherwise can only use the constant type.
|
||||
|
||||
typedef typename std::conditional<
|
||||
using src_view_type = typename std::conditional<
|
||||
Kokkos::is_view<SrcViewType>::value,
|
||||
Kokkos::View<typename SrcViewType::const_data_type,
|
||||
typename SrcViewType::array_layout,
|
||||
typename SrcViewType::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
|
||||
typename SrcViewType::const_type>::type src_view_type;
|
||||
typename SrcViewType::const_type>::type;
|
||||
|
||||
typedef typename PermuteViewType::const_type perm_view_type;
|
||||
using perm_view_type = typename PermuteViewType::const_type;
|
||||
|
||||
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
|
||||
using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
|
||||
|
||||
DstViewType dst_values;
|
||||
perm_view_type sort_order;
|
||||
@ -151,8 +151,8 @@ class BinSort {
|
||||
}
|
||||
};
|
||||
|
||||
typedef typename Space::execution_space execution_space;
|
||||
typedef BinSortOp bin_op_type;
|
||||
using execution_space = typename Space::execution_space;
|
||||
using bin_op_type = BinSortOp;
|
||||
|
||||
struct bin_count_tag {};
|
||||
struct bin_offset_tag {};
|
||||
@ -160,30 +160,30 @@ class BinSort {
|
||||
struct bin_sort_bins_tag {};
|
||||
|
||||
public:
|
||||
typedef SizeType size_type;
|
||||
typedef size_type value_type;
|
||||
using size_type = SizeType;
|
||||
using value_type = size_type;
|
||||
|
||||
typedef Kokkos::View<size_type*, Space> offset_type;
|
||||
typedef Kokkos::View<const int*, Space> bin_count_type;
|
||||
using offset_type = Kokkos::View<size_type*, Space>;
|
||||
using bin_count_type = Kokkos::View<const int*, Space>;
|
||||
|
||||
typedef typename KeyViewType::const_type const_key_view_type;
|
||||
using const_key_view_type = typename KeyViewType::const_type;
|
||||
|
||||
// If a Kokkos::View then can generate constant random access
|
||||
// otherwise can only use the constant type.
|
||||
|
||||
typedef typename std::conditional<
|
||||
using const_rnd_key_view_type = typename std::conditional<
|
||||
Kokkos::is_view<KeyViewType>::value,
|
||||
Kokkos::View<typename KeyViewType::const_data_type,
|
||||
typename KeyViewType::array_layout,
|
||||
typename KeyViewType::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
|
||||
const_key_view_type>::type const_rnd_key_view_type;
|
||||
const_key_view_type>::type;
|
||||
|
||||
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
|
||||
typedef typename KeyViewType::const_value_type const_key_scalar;
|
||||
using non_const_key_scalar = typename KeyViewType::non_const_value_type;
|
||||
using const_key_scalar = typename KeyViewType::const_value_type;
|
||||
|
||||
typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >
|
||||
bin_count_atomic_type;
|
||||
using bin_count_atomic_type =
|
||||
Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
|
||||
|
||||
private:
|
||||
const_key_view_type keys;
|
||||
@ -266,10 +266,10 @@ class BinSort {
|
||||
template <class ValuesViewType>
|
||||
void sort(ValuesViewType const& values, int values_range_begin,
|
||||
int values_range_end) const {
|
||||
typedef Kokkos::View<typename ValuesViewType::data_type,
|
||||
typename ValuesViewType::array_layout,
|
||||
typename ValuesViewType::device_type>
|
||||
scratch_view_type;
|
||||
using scratch_view_type =
|
||||
Kokkos::View<typename ValuesViewType::data_type,
|
||||
typename ValuesViewType::array_layout,
|
||||
typename ValuesViewType::device_type>;
|
||||
|
||||
const size_t len = range_end - range_begin;
|
||||
const size_t values_len = values_range_end - values_range_begin;
|
||||
@ -278,13 +278,6 @@ class BinSort {
|
||||
"BinSort::sort: values range length != permutation vector length");
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
scratch_view_type sorted_values(
|
||||
ViewAllocateWithoutInitializing(
|
||||
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
|
||||
len, values.extent(1), values.extent(2), values.extent(3),
|
||||
values.extent(4), values.extent(5), values.extent(6), values.extent(7));
|
||||
#else
|
||||
scratch_view_type sorted_values(
|
||||
ViewAllocateWithoutInitializing(
|
||||
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
|
||||
@ -303,7 +296,6 @@ class BinSort {
|
||||
: KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 7 ? values.extent(7)
|
||||
: KOKKOS_IMPL_CTOR_DEFAULT_ARG);
|
||||
#endif
|
||||
|
||||
{
|
||||
copy_permute_functor<scratch_view_type /* DstViewType */
|
||||
@ -511,8 +503,8 @@ bool try_std_sort(ViewType view) {
|
||||
|
||||
template <class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>
|
||||
minmax_scalar;
|
||||
using minmax_scalar =
|
||||
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
|
||||
|
||||
ViewType view;
|
||||
min_max_functor(const ViewType& view_) : view(view_) {}
|
||||
@ -531,7 +523,7 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
|
||||
if (!always_use_kokkos_sort) {
|
||||
if (Impl::try_std_sort(view)) return;
|
||||
}
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
using CompType = BinOp1D<ViewType>;
|
||||
|
||||
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
@ -548,8 +540,8 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
|
||||
|
||||
template <class ViewType>
|
||||
void sort(ViewType view, size_t const begin, size_t const end) {
|
||||
typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy;
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
|
||||
using CompType = BinOp1D<ViewType>;
|
||||
|
||||
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
|
||||
@ -20,14 +20,18 @@ KOKKOS_ADD_TEST_LIBRARY(
|
||||
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
|
||||
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
|
||||
)
|
||||
# WORKAROUND FOR HIPCC
|
||||
IF(Kokkos_ENABLE_HIP)
|
||||
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906")
|
||||
ELSE()
|
||||
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
|
||||
|
||||
# avoid deprecation warnings from MSVC
|
||||
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
|
||||
|
||||
IF(NOT (Kokkos_ENABLE_CUDA AND WIN32))
|
||||
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
|
||||
ENDIF()
|
||||
|
||||
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
|
||||
# Suppress clang-tidy diagnostics on code that we do not have control over
|
||||
IF(CMAKE_CXX_CLANG_TIDY)
|
||||
SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
|
||||
ENDIF()
|
||||
|
||||
SET(SOURCES
|
||||
UnitTestMain.cpp
|
||||
|
||||
@ -111,10 +111,10 @@ struct RandomProperties {
|
||||
|
||||
template <class GeneratorPool, class Scalar>
|
||||
struct test_random_functor {
|
||||
typedef typename GeneratorPool::generator_type rnd_type;
|
||||
using rnd_type = typename GeneratorPool::generator_type;
|
||||
|
||||
typedef RandomProperties value_type;
|
||||
typedef typename GeneratorPool::device_type device_type;
|
||||
using value_type = RandomProperties;
|
||||
using device_type = typename GeneratorPool::device_type;
|
||||
|
||||
GeneratorPool rand_pool;
|
||||
const double mean;
|
||||
@ -125,12 +125,12 @@ struct test_random_functor {
|
||||
// implementations might violate this upper bound, due to rounding
|
||||
// error. Just in case, we leave an extra space at the end of each
|
||||
// dimension, in the View types below.
|
||||
typedef Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>
|
||||
type_1d;
|
||||
using type_1d =
|
||||
Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>;
|
||||
type_1d density_1d;
|
||||
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
|
||||
typename GeneratorPool::device_type>
|
||||
type_3d;
|
||||
using type_3d =
|
||||
Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
|
||||
typename GeneratorPool::device_type>;
|
||||
type_3d density_3d;
|
||||
|
||||
test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
|
||||
@ -200,9 +200,9 @@ struct test_random_functor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct test_histogram1d_functor {
|
||||
typedef RandomProperties value_type;
|
||||
typedef typename DeviceType::execution_space execution_space;
|
||||
typedef typename DeviceType::memory_space memory_space;
|
||||
using value_type = RandomProperties;
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
using memory_space = typename DeviceType::memory_space;
|
||||
|
||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
||||
// an exclusive upper bound on the range of random numbers that
|
||||
@ -210,7 +210,7 @@ struct test_histogram1d_functor {
|
||||
// implementations might violate this upper bound, due to rounding
|
||||
// error. Just in case, we leave an extra space at the end of each
|
||||
// dimension, in the View type below.
|
||||
typedef Kokkos::View<int[HIST_DIM1D + 1], memory_space> type_1d;
|
||||
using type_1d = Kokkos::View<int[HIST_DIM1D + 1], memory_space>;
|
||||
type_1d density_1d;
|
||||
double mean;
|
||||
|
||||
@ -219,7 +219,7 @@ struct test_histogram1d_functor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION void operator()(
|
||||
const typename memory_space::size_type i, RandomProperties& prop) const {
|
||||
typedef typename memory_space::size_type size_type;
|
||||
using size_type = typename memory_space::size_type;
|
||||
const double count = density_1d(i);
|
||||
prop.mean += count;
|
||||
prop.variance += 1.0 * (count - mean) * (count - mean);
|
||||
@ -234,9 +234,9 @@ struct test_histogram1d_functor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct test_histogram3d_functor {
|
||||
typedef RandomProperties value_type;
|
||||
typedef typename DeviceType::execution_space execution_space;
|
||||
typedef typename DeviceType::memory_space memory_space;
|
||||
using value_type = RandomProperties;
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
using memory_space = typename DeviceType::memory_space;
|
||||
|
||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
||||
// an exclusive upper bound on the range of random numbers that
|
||||
@ -244,9 +244,9 @@ struct test_histogram3d_functor {
|
||||
// implementations might violate this upper bound, due to rounding
|
||||
// error. Just in case, we leave an extra space at the end of each
|
||||
// dimension, in the View type below.
|
||||
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
|
||||
memory_space>
|
||||
type_3d;
|
||||
using type_3d =
|
||||
Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
|
||||
memory_space>;
|
||||
type_3d density_3d;
|
||||
double mean;
|
||||
|
||||
@ -255,7 +255,7 @@ struct test_histogram3d_functor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION void operator()(
|
||||
const typename memory_space::size_type i, RandomProperties& prop) const {
|
||||
typedef typename memory_space::size_type size_type;
|
||||
using size_type = typename memory_space::size_type;
|
||||
const double count = density_3d(
|
||||
i / (HIST_DIM3D * HIST_DIM3D),
|
||||
(i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D);
|
||||
@ -276,7 +276,7 @@ struct test_histogram3d_functor {
|
||||
//
|
||||
template <class RandomGenerator, class Scalar>
|
||||
struct test_random_scalar {
|
||||
typedef typename RandomGenerator::generator_type rnd_type;
|
||||
using rnd_type = typename RandomGenerator::generator_type;
|
||||
|
||||
int pass_mean, pass_var, pass_covar;
|
||||
int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
|
||||
@ -294,7 +294,7 @@ struct test_random_scalar {
|
||||
cout << " -- Testing randomness properties" << endl;
|
||||
|
||||
RandomProperties result;
|
||||
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
|
||||
using functor_type = test_random_functor<RandomGenerator, Scalar>;
|
||||
parallel_reduce(num_draws / 1024,
|
||||
functor_type(pool, density_1d, density_3d), result);
|
||||
|
||||
@ -325,8 +325,8 @@ struct test_random_scalar {
|
||||
cout << " -- Testing 1-D histogram" << endl;
|
||||
|
||||
RandomProperties result;
|
||||
typedef test_histogram1d_functor<typename RandomGenerator::device_type>
|
||||
functor_type;
|
||||
using functor_type =
|
||||
test_histogram1d_functor<typename RandomGenerator::device_type>;
|
||||
parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
|
||||
|
||||
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
|
||||
@ -357,8 +357,8 @@ struct test_random_scalar {
|
||||
cout << " -- Testing 3-D histogram" << endl;
|
||||
|
||||
RandomProperties result;
|
||||
typedef test_histogram3d_functor<typename RandomGenerator::device_type>
|
||||
functor_type;
|
||||
using functor_type =
|
||||
test_histogram3d_functor<typename RandomGenerator::device_type>;
|
||||
parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);
|
||||
|
||||
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
|
||||
|
||||
@ -55,8 +55,8 @@ namespace Impl {
|
||||
|
||||
template <class ExecutionSpace, class Scalar>
|
||||
struct is_sorted_struct {
|
||||
typedef unsigned int value_type;
|
||||
typedef ExecutionSpace execution_space;
|
||||
using value_type = unsigned int;
|
||||
using execution_space = ExecutionSpace;
|
||||
|
||||
Kokkos::View<Scalar*, ExecutionSpace> keys;
|
||||
|
||||
@ -69,8 +69,8 @@ struct is_sorted_struct {
|
||||
|
||||
template <class ExecutionSpace, class Scalar>
|
||||
struct sum {
|
||||
typedef double value_type;
|
||||
typedef ExecutionSpace execution_space;
|
||||
using value_type = double;
|
||||
using execution_space = ExecutionSpace;
|
||||
|
||||
Kokkos::View<Scalar*, ExecutionSpace> keys;
|
||||
|
||||
@ -81,8 +81,8 @@ struct sum {
|
||||
|
||||
template <class ExecutionSpace, class Scalar>
|
||||
struct bin3d_is_sorted_struct {
|
||||
typedef unsigned int value_type;
|
||||
typedef ExecutionSpace execution_space;
|
||||
using value_type = unsigned int;
|
||||
using execution_space = ExecutionSpace;
|
||||
|
||||
Kokkos::View<Scalar * [3], ExecutionSpace> keys;
|
||||
|
||||
@ -115,8 +115,8 @@ struct bin3d_is_sorted_struct {
|
||||
|
||||
template <class ExecutionSpace, class Scalar>
|
||||
struct sum3D {
|
||||
typedef double value_type;
|
||||
typedef ExecutionSpace execution_space;
|
||||
using value_type = double;
|
||||
using execution_space = ExecutionSpace;
|
||||
|
||||
Kokkos::View<Scalar * [3], ExecutionSpace> keys;
|
||||
|
||||
@ -131,7 +131,7 @@ struct sum3D {
|
||||
|
||||
template <class ExecutionSpace, typename KeyType>
|
||||
void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
|
||||
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
|
||||
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
|
||||
KeyViewType keys("Keys", n);
|
||||
|
||||
// Test sorting array with all numbers equal
|
||||
@ -166,7 +166,7 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
|
||||
|
||||
template <class ExecutionSpace, typename KeyType>
|
||||
void test_3D_sort_impl(unsigned int n) {
|
||||
typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
|
||||
using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>;
|
||||
|
||||
KeyViewType keys("Keys", n * n * n);
|
||||
|
||||
@ -186,7 +186,7 @@ void test_3D_sort_impl(unsigned int n) {
|
||||
typename KeyViewType::value_type min[3] = {0, 0, 0};
|
||||
typename KeyViewType::value_type max[3] = {100, 100, 100};
|
||||
|
||||
typedef Kokkos::BinOp3D<KeyViewType> BinOp;
|
||||
using BinOp = Kokkos::BinOp3D<KeyViewType>;
|
||||
BinOp bin_op(bin_max, min, max);
|
||||
Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
|
||||
Sorter.create_permute_vector();
|
||||
@ -215,9 +215,9 @@ void test_3D_sort_impl(unsigned int n) {
|
||||
|
||||
template <class ExecutionSpace, typename KeyType>
|
||||
void test_dynamic_view_sort_impl(unsigned int n) {
|
||||
typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
|
||||
KeyDynamicViewType;
|
||||
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
|
||||
using KeyDynamicViewType =
|
||||
Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
|
||||
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
|
||||
|
||||
const size_t upper_bound = 2 * n;
|
||||
const size_t min_chunk_size = 1024;
|
||||
@ -305,8 +305,8 @@ void test_issue_1160_impl() {
|
||||
Kokkos::deep_copy(x_, h_x);
|
||||
Kokkos::deep_copy(v_, h_v);
|
||||
|
||||
typedef decltype(element_) KeyViewType;
|
||||
typedef Kokkos::BinOp1D<KeyViewType> BinOp;
|
||||
using KeyViewType = decltype(element_);
|
||||
using BinOp = Kokkos::BinOp1D<KeyViewType>;
|
||||
|
||||
int begin = 3;
|
||||
int end = 8;
|
||||
|
||||
@ -5,6 +5,6 @@ build_script:
|
||||
- cmd: >-
|
||||
mkdir build &&
|
||||
cd build &&
|
||||
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
|
||||
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON &&
|
||||
cmake --build . --target install &&
|
||||
ctest -C Debug -V
|
||||
|
||||
@ -69,13 +69,13 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int L = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int M = atoi(argv[3]);
|
||||
int D = atoi(argv[4]);
|
||||
int K = atoi(argv[5]);
|
||||
int R = atoi(argv[6]);
|
||||
int type = atoi(argv[7]);
|
||||
int L = std::stoi(argv[1]);
|
||||
int N = std::stoi(argv[2]);
|
||||
int M = std::stoi(argv[3]);
|
||||
int D = std::stoi(argv[4]);
|
||||
int K = std::stoi(argv[5]);
|
||||
int R = std::stoi(argv[6]);
|
||||
int type = std::stoi(argv[7]);
|
||||
|
||||
Kokkos::View<int*> offsets("Offsets", L, M);
|
||||
Kokkos::Random_XorShift64_Pool<> pool(12371);
|
||||
|
||||
@ -73,15 +73,15 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int P = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
int R = atoi(argv[4]);
|
||||
int D = atoi(argv[5]);
|
||||
int U = atoi(argv[6]);
|
||||
int F = atoi(argv[7]);
|
||||
int T = atoi(argv[8]);
|
||||
int S = atoi(argv[9]);
|
||||
int P = std::stoi(argv[1]);
|
||||
int N = std::stoi(argv[2]);
|
||||
int K = std::stoi(argv[3]);
|
||||
int R = std::stoi(argv[4]);
|
||||
int D = std::stoi(argv[5]);
|
||||
int U = std::stoi(argv[6]);
|
||||
int F = std::stoi(argv[7]);
|
||||
int T = std::stoi(argv[8]);
|
||||
int S = std::stoi(argv[9]);
|
||||
|
||||
if (U > 8) {
|
||||
printf("U must be 1-8\n");
|
||||
|
||||
@ -72,13 +72,13 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int S = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
int D = atoi(argv[4]);
|
||||
int R = atoi(argv[5]);
|
||||
int U = atoi(argv[6]);
|
||||
int F = atoi(argv[7]);
|
||||
int S = std::stoi(argv[1]);
|
||||
int N = std::stoi(argv[2]);
|
||||
int K = std::stoi(argv[3]);
|
||||
int D = std::stoi(argv[4]);
|
||||
int R = std::stoi(argv[5]);
|
||||
int U = std::stoi(argv[6]);
|
||||
int F = std::stoi(argv[7]);
|
||||
|
||||
if ((S != 1) && (S != 2) && (S != 4)) {
|
||||
printf("S must be one of 1,2,4\n");
|
||||
|
||||
@ -50,151 +50,152 @@
|
||||
#define HLINE "-------------------------------------------------------------\n"
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray;
|
||||
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray;
|
||||
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
|
||||
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
|
||||
#else
|
||||
typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray;
|
||||
typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray;
|
||||
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
|
||||
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
|
||||
#endif
|
||||
|
||||
typedef int GUPSIndex;
|
||||
using GUPSIndex = int;
|
||||
|
||||
double now() {
|
||||
struct timeval now;
|
||||
gettimeofday(&now, nullptr);
|
||||
struct timeval now;
|
||||
gettimeofday(&now, nullptr);
|
||||
|
||||
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
|
||||
return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
|
||||
}
|
||||
|
||||
void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) {
|
||||
for( GUPSIndex i = 0; i < indices.extent(0); ++i ) {
|
||||
indices[i] = lrand48() % dataCount;
|
||||
}
|
||||
void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
|
||||
const int64_t dataCount) {
|
||||
for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
|
||||
indices[i] = lrand48() % dataCount;
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(dev_indices, indices);
|
||||
Kokkos::deep_copy(dev_indices, indices);
|
||||
}
|
||||
|
||||
void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum,
|
||||
const bool performAtomics) {
|
||||
void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
|
||||
const int64_t datum, const bool performAtomics) {
|
||||
if (performAtomics) {
|
||||
Kokkos::parallel_for(
|
||||
"bench-gups-atomic", indices.extent(0),
|
||||
KOKKOS_LAMBDA(const GUPSIndex i) {
|
||||
Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
|
||||
});
|
||||
} else {
|
||||
Kokkos::parallel_for(
|
||||
"bench-gups-non-atomic", indices.extent(0),
|
||||
KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
|
||||
}
|
||||
|
||||
if( performAtomics ) {
|
||||
Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
|
||||
Kokkos::atomic_fetch_xor( &data[indices[i]], datum );
|
||||
});
|
||||
} else {
|
||||
Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
|
||||
data[indices[i]] ^= datum;
|
||||
});
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats,
|
||||
const bool useAtomics) {
|
||||
int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
|
||||
const int repeats, const bool useAtomics) {
|
||||
printf("Reports fastest timing per kernel\n");
|
||||
printf("Creating Views...\n");
|
||||
|
||||
printf("Reports fastest timing per kernel\n");
|
||||
printf("Creating Views...\n");
|
||||
printf("Memory Sizes:\n");
|
||||
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n",
|
||||
static_cast<uint64_t>(dataCount),
|
||||
1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
|
||||
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n",
|
||||
static_cast<uint64_t>(indicesCount),
|
||||
1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
|
||||
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No"));
|
||||
printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
|
||||
|
||||
printf("Memory Sizes:\n");
|
||||
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount),
|
||||
1.0e-6 * ((double) dataCount * (double) sizeof(int64_t)));
|
||||
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount),
|
||||
1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t)));
|
||||
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No") );
|
||||
printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
GUPSDeviceArray dev_indices("indices", indicesCount);
|
||||
GUPSDeviceArray dev_data("data", dataCount);
|
||||
int64_t datum = -1;
|
||||
|
||||
GUPSDeviceArray dev_indices("indices", indicesCount);
|
||||
GUPSDeviceArray dev_data("data", dataCount);
|
||||
int64_t datum = -1;
|
||||
GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
|
||||
GUPSHostArray data = Kokkos::create_mirror_view(dev_data);
|
||||
|
||||
GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
|
||||
GUPSHostArray data = Kokkos::create_mirror_view(dev_data);
|
||||
double gupsTime = 0.0;
|
||||
|
||||
double gupsTime = 0.0;
|
||||
|
||||
printf("Initializing Views...\n");
|
||||
printf("Initializing Views...\n");
|
||||
|
||||
#if defined(KOKKOS_HAVE_OPENMP)
|
||||
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
|
||||
Kokkos::parallel_for(
|
||||
"init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
|
||||
#else
|
||||
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
|
||||
Kokkos::parallel_for(
|
||||
"init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
|
||||
#endif
|
||||
KOKKOS_LAMBDA(const int i) {
|
||||
|
||||
data[i] = 10101010101;
|
||||
});
|
||||
KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
|
||||
|
||||
#if defined(KOKKOS_HAVE_OPENMP)
|
||||
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
|
||||
Kokkos::parallel_for(
|
||||
"init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
|
||||
#else
|
||||
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
|
||||
Kokkos::parallel_for(
|
||||
"init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
|
||||
#endif
|
||||
KOKKOS_LAMBDA(const int i) {
|
||||
KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
|
||||
|
||||
indices[i] = 0;
|
||||
});
|
||||
Kokkos::deep_copy(dev_data, data);
|
||||
Kokkos::deep_copy(dev_indices, indices);
|
||||
double start;
|
||||
|
||||
Kokkos::deep_copy(dev_data, data);
|
||||
Kokkos::deep_copy(dev_indices, indices);
|
||||
double start;
|
||||
printf("Starting benchmarking...\n");
|
||||
|
||||
printf("Starting benchmarking...\n");
|
||||
for (GUPSIndex k = 0; k < repeats; ++k) {
|
||||
randomize_indices(indices, dev_indices, data.extent(0));
|
||||
|
||||
for( GUPSIndex k = 0; k < repeats; ++k ) {
|
||||
randomize_indices(indices, dev_indices, data.extent(0));
|
||||
start = now();
|
||||
run_gups(dev_indices, dev_data, datum, useAtomics);
|
||||
gupsTime += now() - start;
|
||||
}
|
||||
|
||||
start = now();
|
||||
run_gups(dev_indices, dev_data, datum, useAtomics);
|
||||
gupsTime += now() - start;
|
||||
}
|
||||
Kokkos::deep_copy(indices, dev_indices);
|
||||
Kokkos::deep_copy(data, dev_data);
|
||||
|
||||
Kokkos::deep_copy(indices, dev_indices);
|
||||
Kokkos::deep_copy(data, dev_data);
|
||||
printf(HLINE);
|
||||
printf(
|
||||
"GUP/s Random: %18.6f\n",
|
||||
(1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
printf("GUP/s Random: %18.6f\n",
|
||||
(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime);
|
||||
printf(HLINE);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
printf(HLINE);
|
||||
printf("Kokkos GUPS Benchmark\n");
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
printf("Kokkos GUPS Benchmark\n");
|
||||
printf(HLINE);
|
||||
srand48(1010101);
|
||||
|
||||
srand48(1010101);
|
||||
Kokkos::initialize(argc, argv);
|
||||
|
||||
Kokkos::initialize(argc, argv);
|
||||
int64_t indices = 8192;
|
||||
int64_t data = 33554432;
|
||||
int64_t repeats = 10;
|
||||
bool useAtomics = false;
|
||||
|
||||
int64_t indices = 8192;
|
||||
int64_t data = 33554432;
|
||||
int64_t repeats = 10;
|
||||
bool useAtomics = false;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (strcmp(argv[i], "--indices") == 0) {
|
||||
indices = std::atoll(argv[i + 1]);
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--data") == 0) {
|
||||
data = std::atoll(argv[i + 1]);
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--repeats") == 0) {
|
||||
repeats = std::atoll(argv[i + 1]);
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--atomics") == 0) {
|
||||
useAtomics = true;
|
||||
}
|
||||
}
|
||||
|
||||
for( int i = 1; i < argc; ++i ) {
|
||||
if( strcmp( argv[i], "--indices" ) == 0 ) {
|
||||
indices = std::atoll(argv[i+1]);
|
||||
++i;
|
||||
} else if( strcmp( argv[i], "--data" ) == 0 ) {
|
||||
data = std::atoll(argv[i+1]);
|
||||
++i;
|
||||
} else if( strcmp( argv[i], "--repeats" ) == 0 ) {
|
||||
repeats = std::atoll(argv[i+1]);
|
||||
++i;
|
||||
} else if( strcmp( argv[i], "--atomics" ) == 0 ) {
|
||||
useAtomics = true;
|
||||
}
|
||||
}
|
||||
const int rc = run_benchmark(indices, data, repeats, useAtomics);
|
||||
|
||||
const int rc = run_benchmark(indices, data, repeats, useAtomics);
|
||||
Kokkos::finalize();
|
||||
|
||||
Kokkos::finalize();
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -94,22 +94,22 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int team_range = atoi(argv[1]);
|
||||
int thread_range = atoi(argv[2]);
|
||||
int vector_range = atoi(argv[3]);
|
||||
int team_range = std::stoi(argv[1]);
|
||||
int thread_range = std::stoi(argv[2]);
|
||||
int vector_range = std::stoi(argv[3]);
|
||||
|
||||
int outer_repeat = atoi(argv[4]);
|
||||
int thread_repeat = atoi(argv[5]);
|
||||
int vector_repeat = atoi(argv[6]);
|
||||
int outer_repeat = std::stoi(argv[4]);
|
||||
int thread_repeat = std::stoi(argv[5]);
|
||||
int vector_repeat = std::stoi(argv[6]);
|
||||
|
||||
int team_size = atoi(argv[7]);
|
||||
int vector_size = atoi(argv[8]);
|
||||
int schedule = atoi(argv[9]);
|
||||
int test_type = atoi(argv[10]);
|
||||
int team_size = std::stoi(argv[7]);
|
||||
int vector_size = std::stoi(argv[8]);
|
||||
int schedule = std::stoi(argv[9]);
|
||||
int test_type = std::stoi(argv[10]);
|
||||
|
||||
int disable_verbose_output = 0;
|
||||
if (argc > 11) {
|
||||
disable_verbose_output = atoi(argv[11]);
|
||||
disable_verbose_output = std::stoi(argv[11]);
|
||||
}
|
||||
|
||||
if (schedule != 1 && schedule != 2) {
|
||||
@ -138,9 +138,9 @@ int main(int argc, char* argv[]) {
|
||||
double& lval) { lval += 1; },
|
||||
result);
|
||||
|
||||
typedef Kokkos::View<double*, Kokkos::LayoutRight> view_type_1d;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight> view_type_2d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight> view_type_3d;
|
||||
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
|
||||
using view_type_2d = Kokkos::View<double**, Kokkos::LayoutRight>;
|
||||
using view_type_3d = Kokkos::View<double***, Kokkos::LayoutRight>;
|
||||
|
||||
// Allocate view without initializing
|
||||
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding
|
||||
|
||||
@ -68,8 +68,8 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
int team_size, int vector_size, int test_type, ViewType1& v1,
|
||||
ViewType2& v2, ViewType3& v3, double& result,
|
||||
double& result_expect, double& time) {
|
||||
typedef Kokkos::TeamPolicy<ScheduleType, IndexType> t_policy;
|
||||
typedef typename t_policy::member_type t_team;
|
||||
using t_policy = Kokkos::TeamPolicy<ScheduleType, IndexType>;
|
||||
using t_team = typename t_policy::member_type;
|
||||
Kokkos::Timer timer;
|
||||
|
||||
for (int orep = 0; orep < outer_repeat; orep++) {
|
||||
|
||||
@ -48,219 +48,224 @@
|
||||
#include <sys/time.h>
|
||||
|
||||
#define STREAM_ARRAY_SIZE 100000000
|
||||
#define STREAM_NTIMES 20
|
||||
#define STREAM_NTIMES 20
|
||||
|
||||
#define HLINE "-------------------------------------------------------------\n"
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
typedef Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror StreamHostArray;
|
||||
typedef Kokkos::View<double*, Kokkos::CudaSpace> StreamDeviceArray;
|
||||
using StreamHostArray = Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror;
|
||||
using StreamDeviceArray = Kokkos::View<double*, Kokkos::CudaSpace>;
|
||||
#else
|
||||
typedef Kokkos::View<double*, Kokkos::HostSpace>::HostMirror StreamHostArray;
|
||||
typedef Kokkos::View<double*, Kokkos::HostSpace> StreamDeviceArray;
|
||||
using StreamHostArray = Kokkos::View<double*, Kokkos::HostSpace>::HostMirror;
|
||||
using StreamDeviceArray = Kokkos::View<double*, Kokkos::HostSpace>;
|
||||
#endif
|
||||
|
||||
typedef int StreamIndex;
|
||||
using StreamIndex = int;
|
||||
|
||||
double now() {
|
||||
struct timeval now;
|
||||
gettimeofday(&now, nullptr);
|
||||
struct timeval now;
|
||||
gettimeofday(&now, nullptr);
|
||||
|
||||
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
|
||||
return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
|
||||
}
|
||||
|
||||
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) {
|
||||
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b,
|
||||
StreamDeviceArray& c) {
|
||||
Kokkos::parallel_for(
|
||||
"copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i]; });
|
||||
|
||||
Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
|
||||
c[i] = a[i];
|
||||
});
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c,
|
||||
const double scalar) {
|
||||
void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
|
||||
StreamDeviceArray& c, const double scalar) {
|
||||
Kokkos::parallel_for(
|
||||
"copy", a.extent(0),
|
||||
KOKKOS_LAMBDA(const StreamIndex i) { b[i] = scalar * c[i]; });
|
||||
|
||||
Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
|
||||
b[i] = scalar * c[i];
|
||||
});
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
void perform_add(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) {
|
||||
Kokkos::parallel_for("add", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
|
||||
c[i] = a[i] + b[i];
|
||||
});
|
||||
void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
|
||||
StreamDeviceArray& c) {
|
||||
Kokkos::parallel_for(
|
||||
"add", a.extent(0),
|
||||
KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i] + b[i]; });
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c,
|
||||
const double scalar) {
|
||||
void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b,
|
||||
StreamDeviceArray& c, const double scalar) {
|
||||
Kokkos::parallel_for(
|
||||
"triad", a.extent(0),
|
||||
KOKKOS_LAMBDA(const StreamIndex i) { a[i] = b[i] + scalar * c[i]; });
|
||||
|
||||
Kokkos::parallel_for("triad", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
|
||||
a[i] = b[i] + scalar * c[i];
|
||||
});
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
int perform_validation(StreamHostArray& a, StreamHostArray& b, StreamHostArray& c,
|
||||
const StreamIndex arraySize, const double scalar) {
|
||||
int perform_validation(StreamHostArray& a, StreamHostArray& b,
|
||||
StreamHostArray& c, const StreamIndex arraySize,
|
||||
const double scalar) {
|
||||
double ai = 1.0;
|
||||
double bi = 2.0;
|
||||
double ci = 0.0;
|
||||
|
||||
double ai = 1.0;
|
||||
double bi = 2.0;
|
||||
double ci = 0.0;
|
||||
for (StreamIndex i = 0; i < arraySize; ++i) {
|
||||
ci = ai;
|
||||
bi = scalar * ci;
|
||||
ci = ai + bi;
|
||||
ai = bi + scalar * ci;
|
||||
};
|
||||
|
||||
for( StreamIndex i = 0; i < arraySize; ++i ) {
|
||||
ci = ai;
|
||||
bi = scalar * ci;
|
||||
ci = ai + bi;
|
||||
ai = bi + scalar * ci;
|
||||
};
|
||||
double aError = 0.0;
|
||||
double bError = 0.0;
|
||||
double cError = 0.0;
|
||||
|
||||
double aError = 0.0;
|
||||
double bError = 0.0;
|
||||
double cError = 0.0;
|
||||
for (StreamIndex i = 0; i < arraySize; ++i) {
|
||||
aError = std::abs(a[i] - ai);
|
||||
bError = std::abs(b[i] - bi);
|
||||
cError = std::abs(c[i] - ci);
|
||||
}
|
||||
|
||||
for( StreamIndex i = 0; i < arraySize; ++i ) {
|
||||
aError = std::abs( a[i] - ai );
|
||||
bError = std::abs( b[i] - bi );
|
||||
cError = std::abs( c[i] - ci );
|
||||
}
|
||||
double aAvgError = aError / (double)arraySize;
|
||||
double bAvgError = bError / (double)arraySize;
|
||||
double cAvgError = cError / (double)arraySize;
|
||||
|
||||
double aAvgError = aError / (double) arraySize;
|
||||
double bAvgError = bError / (double) arraySize;
|
||||
double cAvgError = cError / (double) arraySize;
|
||||
const double epsilon = 1.0e-13;
|
||||
int errorCount = 0;
|
||||
|
||||
const double epsilon = 1.0e-13;
|
||||
int errorCount = 0;
|
||||
if (std::abs(aAvgError / ai) > epsilon) {
|
||||
fprintf(stderr, "Error: validation check on View a failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
|
||||
if( std::abs( aAvgError / ai ) > epsilon ) {
|
||||
fprintf(stderr, "Error: validation check on View a failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
if (std::abs(bAvgError / bi) > epsilon) {
|
||||
fprintf(stderr, "Error: validation check on View b failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
|
||||
if( std::abs( bAvgError / bi ) > epsilon ) {
|
||||
fprintf(stderr, "Error: validation check on View b failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
if (std::abs(cAvgError / ci) > epsilon) {
|
||||
fprintf(stderr, "Error: validation check on View c failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
|
||||
if( std::abs( cAvgError / ci ) > epsilon ) {
|
||||
fprintf(stderr, "Error: validation check on View c failed.\n");
|
||||
errorCount++;
|
||||
}
|
||||
if (errorCount == 0) {
|
||||
printf("All solutions checked and verified.\n");
|
||||
}
|
||||
|
||||
if( errorCount == 0 ) {
|
||||
printf("All solutions checked and verified.\n");
|
||||
}
|
||||
|
||||
return errorCount;
|
||||
return errorCount;
|
||||
}
|
||||
|
||||
int run_benchmark() {
|
||||
printf("Reports fastest timing per kernel\n");
|
||||
printf("Creating Views...\n");
|
||||
|
||||
printf("Reports fastest timing per kernel\n");
|
||||
printf("Creating Views...\n");
|
||||
printf("Memory Sizes:\n");
|
||||
printf("- Array Size: %" PRIu64 "\n",
|
||||
static_cast<uint64_t>(STREAM_ARRAY_SIZE));
|
||||
printf("- Per Array: %12.2f MB\n",
|
||||
1.0e-6 * (double)STREAM_ARRAY_SIZE * (double)sizeof(double));
|
||||
printf("- Total: %12.2f MB\n",
|
||||
3.0e-6 * (double)STREAM_ARRAY_SIZE * (double)sizeof(double));
|
||||
|
||||
printf("Memory Sizes:\n");
|
||||
printf("- Array Size: %" PRIu64 "\n", static_cast<uint64_t>(STREAM_ARRAY_SIZE));
|
||||
printf("- Per Array: %12.2f MB\n", 1.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
|
||||
printf("- Total: %12.2f MB\n", 3.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
|
||||
printf("Benchmark kernels will be performed for %d iterations.\n",
|
||||
STREAM_NTIMES);
|
||||
|
||||
printf("Benchmark kernels will be performed for %d iterations.\n", STREAM_NTIMES);
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE);
|
||||
StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE);
|
||||
StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE);
|
||||
|
||||
StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE);
|
||||
StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE);
|
||||
StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE);
|
||||
StreamHostArray a = Kokkos::create_mirror_view(dev_a);
|
||||
StreamHostArray b = Kokkos::create_mirror_view(dev_b);
|
||||
StreamHostArray c = Kokkos::create_mirror_view(dev_c);
|
||||
|
||||
StreamHostArray a = Kokkos::create_mirror_view(dev_a);
|
||||
StreamHostArray b = Kokkos::create_mirror_view(dev_b);
|
||||
StreamHostArray c = Kokkos::create_mirror_view(dev_c);
|
||||
const double scalar = 3.0;
|
||||
|
||||
const double scalar = 3.0;
|
||||
double copyTime = std::numeric_limits<double>::max();
|
||||
double scaleTime = std::numeric_limits<double>::max();
|
||||
double addTime = std::numeric_limits<double>::max();
|
||||
double triadTime = std::numeric_limits<double>::max();
|
||||
|
||||
double copyTime = std::numeric_limits<double>::max();
|
||||
double scaleTime = std::numeric_limits<double>::max();
|
||||
double addTime = std::numeric_limits<double>::max();
|
||||
double triadTime = std::numeric_limits<double>::max();
|
||||
|
||||
printf("Initializing Views...\n");
|
||||
printf("Initializing Views...\n");
|
||||
|
||||
#if defined(KOKKOS_HAVE_OPENMP)
|
||||
Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
|
||||
Kokkos::parallel_for(
|
||||
"init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
|
||||
#else
|
||||
Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
|
||||
Kokkos::parallel_for(
|
||||
"init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
|
||||
#endif
|
||||
KOKKOS_LAMBDA(const int i) {
|
||||
KOKKOS_LAMBDA(const int i) {
|
||||
a[i] = 1.0;
|
||||
b[i] = 2.0;
|
||||
c[i] = 0.0;
|
||||
});
|
||||
|
||||
a[i] = 1.0;
|
||||
b[i] = 2.0;
|
||||
c[i] = 0.0;
|
||||
});
|
||||
// Copy contents of a (from the host) to the dev_a (device)
|
||||
Kokkos::deep_copy(dev_a, a);
|
||||
Kokkos::deep_copy(dev_b, b);
|
||||
Kokkos::deep_copy(dev_c, c);
|
||||
|
||||
// Copy contents of a (from the host) to the dev_a (device)
|
||||
Kokkos::deep_copy(dev_a, a);
|
||||
Kokkos::deep_copy(dev_b, b);
|
||||
Kokkos::deep_copy(dev_c, c);
|
||||
double start;
|
||||
|
||||
double start;
|
||||
printf("Starting benchmarking...\n");
|
||||
|
||||
printf("Starting benchmarking...\n");
|
||||
for (StreamIndex k = 0; k < STREAM_NTIMES; ++k) {
|
||||
start = now();
|
||||
perform_copy(dev_a, dev_b, dev_c);
|
||||
copyTime = std::min(copyTime, (now() - start));
|
||||
|
||||
for( StreamIndex k = 0; k < STREAM_NTIMES; ++k ) {
|
||||
start = now();
|
||||
perform_copy(dev_a, dev_b, dev_c);
|
||||
copyTime = std::min( copyTime, (now() - start) );
|
||||
start = now();
|
||||
perform_scale(dev_a, dev_b, dev_c, scalar);
|
||||
scaleTime = std::min(scaleTime, (now() - start));
|
||||
|
||||
start = now();
|
||||
perform_scale(dev_a, dev_b, dev_c, scalar);
|
||||
scaleTime = std::min( scaleTime, (now() - start) );
|
||||
start = now();
|
||||
perform_add(dev_a, dev_b, dev_c);
|
||||
addTime = std::min(addTime, (now() - start));
|
||||
|
||||
start = now();
|
||||
perform_add(dev_a, dev_b, dev_c);
|
||||
addTime = std::min( addTime, (now() - start) );
|
||||
start = now();
|
||||
perform_triad(dev_a, dev_b, dev_c, scalar);
|
||||
triadTime = std::min(triadTime, (now() - start));
|
||||
}
|
||||
|
||||
start = now();
|
||||
perform_triad(dev_a, dev_b, dev_c, scalar);
|
||||
triadTime = std::min( triadTime, (now() - start) );
|
||||
}
|
||||
Kokkos::deep_copy(a, dev_a);
|
||||
Kokkos::deep_copy(b, dev_b);
|
||||
Kokkos::deep_copy(c, dev_c);
|
||||
|
||||
Kokkos::deep_copy(a, dev_a);
|
||||
Kokkos::deep_copy(b, dev_b);
|
||||
Kokkos::deep_copy(c, dev_c);
|
||||
printf("Performing validation...\n");
|
||||
int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar);
|
||||
|
||||
printf("Performing validation...\n");
|
||||
int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar);
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
printf("Copy %11.2f MB/s\n",
|
||||
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
|
||||
copyTime);
|
||||
printf("Scale %11.2f MB/s\n",
|
||||
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
|
||||
scaleTime);
|
||||
printf("Add %11.2f MB/s\n",
|
||||
(1.0e-06 * 3.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
|
||||
addTime);
|
||||
printf("Triad %11.2f MB/s\n",
|
||||
(1.0e-06 * 3.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
|
||||
triadTime);
|
||||
|
||||
printf("Copy %11.2f MB/s\n",
|
||||
( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / copyTime );
|
||||
printf("Scale %11.2f MB/s\n",
|
||||
( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / scaleTime );
|
||||
printf("Add %11.2f MB/s\n",
|
||||
( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / addTime );
|
||||
printf("Triad %11.2f MB/s\n",
|
||||
( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / triadTime );
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
printf(HLINE);
|
||||
printf("Kokkos STREAM Benchmark\n");
|
||||
printf(HLINE);
|
||||
|
||||
printf(HLINE);
|
||||
printf("Kokkos STREAM Benchmark\n");
|
||||
printf(HLINE);
|
||||
Kokkos::initialize(argc, argv);
|
||||
const int rc = run_benchmark();
|
||||
Kokkos::finalize();
|
||||
|
||||
Kokkos::initialize(argc, argv);
|
||||
const int rc = run_benchmark();
|
||||
Kokkos::finalize();
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -19,6 +19,13 @@ default_arch="sm_35"
|
||||
# The default C++ compiler.
|
||||
#
|
||||
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
|
||||
|
||||
# Default to whatever is in the path
|
||||
nvcc_compiler=nvcc
|
||||
if [ ! -z $CUDA_ROOT ]; then
|
||||
nvcc_compiler="$CUDA_ROOT/bin/nvcc"
|
||||
fi
|
||||
|
||||
#host_compiler="icpc"
|
||||
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
|
||||
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
|
||||
@ -58,7 +65,7 @@ object_files_xlinker=""
|
||||
shared_versioned_libraries_host=""
|
||||
shared_versioned_libraries=""
|
||||
|
||||
# Does the User set the architecture
|
||||
# Does the User set the architecture
|
||||
arch_set=0
|
||||
|
||||
# Does the user overwrite the host compiler
|
||||
@ -77,7 +84,7 @@ host_only_args=""
|
||||
# Just run version on host compiler
|
||||
get_host_version=0
|
||||
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
replace_pragma_ident=0
|
||||
|
||||
# Mark first host compiler argument
|
||||
@ -179,7 +186,7 @@ do
|
||||
shift
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*|--fmad*)
|
||||
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
@ -187,7 +194,7 @@ do
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad)
|
||||
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
@ -195,11 +202,11 @@ do
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle unsupported standard flags
|
||||
--std=c++1y|-std=c++1y|--std=c++1z|-std=c++1z|--std=gnu++1y|-std=gnu++1y|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a|--std=c++17|-std=c++17)
|
||||
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
|
||||
fallback_std_flag="-std=c++14"
|
||||
# this is hopefully just occurring in a downstream project during CMake feature tests
|
||||
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
||||
if [ -n "$std_flag" ]; then
|
||||
warn_std_flag
|
||||
shared_args=${shared_args/ $std_flag/}
|
||||
@ -216,7 +223,25 @@ do
|
||||
fi
|
||||
std_flag=$corrected_std_flag
|
||||
shared_args="$shared_args $std_flag"
|
||||
;;
|
||||
;;
|
||||
--std=c++17|-std=c++17)
|
||||
if [ -n "$std_flag" ]; then
|
||||
warn_std_flag
|
||||
shared_args=${shared_args/ $std_flag/}
|
||||
fi
|
||||
# NVCC only has C++17 from version 11 on
|
||||
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
|
||||
if [ ${cuda_main_version} -lt 11 ]; then
|
||||
fallback_std_flag="-std=c++14"
|
||||
# this is hopefully just occurring in a downstream project during CMake feature tests
|
||||
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
||||
std_flag=$fallback_std_flag
|
||||
else
|
||||
std_flag=$1
|
||||
fi
|
||||
shared_args="$shared_args $std_flag"
|
||||
;;
|
||||
--std=c++11|-std=c++11|--std=c++14|-std=c++14)
|
||||
if [ -n "$std_flag" ]; then
|
||||
warn_std_flag
|
||||
@ -226,6 +251,20 @@ do
|
||||
shared_args="$shared_args $std_flag"
|
||||
;;
|
||||
|
||||
#convert PGI standard flags to something nvcc can handle
|
||||
--c++11|--c++14|--c++17)
|
||||
if [ -n "$std_flag" ]; then
|
||||
warn_std_flag
|
||||
shared_args=${shared_args/ $std_flag/}
|
||||
fi
|
||||
std_flag="-std=${1#--}"
|
||||
shared_args="$shared_args $std_flag"
|
||||
;;
|
||||
|
||||
#ignore PGI forcing ISO C++-conforming code
|
||||
-A)
|
||||
;;
|
||||
|
||||
#strip off -std=c++98 due to nvcc warnings; Tribits will place both -std=c++11 and -std=c++98
|
||||
-std=c++98|--std=c++98)
|
||||
;;
|
||||
@ -237,13 +276,17 @@ do
|
||||
;;
|
||||
#strip -Xcompiler because we add it
|
||||
-Xcompiler)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$2"
|
||||
if [[ $2 != "-o" ]]; then
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$2"
|
||||
fi
|
||||
shift
|
||||
fi
|
||||
shift
|
||||
# else we have -Xcompiler -o <filename>; in this case just drop -Xcompiler and process
|
||||
# the -o flag with the filename (done above)
|
||||
;;
|
||||
#strip of "-x cu" because we add that
|
||||
-x)
|
||||
@ -329,7 +372,7 @@ do
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args=$1
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$1"
|
||||
fi
|
||||
;;
|
||||
@ -387,7 +430,7 @@ if [ $arch_set -ne 1 ]; then
|
||||
fi
|
||||
|
||||
#Compose compilation command
|
||||
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
|
||||
nvcc_command="$nvcc_compiler $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
|
||||
if [ $first_xcompiler_arg -eq 0 ]; then
|
||||
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
|
||||
fi
|
||||
|
||||
@ -2,6 +2,7 @@ SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@)
|
||||
SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@)
|
||||
SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@)
|
||||
SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@)
|
||||
SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
|
||||
|
||||
# These are needed by KokkosKernels
|
||||
FOREACH(DEV ${Kokkos_DEVICES})
|
||||
@ -38,7 +39,7 @@ include(FindPackageHandleStandardArgs)
|
||||
# kokkos_check(
|
||||
# [DEVICES <devices>...] # Set of backends (e.g. "OpenMP" and/or "Cuda")
|
||||
# [ARCH <archs>...] # Target architectures (e.g. "Power9" and/or "Volta70")
|
||||
# [OPTIONS <options>...] # Optional settings (e.g. "PROFILING")
|
||||
# [OPTIONS <options>...] # Optional settings (e.g. "TUNING")
|
||||
# [TPLS <tpls>...] # Third party libraries
|
||||
# [RETURN_VALUE <result>] # Set a variable that indicates the result of the
|
||||
# # check instead of a fatal error
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
|
||||
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
|
||||
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
|
||||
#error \
|
||||
"Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
|
||||
#else
|
||||
#define KOKKOS_CORE_CONFIG_H
|
||||
#endif
|
||||
@ -10,7 +11,6 @@
|
||||
// KOKKOS_VERSION / 10000 is the major version
|
||||
#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
|
||||
|
||||
|
||||
/* Execution Spaces */
|
||||
#cmakedefine KOKKOS_ENABLE_SERIAL
|
||||
#cmakedefine KOKKOS_ENABLE_OPENMP
|
||||
@ -47,10 +47,9 @@
|
||||
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
|
||||
#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
|
||||
#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS
|
||||
#cmakedefine KOKKOS_ENABLE_PROFILING
|
||||
#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT
|
||||
#cmakedefine KOKKOS_ENABLE_TUNING
|
||||
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
#cmakedefine KOKKOS_ENABLE_ETI
|
||||
#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
|
||||
#cmakedefine KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK
|
||||
#cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN
|
||||
@ -60,7 +59,7 @@
|
||||
#cmakedefine KOKKOS_ENABLE_HWLOC
|
||||
#cmakedefine KOKKOS_USE_LIBRT
|
||||
#cmakedefine KOKKOS_ENABLE_HWBSPACE
|
||||
|
||||
#cmakedefine KOKKOS_ENABLE_LIBDL
|
||||
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
|
||||
#cmakedefine KOKKOS_COMPILER_CUDA_VERSION @KOKKOS_COMPILER_CUDA_VERSION@
|
||||
@ -95,4 +94,6 @@
|
||||
#cmakedefine KOKKOS_ARCH_VOLTA70
|
||||
#cmakedefine KOKKOS_ARCH_VOLTA72
|
||||
#cmakedefine KOKKOS_ARCH_TURING75
|
||||
#cmakedefine KOKKOS_ARCH_AMD_EPYC
|
||||
#cmakedefine KOKKOS_ARCH_AMPERE80
|
||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN
|
||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
|
||||
|
||||
958
lib/kokkos/cmake/Modules/CudaToolkit.cmake
Normal file
958
lib/kokkos/cmake/Modules/CudaToolkit.cmake
Normal file
@ -0,0 +1,958 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FindCUDAToolkit
|
||||
---------------
|
||||
|
||||
This script locates the NVIDIA CUDA toolkit and the associated libraries, but
|
||||
does not require the ``CUDA`` language be enabled for a given project. This
|
||||
module does not search for the NVIDIA CUDA Samples.
|
||||
|
||||
Search Behavior
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
Finding the CUDA Toolkit requires finding the ``nvcc`` executable, which is
|
||||
searched for in the following order:
|
||||
|
||||
1. If the ``CUDA`` language has been enabled we will use the directory
|
||||
containing the compiler as the first search location for ``nvcc``.
|
||||
|
||||
2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g.,
|
||||
``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it
|
||||
will be searched. If both an environment variable **and** a
|
||||
configuration variable are specified, the *configuration* variable takes
|
||||
precedence.
|
||||
|
||||
The directory specified here must be such that the executable ``nvcc`` can be
|
||||
found underneath the directory specified by ``CUDAToolkit_ROOT``. If
|
||||
``CUDAToolkit_ROOT`` is specified, but no ``nvcc`` is found underneath, this
|
||||
package is marked as **not** found. No subsequent search attempts are
|
||||
performed.
|
||||
|
||||
3. If the CUDA_PATH environment variable is defined, it will be searched.
|
||||
|
||||
4. The user's path is searched for ``nvcc`` using :command:`find_program`. If
|
||||
this is found, no subsequent search attempts are performed. Users are
|
||||
responsible for ensuring that the first ``nvcc`` to show up in the path is
|
||||
the desired path in the event that multiple CUDA Toolkits are installed.
|
||||
|
||||
5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is
|
||||
used. No subsequent search attempts are performed. No default symbolic link
|
||||
location exists for the Windows platform.
|
||||
|
||||
6. The platform specific default install locations are searched. If exactly one
|
||||
candidate is found, this is used. The default CUDA Toolkit install locations
|
||||
searched are:
|
||||
|
||||
+-------------+-------------------------------------------------------------+
|
||||
| Platform | Search Pattern |
|
||||
+=============+=============================================================+
|
||||
| macOS | ``/Developer/NVIDIA/CUDA-X.Y`` |
|
||||
+-------------+-------------------------------------------------------------+
|
||||
| Other Unix | ``/usr/local/cuda-X.Y`` |
|
||||
+-------------+-------------------------------------------------------------+
|
||||
| Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` |
|
||||
+-------------+-------------------------------------------------------------+
|
||||
|
||||
Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as
|
||||
``/usr/local/cuda-9.0`` or
|
||||
``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0``
|
||||
|
||||
.. note::
|
||||
|
||||
When multiple CUDA Toolkits are installed in the default location of a
|
||||
system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0``
|
||||
exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this
|
||||
package is marked as **not** found.
|
||||
|
||||
There are too many factors involved in making an automatic decision in
|
||||
the presence of multiple CUDA Toolkits being installed. In this
|
||||
situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or
|
||||
(2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for
|
||||
:command:`find_program` to find.
|
||||
|
||||
Options
|
||||
^^^^^^^
|
||||
|
||||
``VERSION``
|
||||
If specified, describes the version of the CUDA Toolkit to search for.
|
||||
|
||||
``REQUIRED``
|
||||
If specified, configuration will error if a suitable CUDA Toolkit is not
|
||||
found.
|
||||
|
||||
``QUIET``
|
||||
If specified, the search for a suitable CUDA Toolkit will not produce any
|
||||
messages.
|
||||
|
||||
``EXACT``
|
||||
If specified, the CUDA Toolkit is considered found only if the exact
|
||||
``VERSION`` specified is recovered.
|
||||
|
||||
Imported targets
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
An :ref:`imported target <Imported targets>` named ``CUDA::toolkit`` is provided.
|
||||
|
||||
This module defines :prop_tgt:`IMPORTED` targets for each
|
||||
of the following libraries that are part of the CUDAToolkit:
|
||||
|
||||
- :ref:`CUDA Runtime Library<cuda_toolkit_rt_lib>`
|
||||
- :ref:`CUDA Driver Library<cuda_toolkit_driver_lib>`
|
||||
- :ref:`cuBLAS<cuda_toolkit_cuBLAS>`
|
||||
- :ref:`cuFFT<cuda_toolkit_cuFFT>`
|
||||
- :ref:`cuRAND<cuda_toolkit_cuRAND>`
|
||||
- :ref:`cuSOLVER<cuda_toolkit_cuSOLVER>`
|
||||
- :ref:`cuSPARSE<cuda_toolkit_cuSPARSE>`
|
||||
- :ref:`cuPTI<cuda_toolkit_cupti>`
|
||||
- :ref:`NPP<cuda_toolkit_NPP>`
|
||||
- :ref:`nvBLAS<cuda_toolkit_nvBLAS>`
|
||||
- :ref:`nvGRAPH<cuda_toolkit_nvGRAPH>`
|
||||
- :ref:`nvJPEG<cuda_toolkit_nvJPEG>`
|
||||
- :ref:`nvidia-ML<cuda_toolkit_nvML>`
|
||||
- :ref:`nvRTC<cuda_toolkit_nvRTC>`
|
||||
- :ref:`nvToolsExt<cuda_toolkit_nvToolsExt>`
|
||||
- :ref:`OpenCL<cuda_toolkit_opencl>`
|
||||
- :ref:`cuLIBOS<cuda_toolkit_cuLIBOS>`
|
||||
|
||||
.. _`cuda_toolkit_rt_lib`:
|
||||
|
||||
CUDA Runtime Library
|
||||
""""""""""""""""""""
|
||||
|
||||
The CUDA Runtime library (cudart) is what most applications will typically
|
||||
need to link against to make any calls such as `cudaMalloc`, and `cudaFree`.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cudart``
|
||||
- ``CUDA::cudart_static``
|
||||
|
||||
.. _`cuda_toolkit_driver_lib`:
|
||||
|
||||
CUDA Driver Library
|
||||
""""""""""""""""""""
|
||||
|
||||
The CUDA Driver library (cuda) is used by applications that use calls
|
||||
such as `cuMemAlloc`, and `cuMemFree`. This is generally used by advanced users.
|
||||
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cuda_driver``
|
||||
- ``CUDA::cuda_driver``
|
||||
|
||||
.. _`cuda_toolkit_cuBLAS`:
|
||||
|
||||
cuBLAS
|
||||
""""""
|
||||
|
||||
The `cuBLAS <https://docs.nvidia.com/cuda/cublas/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cublas``
|
||||
- ``CUDA::cublas_static``
|
||||
|
||||
.. _`cuda_toolkit_cuFFT`:
|
||||
|
||||
cuFFT
|
||||
"""""
|
||||
|
||||
The `cuFFT <https://docs.nvidia.com/cuda/cufft/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cufft``
|
||||
- ``CUDA::cufftw``
|
||||
- ``CUDA::cufft_static``
|
||||
- ``CUDA::cufftw_static``
|
||||
|
||||
cuRAND
|
||||
""""""
|
||||
|
||||
The `cuRAND <https://docs.nvidia.com/cuda/curand/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::curand``
|
||||
- ``CUDA::curand_static``
|
||||
|
||||
.. _`cuda_toolkit_cuSOLVER`:
|
||||
|
||||
cuSOLVER
|
||||
""""""""
|
||||
|
||||
The `cuSOLVER <https://docs.nvidia.com/cuda/cusolver/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cusolver``
|
||||
- ``CUDA::cusolver_static``
|
||||
|
||||
.. _`cuda_toolkit_cuSPARSE`:
|
||||
|
||||
cuSPARSE
|
||||
""""""""
|
||||
|
||||
The `cuSPARSE <https://docs.nvidia.com/cuda/cusparse/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cusparse``
|
||||
- ``CUDA::cusparse_static``
|
||||
|
||||
.. _`cuda_toolkit_cupti`:
|
||||
|
||||
cupti
|
||||
"""""
|
||||
|
||||
The `NVIDIA CUDA Profiling Tools Interface <https://developer.nvidia.com/CUPTI>`_.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::cupti``
|
||||
- ``CUDA::cupti_static``
|
||||
|
||||
.. _`cuda_toolkit_NPP`:
|
||||
|
||||
NPP
|
||||
"""
|
||||
|
||||
The `NPP <https://docs.nvidia.com/cuda/npp/index.html>`_ libraries.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- `nppc`:
|
||||
|
||||
- ``CUDA::nppc``
|
||||
- ``CUDA::nppc_static``
|
||||
|
||||
- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h`
|
||||
|
||||
- ``CUDA::nppial``
|
||||
- ``CUDA::nppial_static``
|
||||
|
||||
- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h`
|
||||
|
||||
- ``CUDA::nppicc``
|
||||
- ``CUDA::nppicc_static``
|
||||
|
||||
- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h`
|
||||
|
||||
- ``CUDA::nppicom``
|
||||
- ``CUDA::nppicom_static``
|
||||
|
||||
- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h`
|
||||
|
||||
- ``CUDA::nppidei``
|
||||
- ``CUDA::nppidei_static``
|
||||
|
||||
- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h`
|
||||
|
||||
- ``CUDA::nppif``
|
||||
- ``CUDA::nppif_static``
|
||||
|
||||
- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h`
|
||||
|
||||
- ``CUDA::nppig``
|
||||
- ``CUDA::nppig_static``
|
||||
|
||||
- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h`
|
||||
|
||||
- ``CUDA::nppim``
|
||||
- ``CUDA::nppim_static``
|
||||
|
||||
- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h`
|
||||
|
||||
- ``CUDA::nppist``
|
||||
- ``CUDA::nppist_static``
|
||||
|
||||
- `nppisu`: Memory support functions in `nppi_support_functions.h`
|
||||
|
||||
- ``CUDA::nppisu``
|
||||
- ``CUDA::nppisu_static``
|
||||
|
||||
- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h`
|
||||
|
||||
- ``CUDA::nppitc``
|
||||
- ``CUDA::nppitc_static``
|
||||
|
||||
- `npps`:
|
||||
|
||||
- ``CUDA::npps``
|
||||
- ``CUDA::npps_static``
|
||||
|
||||
.. _`cuda_toolkit_nvBLAS`:
|
||||
|
||||
nvBLAS
|
||||
""""""
|
||||
|
||||
The `nvBLAS <https://docs.nvidia.com/cuda/nvblas/index.html>`_ libraries.
|
||||
This is a shared library only.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvblas``
|
||||
|
||||
.. _`cuda_toolkit_nvGRAPH`:
|
||||
|
||||
nvGRAPH
|
||||
"""""""
|
||||
|
||||
The `nvGRAPH <https://docs.nvidia.com/cuda/nvgraph/index.html>`_ library.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvgraph``
|
||||
- ``CUDA::nvgraph_static``
|
||||
|
||||
|
||||
.. _`cuda_toolkit_nvJPEG`:
|
||||
|
||||
nvJPEG
|
||||
""""""
|
||||
|
||||
The `nvJPEG <https://docs.nvidia.com/cuda/nvjpeg/index.html>`_ library.
|
||||
Introduced in CUDA 10.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvjpeg``
|
||||
- ``CUDA::nvjpeg_static``
|
||||
|
||||
.. _`cuda_toolkit_nvRTC`:
|
||||
|
||||
nvRTC
|
||||
"""""
|
||||
|
||||
The `nvRTC <https://docs.nvidia.com/cuda/nvrtc/index.html>`_ (Runtime Compilation) library.
|
||||
This is a shared library only.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvrtc``
|
||||
|
||||
.. _`cuda_toolkit_nvml`:
|
||||
|
||||
nvidia-ML
|
||||
"""""""""
|
||||
|
||||
The `NVIDIA Management Library <https://developer.nvidia.com/nvidia-management-library-nvml>`_.
|
||||
This is a shared library only.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvml``
|
||||
|
||||
.. _`cuda_toolkit_nvToolsExt`:
|
||||
|
||||
nvToolsExt
|
||||
""""""""""
|
||||
|
||||
The `NVIDIA Tools Extension <https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm>`_.
|
||||
This is a shared library only.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::nvToolsExt``
|
||||
|
||||
.. _`cuda_toolkit_opencl`:
|
||||
|
||||
OpenCL
|
||||
""""""
|
||||
|
||||
The `NVIDIA OpenCL Library <https://developer.nvidia.com/opencl>`_.
|
||||
This is a shared library only.
|
||||
|
||||
Targets Created:
|
||||
|
||||
- ``CUDA::OpenCL``
|
||||
|
||||
.. _`cuda_toolkit_cuLIBOS`:
|
||||
|
||||
cuLIBOS
|
||||
"""""""
|
||||
|
||||
The cuLIBOS library is a backend thread abstraction layer library which is
|
||||
static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``,
|
||||
``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP
|
||||
libraries all automatically have this dependency linked.
|
||||
|
||||
Target Created:
|
||||
|
||||
- ``CUDA::culibos``
|
||||
|
||||
**Note**: direct usage of this target by consumers should not be necessary.
|
||||
|
||||
.. _`cuda_toolkit_cuRAND`:
|
||||
|
||||
|
||||
|
||||
Result variables
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
``CUDAToolkit_FOUND``
|
||||
A boolean specifying whether or not the CUDA Toolkit was found.
|
||||
|
||||
``CUDAToolkit_VERSION``
|
||||
The exact version of the CUDA Toolkit found (as reported by
|
||||
``nvcc --version``).
|
||||
|
||||
``CUDAToolkit_VERSION_MAJOR``
|
||||
The major version of the CUDA Toolkit.
|
||||
|
||||
``CUDAToolkit_VERSION_MINOR``
|
||||
The minor version of the CUDA Toolkit.
|
||||
|
||||
``CUDAToolkit_VERSION_PATCH``
|
||||
The patch version of the CUDA Toolkit.
|
||||
|
||||
``CUDAToolkit_BIN_DIR``
|
||||
The path to the CUDA Toolkit binary directory that contains the CUDA
|
||||
executable ``nvcc``.
|
||||
|
||||
``CUDAToolkit_INCLUDE_DIRS``
|
||||
The path to the CUDA Toolkit ``include`` folder containing the header files
|
||||
required to compile a project linking against CUDA.
|
||||
|
||||
``CUDAToolkit_LIBRARY_DIR``
|
||||
The path to the CUDA Toolkit library directory that contains the CUDA
|
||||
Runtime library ``cudart``.
|
||||
|
||||
``CUDAToolkit_TARGET_DIR``
|
||||
The path to the CUDA Toolkit directory including the target architecture
|
||||
when cross-compiling. When not cross-compiling this will be equivalent to
|
||||
``CUDAToolkit_ROOT_DIR``.
|
||||
|
||||
``CUDAToolkit_NVCC_EXECUTABLE``
|
||||
The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may
|
||||
**not** be the same as
|
||||
:variable:`CMAKE_CUDA_COMPILER <CMAKE_<LANG>_COMPILER>`. ``nvcc`` must be
|
||||
found to determine the CUDA Toolkit version as well as determining other
|
||||
features of the Toolkit. This variable is set for the convenience of
|
||||
modules that depend on this one.
|
||||
|
||||
|
||||
#]=======================================================================]
|
||||
|
||||
# NOTE: much of this was simply extracted from FindCUDA.cmake.
|
||||
|
||||
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
|
||||
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
|
||||
#
|
||||
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2007-2009
|
||||
# Scientific Computing and Imaging Institute, University of Utah
|
||||
#
|
||||
# This code is licensed under the MIT License. See the FindCUDA.cmake script
|
||||
# for the text of the license.
|
||||
|
||||
# The MIT License
|
||||
#
|
||||
# License for the specific language governing rights and limitations under
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
# When the CUDA language has been enabled and its compiler is NVCC, the
# toolkit's binary directory is the directory that holds the compiler
# executable. An already populated CUDAToolkit_BIN_DIR is left untouched.
if(CMAKE_CUDA_COMPILER_LOADED
   AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
   AND NOT CUDAToolkit_BIN_DIR)
  get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
  mark_as_advanced(CUDAToolkit_BIN_DIR)
  # cuda_dir was only a scratch variable; do not leave it behind.
  unset(cuda_dir)
endif()
|
||||
|
||||
# import_target_link_libraries(<target> [SYSTEM|INTERFACE|PUBLIC] <item>...)
#
# Adds link items to the interface of <target>. On CMake >= 3.12 the call is
# forwarded unchanged to target_link_libraries(). On older versions the
# SYSTEM/INTERFACE/PUBLIC keywords are stripped and the remaining items are
# appended directly to the INTERFACE_LINK_LIBRARIES property.
if(CMAKE_VERSION VERSION_LESS "3.12.0")
  function(import_target_link_libraries target)
    # Only the keywords are parsed; everything else ends up in
    # HACK_UNPARSED_ARGUMENTS and is treated as a link item.
    cmake_parse_arguments(HACK "SYSTEM;INTERFACE;PUBLIC" "" "" ${ARGN})

    get_target_property(current_libs ${target} INTERFACE_LINK_LIBRARIES)
    if(NOT current_libs)
      # Property not set yet (<var>-NOTFOUND): start from an empty list.
      set(current_libs)
    endif()
    list(APPEND current_libs ${HACK_UNPARSED_ARGUMENTS})

    set_target_properties(${target} PROPERTIES
      INTERFACE_LINK_LIBRARIES "${current_libs}")
  endfunction()
else()
  function(import_target_link_libraries)
    target_link_libraries(${ARGN})
  endfunction()
endif()
|
||||
|
||||
# import_target_link_directories(<target> [SYSTEM|INTERFACE|PUBLIC] <dir>...)
#
# Adds library search directories to the interface of <target>. On
# CMake >= 3.13 the call is forwarded unchanged to target_link_directories().
# Older versions have no such command, so each directory is appended to
# INTERFACE_LINK_LIBRARIES as a "-L<dir>" item instead.
if(CMAKE_VERSION VERSION_LESS "3.13.0")
  function(import_target_link_directories target)
    # Only the keywords are parsed; everything else ends up in
    # HACK_UNPARSED_ARGUMENTS and is treated as a directory.
    cmake_parse_arguments(HACK "SYSTEM;INTERFACE;PUBLIC" "" "" ${ARGN})

    # Function scopes inherit the caller's variables. Reset the accumulator so
    # a LINK_LIBS_LIST defined by the caller cannot leak into the property
    # when the target has no link libraries yet.
    set(LINK_LIBS_LIST)
    get_target_property(LINK_LIBS ${target} INTERFACE_LINK_LIBRARIES)
    if(LINK_LIBS) # false when the property is unset (<var>-NOTFOUND)
      set(LINK_LIBS_LIST ${LINK_LIBS})
    endif()

    foreach(LIB ${HACK_UNPARSED_ARGUMENTS})
      list(APPEND LINK_LIBS_LIST "-L${LIB}")
    endforeach()

    set_target_properties(${target} PROPERTIES
      INTERFACE_LINK_LIBRARIES "${LINK_LIBS_LIST}")
  endfunction()
else()
  function(import_target_link_directories)
    target_link_directories(${ARGN})
  endfunction()
endif()
|
||||
|
||||
# import_target_include_directories(<target> [SYSTEM|INTERFACE|PUBLIC] <dir>...)
#
# Adds include directories to the interface of <target>. On CMake >= 3.12 the
# call is forwarded unchanged to target_include_directories(). On older
# versions the SYSTEM/INTERFACE/PUBLIC keywords are stripped and the remaining
# directories are appended to the INTERFACE_INCLUDE_DIRECTORIES property.
if(CMAKE_VERSION VERSION_LESS "3.12.0")
  function(import_target_include_directories target)
    # Only the keywords are parsed; everything else ends up in
    # HACK_UNPARSED_ARGUMENTS and is treated as an include directory.
    cmake_parse_arguments(HACK "SYSTEM;INTERFACE;PUBLIC" "" "" ${ARGN})

    # Read the directories already recorded on the target. The result variable
    # must be the one tested below; a misspelling here previously discarded the
    # existing directories and tested whatever INCLUDE_DIRS the caller had set.
    get_target_property(INCLUDE_DIRS ${target} INTERFACE_INCLUDE_DIRECTORIES)
    if(INCLUDE_DIRS)
      list(APPEND INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS})
    else()
      # Property unset (<var>-NOTFOUND) or empty: the new directories are all
      # there is.
      set(INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS})
    endif()

    set_target_properties(${target} PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_DIRS}")
  endfunction()
else()
  function(import_target_include_directories)
    target_include_directories(${ARGN})
  endfunction()
endif()
|
||||
|
||||
# Try the language- or user-provided binary directory first. NO_DEFAULT_PATH
# restricts this lookup to CUDAToolkit_BIN_DIR.
if(CUDAToolkit_BIN_DIR)
  find_program(CUDAToolkit_NVCC_EXECUTABLE
    NAMES nvcc nvcc.exe
    PATHS ${CUDAToolkit_BIN_DIR}
    NO_DEFAULT_PATH
  )
endif()

# Search using CUDAToolkit_ROOT.
#
# CMake only consults <PackageName>_ROOT implicitly while a
# find_package(<PackageName>) call is active. This file may also be loaded
# with a plain include(), so the CMake variable and the environment variable
# are passed as explicit HINTS; otherwise a user-specified root would be
# ignored and the "not found in CUDAToolkit_ROOT" diagnostic further down
# would be misleading. The CUDA_PATH environment variable and the default
# search locations remain as fallbacks.
find_program(CUDAToolkit_NVCC_EXECUTABLE
  NAMES nvcc nvcc.exe
  HINTS ${CUDAToolkit_ROOT} ENV CUDAToolkit_ROOT
  PATHS ENV CUDA_PATH
  PATH_SUFFIXES bin
)
|
||||
|
||||
# A CUDAToolkit_ROOT given by the user (CMake variable or environment
# variable) that does not lead to nvcc is treated as a failure: fatal for a
# REQUIRED package, otherwise a status note (unless QUIET) followed by an
# early return with CUDAToolkit_FOUND set to FALSE.
if(NOT CUDAToolkit_NVCC_EXECUTABLE
   AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
  # Both messages are prepared up front; which one is printed depends on where
  # the root came from. The CMake variable takes precedence.
  set(fail_base "Could not find nvcc executable in path specified by")
  set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
  set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")

  if(CUDAToolkit_FIND_REQUIRED)
    if(DEFINED CUDAToolkit_ROOT)
      message(FATAL_ERROR ${cuda_root_fail})
    elseif(DEFINED ENV{CUDAToolkit_ROOT})
      message(FATAL_ERROR ${env_cuda_root_fail})
    endif()
  else()
    if(NOT CUDAToolkit_FIND_QUIETLY AND DEFINED CUDAToolkit_ROOT)
      message(STATUS ${cuda_root_fail})
    elseif(NOT CUDAToolkit_FIND_QUIETLY AND DEFINED ENV{CUDAToolkit_ROOT})
      message(STATUS ${env_cuda_root_fail})
    endif()

    set(CUDAToolkit_FOUND FALSE)
    # Drop the scratch variables before handing control back to the caller.
    unset(fail_base)
    unset(cuda_root_fail)
    unset(env_cuda_root_fail)
    return()
  endif()
endif()
|
||||
|
||||
# nvcc has not been located yet, so fall back to the conventional install
# locations of each platform:
#
# - Linux:   /usr/local/cuda-X.Y
# - macOS:   /Developer/NVIDIA/CUDA-X.Y
# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
#
# On Unix the unversioned default location /usr/local/cuda is searched before
# any versioned directory: without an explicit CUDAToolkit_ROOT it is assumed
# to point at the installation the user wants.
if(NOT CUDAToolkit_NVCC_EXECUTABLE)
  if(NOT UNIX)
    set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
  elseif(APPLE)
    set(platform_base "/Developer/NVIDIA/CUDA-")
  else()
    set(platform_base "/usr/local/cuda-")
  endif()

  # Collect the version suffix (X.Y or XX.Y) of every directory that matches
  # the platform prefix.
  file(GLOB possible_paths "${platform_base}*")
  set(possible_versions)
  foreach(p ${possible_paths})
    # Extract the version number from the end of the path.
    string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
    if(IS_DIRECTORY ${p} AND p_version)
      list(APPEND possible_versions ${p_version})
    endif()
  endforeach()

  # Order the versions from newest to oldest. list(SORT) compares
  # alphabetically, so a simple insertion sort with VERSION_GREATER is used
  # instead; the number of installed toolkits is small enough for this not to
  # matter. Duplicates cannot occur because the input is a directory listing.
  set(versions)
  foreach(v ${possible_versions})
    list(LENGTH versions num_versions)
    set(i 0)
    set(early_terminate FALSE)
    # Walk the already sorted list and insert in front of the first entry that
    # is older than v. With an empty list the loop body never runs.
    while(i LESS num_versions)
      list(GET versions ${i} curr)
      if(v VERSION_GREATER curr)
        list(INSERT versions ${i} ${v})
        set(early_terminate TRUE)
        break()
      endif()
      math(EXPR i "${i} + 1")
    endwhile()
    # Nothing older was found: v belongs at the end.
    if(NOT early_terminate)
      list(APPEND versions ${v})
    endif()
  endforeach()

  # Turn the descending version list back into directories to search.
  set(search_paths)
  foreach(v ${versions})
    list(APPEND search_paths "${platform_base}${v}")
  endforeach()

  # The global default /usr/local/cuda goes to the front on Unix.
  if(UNIX)
    list(INSERT search_paths 0 "/usr/local/cuda")
  endif()

  # Search for nvcc again, this time in the platform default locations.
  find_program(CUDAToolkit_NVCC_EXECUTABLE
    NAMES nvcc nvcc.exe
    PATHS ${search_paths}
    PATH_SUFFIXES bin
  )

  # Clean up the scratch variables for the caller.
  unset(platform_base)
  unset(possible_paths)
  unset(possible_versions)
  unset(versions)
  unset(i)
  unset(early_terminate)
  unset(search_paths)

  if(NOT CUDAToolkit_NVCC_EXECUTABLE)
    if(CUDAToolkit_FIND_REQUIRED)
      message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
    elseif(NOT CUDAToolkit_FIND_QUIETLY)
      message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
    endif()

    set(CUDAToolkit_FOUND FALSE)
    return()
  endif()
endif()
|
||||
|
||||
# nvcc was located but the binary directory is still unknown: record the
# directory containing nvcc as CUDAToolkit_BIN_DIR.
if(CUDAToolkit_NVCC_EXECUTABLE AND NOT CUDAToolkit_BIN_DIR)
  get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
  # FORCE replaces an existing empty/false cache entry; this branch is only
  # entered when CUDAToolkit_BIN_DIR evaluates to false.
  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
  mark_as_advanced(CUDAToolkit_BIN_DIR)
  unset(cuda_dir)
endif()
|
||||
|
||||
# Determine CUDAToolkit_VERSION and its MAJOR/MINOR/PATCH components.
if(CUDAToolkit_NVCC_EXECUTABLE
   AND CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
  # nvcc is the active CUDA compiler, so the version CMake already computed
  # for it is reused. The MATCHES test is expected to succeed; it is used to
  # populate CMAKE_MATCH_1..3.
  if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
    set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
  endif()
else()
  # Otherwise run nvcc and parse the " V<major>.<minor>.<patch>" token from
  # its --version output.
  execute_process(
    COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version"
    OUTPUT_VARIABLE NVCC_OUT
  )
  if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
    set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
  endif()
  unset(NVCC_OUT)
endif()
|
||||
|
||||
|
||||
# The toolkit root is the parent of the binary directory.
get_filename_component(CUDAToolkit_ROOT_DIR "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)

# Handle cross compilation: map the target processor to the name of the
# matching directory under <root>/targets.
if(CMAKE_CROSSCOMPILING)
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
    # Support for NVPACK
    set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
    # Support for arm cross compilation
    set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    # Support for aarch64 cross compilation
    if(ANDROID_ARCH_NAME STREQUAL "arm64")
      set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
    else()
      set(CUDAToolkit_TARGET_NAME "aarch64-linux")
    endif()
  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
    set(CUDAToolkit_TARGET_NAME "x86_64-linux")
  endif()

  if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
    set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
    # Add the known CUDA target root path to the front of the directories
    # searched for programs, libraries and headers. list(INSERT ... 0) is used
    # instead of list(PREPEND) because PREPEND requires CMake 3.15 while this
    # file also supports older releases.
    list(INSERT CMAKE_FIND_ROOT_PATH 0 "${CUDAToolkit_TARGET_DIR}")

    # Mark that we need to pop the root search path changes after we have
    # found all cuda libraries so that searches for our cross-compilation
    # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
    # PATH
    set(_CUDAToolkit_Pop_ROOT_PATH True)
  endif()
else()
  # Not cross compiling
  set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
  # Now that we have the real ROOT_DIR, find components inside it.
  list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})

  # Mark that we need to pop the prefix path changes after we have
  # found the cudart library.
  set(_CUDAToolkit_Pop_Prefix True)
endif()
|
||||
|
||||
|
||||
# Locate the toolkit headers via cuda_runtime.h.
find_path(CUDAToolkit_INCLUDE_DIR NAMES cuda_runtime.h)

# Locate the CUDA Runtime Library (cudart). The regular library directories
# are tried first; the stub directories are consulted only when that fails.
find_library(CUDA_CUDART
  NAMES cudart
  PATH_SUFFIXES lib64 lib/x64
)
if(NOT CUDA_CUDART)
  find_library(CUDA_CUDART
    NAMES cudart
    PATH_SUFFIXES lib64/stubs lib/x64/stubs
  )
endif()

if(NOT (CUDA_CUDART OR CUDAToolkit_FIND_QUIETLY))
  message(STATUS "Unable to find cudart library.")
endif()

# The root directory was only needed to steer the searches above.
unset(CUDAToolkit_ROOT_DIR)

# Undo the temporary CMAKE_PREFIX_PATH entry appended for the non-cross build.
if(_CUDAToolkit_Pop_Prefix)
  list(REMOVE_AT CMAKE_PREFIX_PATH -1)
  unset(_CUDAToolkit_Pop_Prefix)
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
# Compare against the requested version and make sure every required variable
# was found; this sets CUDAToolkit_FOUND.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CUDAToolkit
  REQUIRED_VARS CUDAToolkit_INCLUDE_DIR CUDA_CUDART CUDAToolkit_NVCC_EXECUTABLE
  VERSION_VAR CUDAToolkit_VERSION
)
mark_as_advanced(CUDA_CUDART CUDAToolkit_INCLUDE_DIR CUDAToolkit_NVCC_EXECUTABLE)

#-----------------------------------------------------------------------------
# Publish the result variables: the include directories, and the library
# directory derived from the location of cudart.
if(CUDAToolkit_FOUND)
  set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
  get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Construct import targets
|
||||
if(CUDAToolkit_FOUND)
|
||||
|
||||
# _CUDAToolkit_find_and_add_import_lib(<lib_name>
#                                      [ALT <name>...]
#                                      [DEPS <lib>...]
#                                      [EXTRA_PATH_SUFFIXES <suffix>...])
#
# Searches for the library <lib_name> (or one of the ALT names), caches the
# result in CUDA_<lib_name>_LIBRARY and, when it is found and the target does
# not exist yet, creates the imported interface target CUDA::<lib_name>.
# Each entry of DEPS is linked as CUDA::<dep> if that target already exists.
function(_CUDAToolkit_find_and_add_import_lib lib_name)
  cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN})

  set(candidate_names ${lib_name} ${arg_ALT})

  find_library(CUDA_${lib_name}_LIBRARY
    NAMES ${candidate_names}
    HINTS ${CUDAToolkit_LIBRARY_DIR}
          ENV CUDA_PATH
    PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
                  ${arg_EXTRA_PATH_SUFFIXES}
  )
  # Stub directories are tried only after every other search location has
  # been exhausted.
  if(NOT CUDA_${lib_name}_LIBRARY)
    find_library(CUDA_${lib_name}_LIBRARY
      NAMES ${candidate_names}
      HINTS ${CUDAToolkit_LIBRARY_DIR}
            ENV CUDA_PATH
      PATH_SUFFIXES lib64/stubs lib/x64/stubs lib/stubs stubs
    )
  endif()

  mark_as_advanced(CUDA_${lib_name}_LIBRARY)

  # Nothing to create when the target already exists or the library is missing.
  if(TARGET CUDA::${lib_name} OR NOT CUDA_${lib_name}_LIBRARY)
    return()
  endif()

  add_library(CUDA::${lib_name} IMPORTED INTERFACE)
  import_target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
  import_target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}")
  foreach(dependency ${arg_DEPS})
    if(TARGET CUDA::${dependency})
      import_target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency})
    endif()
  endforeach()
endfunction()
|
||||
|
||||
# CUDA::toolkit carries only the toolkit's include and library directories.
if(NOT TARGET CUDA::toolkit)
  add_library(CUDA::toolkit IMPORTED INTERFACE)
  import_target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
  import_target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
endif()

# Driver library: searched as cuda_driver, falling back to the name cuda.
_CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda)

# Runtime library, shared and static.
_CUDAToolkit_find_and_add_import_lib(cudart)
_CUDAToolkit_find_and_add_import_lib(cudart_static)

# Extra link dependencies of cudart_static. These are generally only needed
# when the toolkit is used while the CUDA language is disabled.
if(TARGET CUDA::cudart_static
   AND NOT TARGET CUDA::cudart_static_deps)

  add_library(CUDA::cudart_static_deps IMPORTED INTERFACE)
  import_target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps)

  # Threads and the dynamic loader library, only when a C or C++ compiler is
  # configured.
  if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER))
    find_package(Threads REQUIRED)
    import_target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS})
  endif()

  if(UNIX AND NOT APPLE)
    # On Linux, you must link against librt when using the static cuda runtime.
    find_library(CUDAToolkit_rt_LIBRARY rt)
    mark_as_advanced(CUDAToolkit_rt_LIBRARY)
    if(CUDAToolkit_rt_LIBRARY)
      import_target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY})
    else()
      message(WARNING "Could not find librt library, needed by CUDA::cudart_static")
    endif()
  endif()
endif()
|
||||
|
||||
# cuLIBOS is a static-only library that the static variants below depend on.
_CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library
foreach(cuda_lib cublas cufft curand cusparse nppc nvjpeg)
  _CUDAToolkit_find_and_add_import_lib(${cuda_lib})
  _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos)
endforeach()

# cuFFTW depends on cuFFT. The static variant must be requested under its own
# name: repeating "cufftw" would be a no-op (the target already exists) and
# CUDA::cufftw_static would never be created.
_CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft)
_CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static)
|
||||
|
||||
# cuSOLVER is linked against cuBLAS and cuSPARSE (plus cuLIBOS when static).
_CUDAToolkit_find_and_add_import_lib(cusolver
  DEPS cublas cusparse)
_CUDAToolkit_find_and_add_import_lib(cusolver_static
  DEPS cublas_static cusparse_static culibos)

# nvGRAPH is linked against cuRAND and cuSOLVER.
_CUDAToolkit_find_and_add_import_lib(nvgraph
  DEPS curand cusolver)
_CUDAToolkit_find_and_add_import_lib(nvgraph_static
  DEPS curand_static cusolver_static)

# The NPP libraries other than nppc itself all depend on nppc.
foreach(cuda_lib
    nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu)
  _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc)
  _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static)
endforeach()

# CUPTI may live under extras/CUPTI next to the library directory.
_CUDAToolkit_find_and_add_import_lib(cupti
  EXTRA_PATH_SUFFIXES
    ../extras/CUPTI/lib64/
    ../extras/CUPTI/lib/)
_CUDAToolkit_find_and_add_import_lib(cupti_static
  EXTRA_PATH_SUFFIXES
    ../extras/CUPTI/lib64/
    ../extras/CUPTI/lib/)

# Runtime compilation library; linked against the driver library.
_CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver)

# NVIDIA Management Library, also searched under the name nvidia-ml.
_CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml)

if(WIN32)
  # nvtools can be installed outside the CUDA toolkit directory, so the
  # Windows-only NVTOOLSEXT_PATH environment variable is preferred. The most
  # common library name on Windows is nvToolsExt64_1.
  find_library(CUDA_nvToolsExt_LIBRARY
    NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt
    PATHS ENV NVTOOLSEXT_PATH
          ENV CUDA_PATH
    PATH_SUFFIXES lib/x64 lib
  )
endif()
_CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64)

_CUDAToolkit_find_and_add_import_lib(OpenCL)
|
||||
endif()
|
||||
|
||||
# Remove the cross-compilation target directory that was placed at the front
# of CMAKE_FIND_ROOT_PATH, and clear the marker that requested the removal.
if(_CUDAToolkit_Pop_ROOT_PATH)
  list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0)
  unset(_CUDAToolkit_Pop_ROOT_PATH)
endif()
|
||||
@ -1,17 +1,37 @@
|
||||
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
# Note: "stubs" suffix allows CMake to find the dummy
|
||||
# libcuda.so provided by the NVIDIA CUDA Toolkit for
|
||||
# cross-compiling CUDA on a host without a GPU.
|
||||
KOKKOS_FIND_IMPORTED(CUDA INTERFACE
|
||||
LIBRARIES cudart cuda
|
||||
LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH /usr/local/cuda
|
||||
LIBRARY_SUFFIXES lib lib64 lib/stubs lib64/stubs
|
||||
ALLOW_SYSTEM_PATH_FALLBACK
|
||||
)
|
||||
ELSE()
|
||||
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
|
||||
LINK_LIBRARIES cuda
|
||||
)
|
||||
IF (NOT CUDAToolkit_ROOT)
|
||||
IF (NOT CUDA_ROOT)
|
||||
SET(CUDA_ROOT $ENV{CUDA_ROOT})
|
||||
ENDIF()
|
||||
IF(CUDA_ROOT)
|
||||
SET(CUDAToolkit_ROOT ${CUDA_ROOT})
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
|
||||
find_package(CUDAToolkit)
|
||||
ELSE()
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
||||
ENDIF()
|
||||
|
||||
|
||||
IF (TARGET CUDA::cudart)
|
||||
SET(FOUND_CUDART TRUE)
|
||||
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart)
|
||||
ELSE()
|
||||
SET(FOUND_CUDART FALSE)
|
||||
ENDIF()
|
||||
|
||||
# Record in FOUND_CUDA_DRIVER whether the CUDA::cuda_driver imported target
# exists, and export the target when it does. Both branches must assign the
# same variable so that a missing driver yields an explicit FALSE instead of
# leaving FOUND_CUDA_DRIVER unset (or holding a stale value).
IF (TARGET CUDA::cuda_driver)
  SET(FOUND_CUDA_DRIVER TRUE)
  KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver)
ELSE()
  SET(FOUND_CUDA_DRIVER FALSE)
ENDIF()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA DEFAULT_MSG FOUND_CUDART FOUND_CUDA_DRIVER)
|
||||
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
|
||||
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
|
||||
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
@ -8,8 +9,6 @@
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Kokkos is licensed under 3-clause BSD terms of use:
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -41,18 +40,43 @@
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#define KOKKOS_IMPL_COMPILING_LIBRARY true
|
||||
#include <Kokkos_Core.hpp>
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutRight, OpenMP,
|
||||
int64_t)
|
||||
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutLeft, OpenMP,
|
||||
int64_t)
|
||||
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutStride, OpenMP,
|
||||
int64_t)
|
||||
KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****, LayoutStride, OpenMP, int64_t)
|
||||
#include <iostream>
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
int main() {
|
||||
cudaDeviceProp device_properties;
|
||||
const cudaError_t error = cudaGetDeviceProperties(&device_properties,
|
||||
/*device*/ 0);
|
||||
if (error != cudaSuccess) {
|
||||
std::cout << "CUDA error: " << cudaGetErrorString(error) << '\n';
|
||||
return error;
|
||||
}
|
||||
unsigned int const compute_capability =
|
||||
device_properties.major * 10 + device_properties.minor;
|
||||
#ifdef SM_ONLY
|
||||
std::cout << compute_capability;
|
||||
#else
|
||||
switch (compute_capability) {
|
||||
// clang-format off
|
||||
case 30: std::cout << "Set -DKokkos_ARCH_KEPLER30=ON ." << std::endl; break;
|
||||
case 32: std::cout << "Set -DKokkos_ARCH_KEPLER32=ON ." << std::endl; break;
|
||||
case 35: std::cout << "Set -DKokkos_ARCH_KEPLER35=ON ." << std::endl; break;
|
||||
case 37: std::cout << "Set -DKokkos_ARCH_KEPLER37=ON ." << std::endl; break;
|
||||
case 50: std::cout << "Set -DKokkos_ARCH_MAXWELL50=ON ." << std::endl; break;
|
||||
case 52: std::cout << "Set -DKokkos_ARCH_MAXWELL52=ON ." << std::endl; break;
|
||||
case 53: std::cout << "Set -DKokkos_ARCH_MAXWELL53=ON ." << std::endl; break;
|
||||
case 60: std::cout << "Set -DKokkos_ARCH_PASCAL60=ON ." << std::endl; break;
|
||||
case 61: std::cout << "Set -DKokkos_ARCH_PASCAL61=ON ." << std::endl; break;
|
||||
case 70: std::cout << "Set -DKokkos_ARCH_VOLTA70=ON ." << std::endl; break;
|
||||
case 72: std::cout << "Set -DKokkos_ARCH_VOLTA72=ON ." << std::endl; break;
|
||||
case 75: std::cout << "Set -DKokkos_ARCH_TURING75=ON ." << std::endl; break;
|
||||
case 80: std::cout << "Set -DKokkos_ARCH_AMPERE80=ON ." << std::endl; break;
|
||||
default:
|
||||
std::cout << "Compute capability " << compute_capability
|
||||
<< " is not supported" << std::endl;
|
||||
// clang-format on
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
@ -88,7 +88,7 @@ FUNCTION(KOKKOS_ADD_TEST)
|
||||
if (KOKKOS_HAS_TRILINOS)
|
||||
CMAKE_PARSE_ARGUMENTS(TEST
|
||||
""
|
||||
"EXE;NAME"
|
||||
"EXE;NAME;TOOL"
|
||||
""
|
||||
${ARGN})
|
||||
IF(TEST_EXE)
|
||||
@ -104,10 +104,15 @@ FUNCTION(KOKKOS_ADD_TEST)
|
||||
NUM_MPI_PROCS 1
|
||||
${TEST_UNPARSED_ARGUMENTS}
|
||||
)
|
||||
|
||||
if(TEST_TOOL)
|
||||
add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool
|
||||
set_property(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>")
|
||||
endif()
|
||||
else()
|
||||
CMAKE_PARSE_ARGUMENTS(TEST
|
||||
"WILL_FAIL"
|
||||
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME"
|
||||
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME;TOOL"
|
||||
"CATEGORIES;CMD_ARGS"
|
||||
${ARGN})
|
||||
# To match Tribits, we should always be receiving
|
||||
@ -135,6 +140,10 @@ FUNCTION(KOKKOS_ADD_TEST)
|
||||
IF(TEST_PASS_REGULAR_EXPRESSION)
|
||||
SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION})
|
||||
ENDIF()
|
||||
if(TEST_TOOL)
|
||||
add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool
|
||||
set_property(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>")
|
||||
endif()
|
||||
VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS})
|
||||
endif()
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -2,11 +2,14 @@
|
||||
FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION)
|
||||
#all optimizations off by default
|
||||
KOKKOS_OPTION(ARCH_${SUFFIX} OFF BOOL "Optimize for ${DESCRIPTION} (${DEV_TYPE})")
|
||||
IF (KOKKOS_ARCH_${SUFFIX})
|
||||
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
|
||||
SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE)
|
||||
SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE)
|
||||
SET(KOKKOS_OPTION_TYPES ${KOKKOS_OPTION_TYPES} PARENT_SCOPE)
|
||||
IF(KOKKOS_ARCH_${SUFFIX})
|
||||
LIST(APPEND KOKKOS_ENABLED_ARCH_LIST ${SUFFIX})
|
||||
SET(KOKKOS_ENABLED_ARCH_LIST ${KOKKOS_ENABLED_ARCH_LIST} PARENT_SCOPE)
|
||||
ENDIF()
|
||||
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
|
||||
ENDFUNCTION()
|
||||
|
||||
|
||||
@ -15,6 +18,10 @@ KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID)
|
||||
KOKKOS_CFG_DEPENDS(ARCH DEVICES)
|
||||
KOKKOS_CFG_DEPENDS(ARCH OPTIONS)
|
||||
|
||||
KOKKOS_CHECK_DEPRECATED_OPTIONS(
|
||||
ARCH_EPYC "Please replace EPYC with ZEN or ZEN2, depending on your platform"
|
||||
ARCH_RYZEN "Please replace RYZEN with ZEN or ZEN2, depending on your platform"
|
||||
)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# List of possible host architectures.
|
||||
@ -51,9 +58,12 @@ KOKKOS_ARCH_OPTION(PASCAL61 GPU "NVIDIA Pascal generation CC 6.1")
|
||||
KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0")
|
||||
KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2")
|
||||
KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5")
|
||||
KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture")
|
||||
KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0")
|
||||
KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture")
|
||||
KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture")
|
||||
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
|
||||
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
|
||||
KOKKOS_ARCH_OPTION(INTEL_GEN GPU "Intel GPUs Gen9+")
|
||||
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
#Regardless of version, make sure we define the general architecture name
|
||||
@ -75,6 +85,10 @@ IF (KOKKOS_ENABLE_CUDA)
|
||||
IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72)
|
||||
SET(KOKKOS_ARCH_VOLTA ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_AMPERE80)
|
||||
SET(KOKKOS_ARCH_AMPERE ON)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
||||
@ -88,9 +102,10 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
|
||||
${COMMON_WARNINGS})
|
||||
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
PGI NO-VALUE-SPECIFIED
|
||||
GNU ${GNU_WARNINGS}
|
||||
DEFAULT ${COMMON_WARNINGS}
|
||||
COMPILER_ID CMAKE_CXX_COMPILER_ID
|
||||
PGI NO-VALUE-SPECIFIED
|
||||
GNU ${GNU_WARNINGS}
|
||||
DEFAULT ${COMMON_WARNINGS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -102,6 +117,9 @@ GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
|
||||
IF (KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda")
|
||||
IF(KOKKOS_COMPILER_CUDA_VERSION GREATER_EQUAL 110)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
@ -113,7 +131,6 @@ ENDIF()
|
||||
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
SET(CUDA_ARCH_FLAG "--cuda-gpu-arch")
|
||||
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda)
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
|
||||
@ -133,6 +150,15 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_HIP_OPTIONS ---------------------------
|
||||
#clear anything that might be in the cache
|
||||
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP)
|
||||
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
|
||||
ENDIF()
|
||||
|
||||
|
||||
IF (KOKKOS_ARCH_ARMV80)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
@ -167,12 +193,21 @@ IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_EPYC)
|
||||
IF (KOKKOS_ARCH_ZEN)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Intel -mavx2
|
||||
DEFAULT -march=znver1 -mtune=znver1
|
||||
)
|
||||
SET(KOKKOS_ARCH_AMD_EPYC ON)
|
||||
SET(KOKKOS_ARCH_AMD_ZEN ON)
|
||||
SET(KOKKOS_ARCH_AMD_AVX2 ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_ZEN2)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Intel -mavx2
|
||||
DEFAULT -march=znver2 -mtune=znver2
|
||||
)
|
||||
SET(KOKKOS_ARCH_AMD_ZEN2 ON)
|
||||
SET(KOKKOS_ARCH_AMD_AVX2 ON)
|
||||
ENDIF()
|
||||
|
||||
@ -216,14 +251,6 @@ IF (KOKKOS_ARCH_BDW)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_EPYC)
|
||||
SET(KOKKOS_ARCH_AMD_AVX2 ON)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Intel -mvax2
|
||||
DEFAULT -march=znver1 -mtune=znver1
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_KNL)
|
||||
#avx512-mic
|
||||
SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable
|
||||
@ -253,7 +280,7 @@ IF (KOKKOS_ARCH_SKX)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_EPYC)
|
||||
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2)
|
||||
SET(KOKKOS_USE_ISA_X86_64 ON)
|
||||
ENDIF()
|
||||
|
||||
@ -296,6 +323,21 @@ IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
# Clang needs mcx16 option enabled for Windows atomic functions
|
||||
IF (CMAKE_CXX_COMPILER_ID STREQUAL Clang AND WIN32)
|
||||
COMPILER_SPECIFIC_OPTIONS(
|
||||
Clang -mcx16
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
# MSVC ABI has many deprecation warnings, so ignore them
|
||||
IF (CMAKE_CXX_COMPILER_ID STREQUAL MSVC OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
COMPILER_SPECIFIC_DEFS(
|
||||
Clang _CRT_SECURE_NO_WARNINGS
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
||||
#Right now we cannot get the compiler ID when cross-compiling, so just check
|
||||
#that HIP is enabled
|
||||
IF (Kokkos_ENABLE_HIP)
|
||||
@ -324,11 +366,15 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
|
||||
ELSE()
|
||||
SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
|
||||
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
LIST(APPEND KOKKOS_CUDA_ARCH_FLAGS ${FLAG})
|
||||
SET(KOKKOS_CUDA_ARCH_FLAGS ${KOKKOS_CUDA_ARCH_FLAGS} PARENT_SCOPE)
|
||||
LIST(APPEND KOKKOS_CUDA_ARCH_LIST ${ARCH})
|
||||
SET(KOKKOS_CUDA_ARCH_LIST ${KOKKOS_CUDA_ARCH_LIST} PARENT_SCOPE)
|
||||
ENDFUNCTION()
|
||||
|
||||
|
||||
@ -346,6 +392,7 @@ CHECK_CUDA_ARCH(PASCAL61 sm_61)
|
||||
CHECK_CUDA_ARCH(VOLTA70 sm_70)
|
||||
CHECK_CUDA_ARCH(VOLTA72 sm_72)
|
||||
CHECK_CUDA_ARCH(TURING75 sm_75)
|
||||
CHECK_CUDA_ARCH(AMPERE80 sm_80)
|
||||
|
||||
SET(AMDGPU_ARCH_ALREADY_SPECIFIED "")
|
||||
FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
|
||||
@ -372,12 +419,19 @@ ENDFUNCTION()
|
||||
CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
|
||||
CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
|
||||
|
||||
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
|
||||
MESSAGE(SEND_ERROR "HIP enabled but no AMD GPU architecture currently enabled. "
|
||||
"Please enable one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'.")
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ENABLE_OPENMPTARGET)
|
||||
SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG})
|
||||
IF (CLANG_CUDA_ARCH)
|
||||
STRING(REPLACE "sm_" "cc" PGI_CUDA_ARCH ${CLANG_CUDA_ARCH})
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda
|
||||
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
|
||||
PGI -gpu=${PGI_CUDA_ARCH}
|
||||
)
|
||||
ENDIF()
|
||||
SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG})
|
||||
@ -386,10 +440,39 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
|
||||
Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa
|
||||
)
|
||||
ENDIF()
|
||||
IF (KOKKOS_ARCH_INTEL_GEN)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
|
||||
MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled. Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.")
|
||||
# Try to autodetect the CUDA Compute Capability by asking the device
|
||||
SET(_BINARY_TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/compile_tests/CUDAComputeCapabilityWorkdir)
|
||||
FILE(REMOVE_RECURSE ${_BINARY_TEST_DIR})
|
||||
FILE(MAKE_DIRECTORY ${_BINARY_TEST_DIR})
|
||||
|
||||
TRY_RUN(
|
||||
_RESULT
|
||||
_COMPILE_RESULT
|
||||
${_BINARY_TEST_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc
|
||||
COMPILE_DEFINITIONS -DSM_ONLY
|
||||
RUN_OUTPUT_VARIABLE _CUDA_COMPUTE_CAPABILITY)
|
||||
LIST(FIND KOKKOS_CUDA_ARCH_FLAGS sm_${_CUDA_COMPUTE_CAPABILITY} FLAG_INDEX)
|
||||
IF(_COMPILE_RESULT AND _RESULT EQUAL 0 AND NOT FLAG_INDEX EQUAL -1)
|
||||
MESSAGE(STATUS "Detected CUDA Compute Capability ${_CUDA_COMPUTE_CAPABILITY}")
|
||||
LIST(GET KOKKOS_CUDA_ARCH_LIST ${FLAG_INDEX} ARCHITECTURE)
|
||||
KOKKOS_SET_OPTION(ARCH_${ARCHITECTURE} ON)
|
||||
CHECK_CUDA_ARCH(${ARCHITECTURE} sm_${_CUDA_COMPUTE_CAPABILITY})
|
||||
LIST(APPEND KOKKOS_ENABLED_ARCH_LIST ${ARCHITECTURE})
|
||||
ELSE()
|
||||
MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled and auto-detection failed. "
|
||||
"Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.\n"
|
||||
"You can yourself try to compile ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc and run the executable. "
|
||||
"If you are cross-compiling, you should try to do this on a compute node.")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
#CMake verbose is kind of pointless
|
||||
@ -453,4 +536,3 @@ MESSAGE(STATUS "Architectures:")
|
||||
FOREACH(Arch ${KOKKOS_ENABLED_ARCH_LIST})
|
||||
MESSAGE(STATUS " ${Arch}")
|
||||
ENDFOREACH()
|
||||
|
||||
|
||||
@ -4,33 +4,54 @@ SET(KOKKOS_CXX_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
SET(KOKKOS_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
|
||||
SET(KOKKOS_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})
|
||||
|
||||
# Check if the compiler is nvcc (which really means nvcc_wrapper).
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
|
||||
COMMAND grep nvcc
|
||||
COMMAND wc -l
|
||||
OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
IF(Kokkos_ENABLE_CUDA)
|
||||
# Check if the compiler is nvcc (which really means nvcc_wrapper).
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
|
||||
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} )
|
||||
|
||||
STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC)
|
||||
|
||||
|
||||
STRING(REGEX REPLACE "^ +" ""
|
||||
INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
|
||||
|
||||
STRING(REGEX REPLACE "^ +" ""
|
||||
INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
|
||||
IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1)
|
||||
SET(INTERNAL_HAVE_COMPILER_NVCC true)
|
||||
ELSE()
|
||||
SET(INTERNAL_HAVE_COMPILER_NVCC false)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(INTERNAL_HAVE_COMPILER_NVCC)
|
||||
# Save the host compiler id before overwriting it.
|
||||
SET(KOKKOS_CXX_HOST_COMPILER_ID ${KOKKOS_CXX_COMPILER_ID})
|
||||
|
||||
# SET the compiler id to nvcc. We use the value used by CMake 3.8.
|
||||
SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE)
|
||||
|
||||
# SET nvcc's compiler version.
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
|
||||
COMMAND grep release
|
||||
OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
|
||||
TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
|
||||
STRING(REGEX MATCH "V[0-9]+\\.[0-9]+\\.[0-9]+"
|
||||
TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE})
|
||||
STRING(SUBSTRING ${TEMP_CXX_COMPILER_VERSION} 1 -1 TEMP_CXX_COMPILER_VERSION)
|
||||
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
|
||||
MESSAGE(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_HIP)
  # Ask the configured C++ compiler for its version banner.
  EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
                  OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Collapse the banner onto one line. The expansion is quoted so that an
  # empty banner is still passed as one (empty) argument instead of making
  # STRING(REPLACE) fail with an argument-count error.
  STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE "${INTERNAL_COMPILER_VERSION}")
  SET(KOKKOS_CXX_COMPILER_ID HIP CACHE STRING INTERNAL FORCE)

  # The first x.y.z triple found in the banner is taken as the version.
  # Quoted for the same reason as above; with no match the version is empty.
  STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+"
         TEMP_CXX_COMPILER_VERSION "${INTERNAL_COMPILER_VERSION_ONE_LINE}")
  SET(KOKKOS_CXX_COMPILER_VERSION "${TEMP_CXX_COMPILER_VERSION}" CACHE STRING INTERNAL FORCE)
  MESSAGE(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
ENDIF()
|
||||
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
# The Cray compiler reports as Clang to most versions of CMake
|
||||
@ -42,6 +63,16 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
|
||||
SET(KOKKOS_CLANG_IS_CRAY TRUE)
|
||||
ENDIF()
|
||||
# The clang based Intel compiler reports as Clang to most versions of CMake
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
|
||||
COMMAND grep icpx
|
||||
COMMAND wc -l
|
||||
OUTPUT_VARIABLE INTERNAL_HAVE_INTEL_COMPILER
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
|
||||
SET(KOKKOS_CLANG_IS_INTEL TRUE)
|
||||
SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY)
|
||||
@ -65,6 +96,7 @@ SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher"
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 9.0.69 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 3.5.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")
|
||||
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
@ -84,6 +116,10 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.5.0)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.1)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY)
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY AND NOT "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
# The clang "version" doesn't actually tell you what runtimes and tools
|
||||
# were built into Clang. We should therefore make sure that libomp
|
||||
# was actually built into Clang. Otherwise the user will get nonsensical
|
||||
@ -49,11 +49,11 @@ ENDIF()
|
||||
|
||||
IF (KOKKOS_CXX_STANDARD STREQUAL 17)
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7)
|
||||
MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need 17 support")
|
||||
MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need C++17 support.")
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC. Please reduce the C++ standard to 14. No versions of NVCC currently support 17.")
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11)
|
||||
MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC ${KOKKOS_CXX_COMPILER_VERSION}. NVCC only supports C++17 from version 11 on. Please reduce the C++ standard to 14 or upgrade the compiler if you need C++17 support.")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
||||
@ -36,25 +36,51 @@ IF(KOKKOS_ENABLE_OPENMP)
|
||||
IF(KOKKOS_CLANG_IS_CRAY)
|
||||
SET(ClangOpenMPFlag -fopenmp)
|
||||
ENDIF()
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang ${ClangOpenMPFlag}
|
||||
AppleClang -Xpreprocessor -fopenmp
|
||||
PGI -mp
|
||||
NVIDIA -Xcompiler -fopenmp
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
XL -qsmp=omp
|
||||
DEFAULT -fopenmp
|
||||
)
|
||||
COMPILER_SPECIFIC_LIBS(
|
||||
AppleClang -lomp
|
||||
)
|
||||
IF(KOKKOS_CLANG_IS_INTEL)
|
||||
SET(ClangOpenMPFlag -fiopenmp)
|
||||
ENDIF()
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
#expression /openmp yields error, so add a specific Clang flag
|
||||
COMPILER_SPECIFIC_OPTIONS(Clang /clang:-fopenmp)
|
||||
#link omp library from LLVM lib dir
|
||||
get_filename_component(LLVM_BIN_DIR ${CMAKE_CXX_COMPILER_AR} DIRECTORY)
|
||||
COMPILER_SPECIFIC_LIBS(Clang "${LLVM_BIN_DIR}/../lib/libomp.lib")
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
|
||||
Clang -Xcompiler ${ClangOpenMPFlag}
|
||||
PGI -Xcompiler -mp
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
XL -Xcompiler -qsmp=omp
|
||||
DEFAULT -Xcompiler -fopenmp
|
||||
)
|
||||
ELSE()
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang ${ClangOpenMPFlag}
|
||||
AppleClang -Xpreprocessor -fopenmp
|
||||
PGI -mp
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
XL -qsmp=omp
|
||||
DEFAULT -fopenmp
|
||||
)
|
||||
COMPILER_SPECIFIC_LIBS(
|
||||
AppleClang -lomp
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend")
|
||||
IF (KOKKOS_ENABLE_OPENMPTARGET)
|
||||
SET(ClangOpenMPFlag -fopenmp=libomp)
|
||||
IF(KOKKOS_CLANG_IS_CRAY)
|
||||
SET(ClangOpenMPFlag -fopenmp)
|
||||
ENDIF()
|
||||
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang -fopenmp -fopenmp=libomp
|
||||
Clang ${ClangOpenMPFlag} -Wno-openmp-mapping
|
||||
IntelClang -fiopenmp -Wno-openmp-mapping
|
||||
XL -qsmp=omp -qoffload -qnoeh
|
||||
PGI -mp=gpu
|
||||
DEFAULT -fopenmp
|
||||
)
|
||||
COMPILER_SPECIFIC_DEFS(
|
||||
@ -65,6 +91,9 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
|
||||
# COMPILER_SPECIFIC_LIBS(
|
||||
# Clang -lopenmptarget
|
||||
# )
|
||||
IF(KOKKOS_CXX_STANDARD LESS 17)
|
||||
MESSAGE(FATAL_ERROR "OpenMPTarget backend requires C++17 or newer")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
|
||||
@ -76,6 +105,9 @@ KOKKOS_DEVICE_OPTION(CUDA ${CUDA_DEFAULT} DEVICE "Whether to build CUDA backend"
|
||||
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
GLOBAL_SET(KOKKOS_DONT_ALLOW_EXTENSIONS "CUDA enabled")
|
||||
IF(WIN32)
|
||||
GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS -x cu)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
# We want this to default to OFF for cache reasons, but if no
|
||||
|
||||
@ -45,10 +45,9 @@ UNSET(_UPPERCASE_CMAKE_BUILD_TYPE)
|
||||
KOKKOS_ENABLE_OPTION(LARGE_MEM_TESTS OFF "Whether to perform extra large memory tests")
|
||||
KOKKOS_ENABLE_OPTION(DEBUG_BOUNDS_CHECK OFF "Whether to use bounds checking - will increase runtime")
|
||||
KOKKOS_ENABLE_OPTION(COMPILER_WARNINGS OFF "Whether to print all compiler warnings")
|
||||
KOKKOS_ENABLE_OPTION(PROFILING ON "Whether to create bindings for profiling tools")
|
||||
KOKKOS_ENABLE_OPTION(PROFILING_LOAD_PRINT OFF "Whether to print information about which profiling tools got loaded")
|
||||
KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tuning tools")
|
||||
KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE OFF "Whether to enable deprecated code")
|
||||
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}")
|
||||
|
||||
@ -47,6 +47,13 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
|
||||
SET(CAMEL_NAME Kokkos_${CAMEL_SUFFIX})
|
||||
STRING(TOUPPER ${CAMEL_NAME} UC_NAME)
|
||||
|
||||
LIST(APPEND KOKKOS_OPTION_KEYS ${CAMEL_SUFFIX})
|
||||
SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE)
|
||||
LIST(APPEND KOKKOS_OPTION_VALUES "${DOCSTRING}")
|
||||
SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE)
|
||||
LIST(APPEND KOKKOS_OPTION_TYPES ${TYPE})
|
||||
SET(KOKKOS_OPTION_TYPES ${KOKKOS_OPTION_TYPES} PARENT_SCOPE)
|
||||
|
||||
# Make sure this appears in the cache with the appropriate DOCSTRING
|
||||
SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING})
|
||||
|
||||
@ -73,7 +80,21 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
|
||||
ELSE()
|
||||
SET(${UC_NAME} ${DEFAULT} PARENT_SCOPE)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
# Force an already-registered Kokkos option to a new value.
#   CAMEL_SUFFIX - option suffix as recorded in KOKKOS_OPTION_KEYS; the cache
#                  entry that gets written is Kokkos_<CAMEL_SUFFIX>
#   VALUE        - value to store
# Stops with FATAL_ERROR when the suffix is not found in KOKKOS_OPTION_KEYS.
FUNCTION(kokkos_set_option CAMEL_SUFFIX VALUE)
  LIST(FIND KOKKOS_OPTION_KEYS ${CAMEL_SUFFIX} KEY_POSITION)
  IF(KEY_POSITION EQUAL -1)
    MESSAGE(FATAL_ERROR "Couldn't set value for Kokkos_${CAMEL_SUFFIX}")
  ENDIF()

  # Type and docstring are looked up at the same index in the parallel lists.
  LIST(GET KOKKOS_OPTION_TYPES ${KEY_POSITION} CACHE_TYPE)
  LIST(GET KOKKOS_OPTION_VALUES ${KEY_POSITION} CACHE_DOC)

  # Overwrite the cache entry, reusing the recorded type and docstring.
  SET(CACHE_ENTRY Kokkos_${CAMEL_SUFFIX})
  SET(${CACHE_ENTRY} ${VALUE} CACHE ${CACHE_TYPE} ${CACHE_DOC} FORCE)
  MESSAGE(STATUS "Setting ${CACHE_ENTRY}=${VALUE}")

  # Mirror the value into the caller's scope under the upper-cased name.
  STRING(TOUPPER ${CACHE_ENTRY} UPPER_ENTRY)
  SET(${UPPER_ENTRY} ${VALUE} PARENT_SCOPE)
ENDFUNCTION()
|
||||
|
||||
FUNCTION(kokkos_append_config_line LINE)
|
||||
@ -109,8 +130,8 @@ ENDMACRO()
|
||||
|
||||
MACRO(kokkos_export_imported_tpl NAME)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS)
|
||||
GET_TARGET_PROPERTY(LIB_TYPE ${NAME} TYPE)
|
||||
IF (${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY")
|
||||
GET_TARGET_PROPERTY(LIB_IMPORTED ${NAME} IMPORTED)
|
||||
IF (NOT LIB_IMPORTED)
|
||||
# This is not an imported target
|
||||
# This an interface library that we created
|
||||
INSTALL(
|
||||
@ -123,12 +144,18 @@ MACRO(kokkos_export_imported_tpl NAME)
|
||||
ELSE()
|
||||
#make sure this also gets "exported" in the config file
|
||||
KOKKOS_APPEND_CONFIG_LINE("IF(NOT TARGET ${NAME})")
|
||||
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)")
|
||||
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
|
||||
|
||||
GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION)
|
||||
IF(TPL_LIBRARY)
|
||||
KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}")
|
||||
GET_TARGET_PROPERTY(LIB_TYPE ${NAME} TYPE)
|
||||
IF (${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY")
|
||||
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} INTERFACE IMPORTED)")
|
||||
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
|
||||
ELSE()
|
||||
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)")
|
||||
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
|
||||
GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION)
|
||||
IF(TPL_LIBRARY)
|
||||
KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
GET_TARGET_PROPERTY(TPL_INCLUDES ${NAME} INTERFACE_INCLUDE_DIRECTORIES)
|
||||
@ -737,18 +764,22 @@ FUNCTION(kokkos_link_tpl TARGET)
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
|
||||
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
|
||||
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIP)
|
||||
CMAKE_PARSE_ARGUMENTS(
|
||||
PARSE
|
||||
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
|
||||
""
|
||||
"COMPILER_ID"
|
||||
"${COMPILERS}"
|
||||
${ARGN})
|
||||
IF(PARSE_UNPARSED_ARGUMENTS)
|
||||
MESSAGE(SEND_ERROR "'${PARSE_UNPARSED_ARGUMENTS}' argument(s) not recognized when providing compiler specific options")
|
||||
ENDIF()
|
||||
|
||||
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
|
||||
IF(PARSE_COMPILER_ID)
|
||||
SET(COMPILER ${${PARSE_COMPILER_ID}})
|
||||
ELSE()
|
||||
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
|
||||
ENDIF()
|
||||
|
||||
SET(COMPILER_SPECIFIC_FLAGS_TMP)
|
||||
FOREACH(COMP ${COMPILERS})
|
||||
@ -792,6 +823,14 @@ FUNCTION(COMPILER_SPECIFIC_FLAGS)
|
||||
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_OPTIONS LINK_OPTIONS)
|
||||
ENDFUNCTION(COMPILER_SPECIFIC_FLAGS)
|
||||
|
||||
# Forward all arguments to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# COMPILE_OPTIONS keyword appended (and no LINK_OPTIONS keyword).
FUNCTION(COMPILER_SPECIFIC_OPTIONS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_OPTIONS)
ENDFUNCTION()
|
||||
|
||||
# Forward all arguments to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# LINK_OPTIONS keyword appended (and no COMPILE_OPTIONS keyword).
FUNCTION(COMPILER_SPECIFIC_LINK_OPTIONS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} LINK_OPTIONS)
ENDFUNCTION()
|
||||
|
||||
# Forward all arguments to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# COMPILE_DEFINITIONS keyword appended.
FUNCTION(COMPILER_SPECIFIC_DEFS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_DEFINITIONS)
ENDFUNCTION()
|
||||
@ -799,3 +838,36 @@ ENDFUNCTION(COMPILER_SPECIFIC_DEFS)
|
||||
# Forward all arguments to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# LINK_LIBRARIES keyword appended.
FUNCTION(COMPILER_SPECIFIC_LIBS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} LINK_LIBRARIES)
ENDFUNCTION()
|
||||
|
||||
# Split a flat, alternating sequence
#   key1;value1;key2;value2;...
# into a key list and one variable per key.
#   VAR_PREFIX    - prefix of the per-key variables: ${VAR_PREFIX}<key> = <value>
#   KEY_LIST_NAME - name of the variable that receives the list of keys
# Example:
#   kokkos_key_value_map(ARCH ALL_ARCHES key1;value1;key2;value2)
# leaves ALL_ARCHES=key1;key2, ARCHkey1=value1 and ARCHkey2=value2.
# This is a macro, so the results and the helper variables PARSE_KEY and
# CURRENT_KEY are all set in the caller's scope.
MACRO(KOKKOS_KEY_VALUE_MAP VAR_PREFIX KEY_LIST_NAME)
  SET(PARSE_KEY ON)
  SET(${KEY_LIST_NAME})
  FOREACH(ENTRY ${ARGN})
    IF(NOT PARSE_KEY)
      # Second half of a pair: store the value under the pending key.
      SET(${VAR_PREFIX}${CURRENT_KEY} ${ENTRY})
      SET(PARSE_KEY ON)
    ELSE()
      # First half of a pair: remember the key and record it in the list.
      SET(CURRENT_KEY ${ENTRY})
      SET(PARSE_KEY OFF)
      LIST(APPEND ${KEY_LIST_NAME} ${CURRENT_KEY})
    ENDIF()
  ENDFOREACH()
ENDMACRO()
|
||||
|
||||
# Report an error for every removed Kokkos option the user still passes.
# Arguments are alternating pairs:
#   <option suffix> <hint text telling the user what to use instead>
# For each pair, if the variable Kokkos_<option suffix> is defined, a
# SEND_ERROR is emitted that names the option, shows its value, and appends
# the hint text.
FUNCTION(KOKKOS_CHECK_DEPRECATED_OPTIONS)
  KOKKOS_KEY_VALUE_MAP(DEPRECATED_MSG_ DEPRECATED_LIST ${ARGN})
  FOREACH(OPTION_SUFFIX ${DEPRECATED_LIST})
    SET(OPTION_NAME Kokkos_${OPTION_SUFFIX})
    SET(OPTION_MESSAGE ${DEPRECATED_MSG_${OPTION_SUFFIX}})
    IF(DEFINED ${OPTION_NAME}) # This variable has been given by the user as on or off
      # Use OPTION_MESSAGE (set above) so the replacement hint is actually printed.
      MESSAGE(SEND_ERROR "Removed option ${OPTION_NAME} has been given with value ${${OPTION_NAME}}. ${OPTION_MESSAGE}")
    ENDIF()
  ENDFOREACH()
ENDFUNCTION()
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
INCLUDE(CMakePackageConfigHelpers)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
|
||||
INCLUDE(GNUInstallDirs)
|
||||
|
||||
#Set all the variables needed for KokkosConfig.cmake
|
||||
|
||||
@ -28,19 +28,30 @@ FUNCTION(kokkos_set_cxx_standard_feature standard)
|
||||
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
|
||||
ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME})
|
||||
MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature")
|
||||
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL GNU OR KOKKOS_CXX_HOST_COMPILER_ID STREQUAL Clang))
|
||||
SET(SUPPORTED_NVCC_FLAGS "-std=c++11;-std=c++14;-std=c++17")
|
||||
IF (NOT ${${STANDARD_NAME}} IN_LIST SUPPORTED_NVCC_FLAGS)
|
||||
MESSAGE(FATAL_ERROR "CMake wants to use ${${STANDARD_NAME}} which is not supported by NVCC. Using a more recent host compiler or a more recent CMake version might help.")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
#MSVC doesn't need a command line flag, that doesn't mean it has no support
|
||||
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
|
||||
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
|
||||
ELSEIF((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32)
|
||||
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
|
||||
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "")
|
||||
ELSE()
|
||||
#nope, we can't do anything here
|
||||
MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.")
|
||||
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "")
|
||||
ENDIF()
|
||||
|
||||
IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES)
|
||||
MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported")
|
||||
IF(NOT WIN32)
|
||||
IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES)
|
||||
MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -123,7 +134,7 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
|
||||
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
ELSEIF((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32))
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
|
||||
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
||||
ELSE()
|
||||
|
||||
@ -13,10 +13,10 @@ KOKKOS_TPL_OPTION(LIBNUMA Off)
|
||||
KOKKOS_TPL_OPTION(MEMKIND Off)
|
||||
KOKKOS_TPL_OPTION(CUDA Off)
|
||||
KOKKOS_TPL_OPTION(LIBRT Off)
|
||||
KOKKOS_TPL_OPTION(LIBDL On)
|
||||
|
||||
IF(KOKKOS_ENABLE_PROFILING AND NOT KOKKOS_ENABLE_LIBDL)
|
||||
MESSAGE(SEND_ERROR "Kokkos_ENABLE_PROFILING requires Kokkos_ENABLE_LIBDL=ON")
|
||||
IF (WIN32)
|
||||
KOKKOS_TPL_OPTION(LIBDL Off)
|
||||
ELSE()
|
||||
KOKKOS_TPL_OPTION(LIBDL On)
|
||||
ENDIF()
|
||||
|
||||
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)
|
||||
|
||||
@ -21,10 +21,6 @@ IF (KOKKOS_HAS_TRILINOS)
|
||||
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
|
||||
SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
|
||||
SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
|
||||
ENDIF()
|
||||
@ -134,7 +130,7 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME)
|
||||
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS})
|
||||
#All executables must link to all the kokkos targets
|
||||
#This is just private linkage because exe is final
|
||||
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE kokkos)
|
||||
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE Kokkos::kokkos)
|
||||
endif()
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -174,16 +170,42 @@ FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
|
||||
ENDFUNCTION()
|
||||
|
||||
MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_test_cxx_std.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS)
|
||||
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake)
|
||||
ENDIF()
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
|
||||
# This is needed for both regular build and install tests
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
|
||||
#set an internal option, if not already set
|
||||
SET(Kokkos_INSTALL_TESTING OFF CACHE INTERNAL "Whether to build tests and examples against installation")
|
||||
IF (Kokkos_INSTALL_TESTING)
|
||||
SET(KOKKOS_ENABLE_TESTS ON)
|
||||
SET(KOKKOS_ENABLE_EXAMPLES ON)
|
||||
# This looks a little weird, but what we are doing
|
||||
# is to NOT build Kokkos but instead look for an
|
||||
# installed Kokkos - then build examples and tests
|
||||
# against that installed Kokkos
|
||||
FIND_PACKAGE(Kokkos REQUIRED)
|
||||
# Just grab the configuration from the installation
|
||||
FOREACH(DEV ${Kokkos_DEVICES})
|
||||
SET(KOKKOS_ENABLE_${DEV} ON)
|
||||
ENDFOREACH()
|
||||
FOREACH(OPT ${Kokkos_OPTIONS})
|
||||
SET(KOKKOS_ENABLE_${OPT} ON)
|
||||
ENDFOREACH()
|
||||
FOREACH(TPL ${Kokkos_TPLS})
|
||||
SET(KOKKOS_ENABLE_${TPL} ON)
|
||||
ENDFOREACH()
|
||||
FOREACH(ARCH ${Kokkos_ARCH})
|
||||
SET(KOKKOS_ARCH_${ARCH} ON)
|
||||
ENDFOREACH()
|
||||
ELSE()
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_test_cxx_std.cmake)
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake)
|
||||
IF (NOT KOKKOS_HAS_TRILINOS)
|
||||
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake)
|
||||
ENDIF()
|
||||
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
|
||||
ENDIF()
|
||||
ENDMACRO()
|
||||
|
||||
MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME)
|
||||
@ -310,28 +332,40 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME)
|
||||
LIST(REMOVE_DUPLICATES PARSE_SOURCES)
|
||||
ENDIF()
|
||||
|
||||
IF(PARSE_STATIC)
|
||||
SET(LINK_TYPE STATIC)
|
||||
ENDIF()
|
||||
|
||||
IF(PARSE_SHARED)
|
||||
SET(LINK_TYPE SHARED)
|
||||
ENDIF()
|
||||
|
||||
# MSVC and other platforms want to have
|
||||
# the headers included as source files
|
||||
# for better dependency detection
|
||||
ADD_LIBRARY(
|
||||
${LIBRARY_NAME}
|
||||
${LINK_TYPE}
|
||||
${PARSE_HEADERS}
|
||||
${PARSE_SOURCES}
|
||||
)
|
||||
|
||||
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME})
|
||||
|
||||
INSTALL(
|
||||
FILES ${PARSE_HEADERS}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT ${PACKAGE_NAME}
|
||||
)
|
||||
|
||||
#In case we are building in-tree, add an alias name
|
||||
#that matches the install Kokkos:: name
|
||||
ADD_LIBRARY(Kokkos::${LIBRARY_NAME} ALIAS ${LIBRARY_NAME})
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME)
|
||||
CMAKE_PARSE_ARGUMENTS(PARSE
|
||||
"ADD_BUILD_OPTIONS"
|
||||
""
|
||||
""
|
||||
${ARGN}
|
||||
)
|
||||
IF (KOKKOS_HAS_TRILINOS)
|
||||
TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN})
|
||||
TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS})
|
||||
#Stolen from Tribits - it can add prefixes
|
||||
SET(TRIBITS_LIBRARY_NAME_PREFIX "${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}")
|
||||
SET(TRIBITS_LIBRARY_NAME ${TRIBITS_LIBRARY_NAME_PREFIX}${LIBRARY_NAME})
|
||||
@ -346,8 +380,10 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME)
|
||||
#KOKKOS_SET_LIBRARY_PROPERTIES(${TRIBITS_LIBRARY_NAME} PLAIN_STYLE)
|
||||
ELSE()
|
||||
KOKKOS_INTERNAL_ADD_LIBRARY(
|
||||
${LIBRARY_NAME} ${ARGN})
|
||||
KOKKOS_SET_LIBRARY_PROPERTIES(${LIBRARY_NAME})
|
||||
${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS})
|
||||
IF (PARSE_ADD_BUILD_OPTIONS)
|
||||
KOKKOS_SET_LIBRARY_PROPERTIES(${LIBRARY_NAME})
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -364,17 +400,6 @@ ELSE()
|
||||
|
||||
ADD_LIBRARY(${NAME} INTERFACE)
|
||||
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME})
|
||||
|
||||
INSTALL(
|
||||
FILES ${PARSE_HEADERS}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
)
|
||||
|
||||
INSTALL(
|
||||
FILES ${PARSE_HEADERS}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT ${PACKAGE_NAME}
|
||||
)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
|
||||
4
lib/kokkos/config/yaml/volta.yaml
Normal file
4
lib/kokkos/config/yaml/volta.yaml
Normal file
@ -0,0 +1,4 @@
|
||||
packages:
|
||||
kokkos:
|
||||
variants: +cuda +openmp +volta70 +cuda_lambda +wrapper ^cuda@10.1
|
||||
compiler: [gcc@7.2.0]
|
||||
@ -2,7 +2,9 @@
|
||||
|
||||
KOKKOS_SUBPACKAGE(Containers)
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
IF (NOT Kokkos_INSTALL_TESTING)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
KOKKOS_ADD_TEST_DIRECTORIES(performance_tests)
|
||||
|
||||
@ -31,10 +31,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
TEST_TARGETS += test-cuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
OBJ_ROCM = TestROCm.o TestMain.o gtest-all.o
|
||||
TARGETS += KokkosContainers_PerformanceTest_ROCm
|
||||
TEST_TARGETS += test-rocm
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
OBJ_HIP = TestHIP.o TestMain.o gtest-all.o
|
||||
TARGETS += KokkosContainers_PerformanceTest_HIP
|
||||
TEST_TARGETS += test-hip
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
|
||||
@ -58,7 +58,7 @@ namespace Performance {
|
||||
// View functor
|
||||
template <typename DeviceType>
|
||||
struct InitViewFunctor {
|
||||
typedef Kokkos::View<double ***, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::View<double ***, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
InitViewFunctor(inviewtype &inview_) : _inview(inview_) {}
|
||||
@ -73,10 +73,10 @@ struct InitViewFunctor {
|
||||
}
|
||||
|
||||
struct SumComputationTest {
|
||||
typedef Kokkos::View<double ***, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::View<double ***, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
typedef Kokkos::View<double *, DeviceType> outviewtype;
|
||||
using outviewtype = Kokkos::View<double *, DeviceType>;
|
||||
outviewtype _outview;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -96,7 +96,7 @@ struct InitViewFunctor {
|
||||
|
||||
template <typename DeviceType>
|
||||
struct InitStrideViewFunctor {
|
||||
typedef Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
InitStrideViewFunctor(inviewtype &inview_) : _inview(inview_) {}
|
||||
@ -113,7 +113,7 @@ struct InitStrideViewFunctor {
|
||||
|
||||
template <typename DeviceType>
|
||||
struct InitViewRank7Functor {
|
||||
typedef Kokkos::View<double *******, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::View<double *******, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
InitViewRank7Functor(inviewtype &inview_) : _inview(inview_) {}
|
||||
@ -131,7 +131,7 @@ struct InitViewRank7Functor {
|
||||
// DynRankView functor
|
||||
template <typename DeviceType>
|
||||
struct InitDynRankViewFunctor {
|
||||
typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::DynRankView<double, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
InitDynRankViewFunctor(inviewtype &inview_) : _inview(inview_) {}
|
||||
@ -146,10 +146,10 @@ struct InitDynRankViewFunctor {
|
||||
}
|
||||
|
||||
struct SumComputationTest {
|
||||
typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
|
||||
using inviewtype = Kokkos::DynRankView<double, DeviceType>;
|
||||
inviewtype _inview;
|
||||
|
||||
typedef Kokkos::DynRankView<double, DeviceType> outviewtype;
|
||||
using outviewtype = Kokkos::DynRankView<double, DeviceType>;
|
||||
outviewtype _outview;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -169,8 +169,8 @@ struct InitDynRankViewFunctor {
|
||||
|
||||
template <typename DeviceType>
|
||||
void test_dynrankview_op_perf(const int par_size) {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using size_type = typename execution_space::size_type;
|
||||
const size_type dim_2 = 90;
|
||||
const size_type dim_3 = 30;
|
||||
|
||||
@ -184,7 +184,7 @@ void test_dynrankview_op_perf(const int par_size) {
|
||||
{
|
||||
Kokkos::View<double ***, DeviceType> testview("testview", par_size, dim_2,
|
||||
dim_3);
|
||||
typedef InitViewFunctor<DeviceType> FunctorType;
|
||||
using FunctorType = InitViewFunctor<DeviceType>;
|
||||
|
||||
timer.reset();
|
||||
Kokkos::RangePolicy<DeviceType> policy(0, par_size);
|
||||
@ -204,7 +204,7 @@ void test_dynrankview_op_perf(const int par_size) {
|
||||
|
||||
Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType> teststrideview =
|
||||
Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL);
|
||||
typedef InitStrideViewFunctor<DeviceType> FunctorStrideType;
|
||||
using FunctorStrideType = InitStrideViewFunctor<DeviceType>;
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(policy, FunctorStrideType(teststrideview));
|
||||
@ -216,7 +216,7 @@ void test_dynrankview_op_perf(const int par_size) {
|
||||
{
|
||||
Kokkos::View<double *******, DeviceType> testview("testview", par_size,
|
||||
dim_2, dim_3, 1, 1, 1, 1);
|
||||
typedef InitViewRank7Functor<DeviceType> FunctorType;
|
||||
using FunctorType = InitViewRank7Functor<DeviceType>;
|
||||
|
||||
timer.reset();
|
||||
Kokkos::RangePolicy<DeviceType> policy(0, par_size);
|
||||
@ -229,7 +229,7 @@ void test_dynrankview_op_perf(const int par_size) {
|
||||
{
|
||||
Kokkos::DynRankView<double, DeviceType> testdrview("testdrview", par_size,
|
||||
dim_2, dim_3);
|
||||
typedef InitDynRankViewFunctor<DeviceType> FunctorType;
|
||||
using FunctorType = InitDynRankViewFunctor<DeviceType>;
|
||||
|
||||
timer.reset();
|
||||
Kokkos::RangePolicy<DeviceType> policy(0, par_size);
|
||||
|
||||
@ -65,9 +65,9 @@ union helper {
|
||||
|
||||
template <typename Device>
|
||||
struct generate_ids {
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<uint32_t*, execution_space> local_id_view;
|
||||
using execution_space = Device;
|
||||
using size_type = typename execution_space::size_type;
|
||||
using local_id_view = Kokkos::View<uint32_t*, execution_space>;
|
||||
|
||||
local_id_view local_2_global;
|
||||
|
||||
@ -96,13 +96,12 @@ struct generate_ids {
|
||||
|
||||
template <typename Device>
|
||||
struct fill_map {
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<const uint32_t*, execution_space,
|
||||
Kokkos::MemoryRandomAccess>
|
||||
local_id_view;
|
||||
typedef Kokkos::UnorderedMap<uint32_t, size_type, execution_space>
|
||||
global_id_view;
|
||||
using execution_space = Device;
|
||||
using size_type = typename execution_space::size_type;
|
||||
using local_id_view = Kokkos::View<const uint32_t*, execution_space,
|
||||
Kokkos::MemoryRandomAccess>;
|
||||
using global_id_view =
|
||||
Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
|
||||
|
||||
global_id_view global_2_local;
|
||||
local_id_view local_2_global;
|
||||
@ -120,18 +119,17 @@ struct fill_map {
|
||||
|
||||
template <typename Device>
|
||||
struct find_test {
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<const uint32_t*, execution_space,
|
||||
Kokkos::MemoryRandomAccess>
|
||||
local_id_view;
|
||||
typedef Kokkos::UnorderedMap<const uint32_t, const size_type, execution_space>
|
||||
global_id_view;
|
||||
using execution_space = Device;
|
||||
using size_type = typename execution_space::size_type;
|
||||
using local_id_view = Kokkos::View<const uint32_t*, execution_space,
|
||||
Kokkos::MemoryRandomAccess>;
|
||||
using global_id_view =
|
||||
Kokkos::UnorderedMap<const uint32_t, const size_type, execution_space>;
|
||||
|
||||
global_id_view global_2_local;
|
||||
local_id_view local_2_global;
|
||||
|
||||
typedef size_t value_type;
|
||||
using value_type = size_t;
|
||||
|
||||
find_test(global_id_view gIds, local_id_view lIds, value_type& num_errors)
|
||||
: global_2_local(gIds), local_2_global(lIds) {
|
||||
@ -156,12 +154,12 @@ struct find_test {
|
||||
|
||||
template <typename Device>
|
||||
void test_global_to_local_ids(unsigned num_ids) {
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = Device;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef Kokkos::View<uint32_t*, execution_space> local_id_view;
|
||||
typedef Kokkos::UnorderedMap<uint32_t, size_type, execution_space>
|
||||
global_id_view;
|
||||
using local_id_view = Kokkos::View<uint32_t*, execution_space>;
|
||||
using global_id_view =
|
||||
Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
|
||||
|
||||
// size
|
||||
std::cout << num_ids << ", ";
|
||||
|
||||
@ -50,14 +50,14 @@
|
||||
|
||||
namespace Perf {
|
||||
|
||||
template <typename ExecSpace, typename Layout, int duplication,
|
||||
int contribution>
|
||||
template <typename ExecSpace, typename Layout, typename Duplication,
|
||||
typename Contribution>
|
||||
void test_scatter_view(int m, int n) {
|
||||
Kokkos::View<double * [3], Layout, ExecSpace> original_view("original_view",
|
||||
n);
|
||||
{
|
||||
auto scatter_view = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, duplication, contribution>(
|
||||
Kokkos::Experimental::ScatterSum, Duplication, Contribution>(
|
||||
original_view);
|
||||
Kokkos::Experimental::UniqueToken<
|
||||
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global>
|
||||
|
||||
@ -55,9 +55,9 @@ namespace Perf {
|
||||
|
||||
template <typename Device, bool Near>
|
||||
struct UnorderedMapTest {
|
||||
typedef Device execution_space;
|
||||
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
|
||||
typedef typename map_type::histogram_type histogram_type;
|
||||
using execution_space = Device;
|
||||
using map_type = Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space>;
|
||||
using histogram_type = typename map_type::histogram_type;
|
||||
|
||||
struct value_type {
|
||||
uint32_t failed_count;
|
||||
|
||||
@ -9,6 +9,10 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
SET(KOKKOS_CONTAINERS_SRCS)
|
||||
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp)
|
||||
SET(KOKKOS_CONTAINER_HEADERS)
|
||||
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
|
||||
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
|
||||
|
||||
|
||||
INSTALL (
|
||||
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
|
||||
@ -19,6 +23,7 @@ INSTALL (
|
||||
KOKKOS_ADD_LIBRARY(
|
||||
kokkoscontainers
|
||||
SOURCES ${KOKKOS_CONTAINERS_SRCS}
|
||||
HEADERS ${KOKKOS_CONTAINER_HEADERS}
|
||||
)
|
||||
|
||||
SET_TARGET_PROPERTIES(kokkoscontainers PROPERTIES VERSION ${Kokkos_VERSION})
|
||||
|
||||
@ -73,8 +73,8 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
|
||||
template <typename Device>
|
||||
class Bitset {
|
||||
public:
|
||||
typedef Device execution_space;
|
||||
typedef unsigned size_type;
|
||||
using execution_space = Device;
|
||||
using size_type = unsigned;
|
||||
|
||||
enum { BIT_SCAN_REVERSE = 1u };
|
||||
enum { MOVE_HINT_BACKWARD = 2u };
|
||||
@ -137,9 +137,9 @@ class Bitset {
|
||||
|
||||
if (m_last_block_mask) {
|
||||
// clear the unused bits in the last block
|
||||
typedef Kokkos::Impl::DeepCopy<typename execution_space::memory_space,
|
||||
Kokkos::HostSpace>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename execution_space::memory_space,
|
||||
Kokkos::HostSpace>;
|
||||
raw_deep_copy(m_blocks.data() + (m_blocks.extent(0) - 1u),
|
||||
&m_last_block_mask, sizeof(unsigned));
|
||||
}
|
||||
@ -234,6 +234,10 @@ class Bitset {
|
||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return m_blocks.is_allocated();
|
||||
}
|
||||
|
||||
private:
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx,
|
||||
@ -304,8 +308,8 @@ class Bitset {
|
||||
template <typename Device>
|
||||
class ConstBitset {
|
||||
public:
|
||||
typedef Device execution_space;
|
||||
typedef unsigned size_type;
|
||||
using execution_space = Device;
|
||||
using size_type = unsigned;
|
||||
|
||||
private:
|
||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) };
|
||||
@ -380,9 +384,9 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
|
||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>;
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
|
||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
||||
}
|
||||
@ -394,9 +398,9 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
|
||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>;
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
|
||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
||||
}
|
||||
@ -408,9 +412,9 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
|
||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
||||
typename SrcDevice::memory_space>;
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
|
||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
||||
}
|
||||
|
||||
@ -100,99 +100,91 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
public:
|
||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||
//@{
|
||||
typedef ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> traits;
|
||||
using traits = ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type>;
|
||||
|
||||
//! The Kokkos Host Device type;
|
||||
typedef typename traits::host_mirror_space host_mirror_space;
|
||||
using host_mirror_space = typename traits::host_mirror_space;
|
||||
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef View<typename traits::data_type, Arg1Type, Arg2Type, Arg3Type> t_dev;
|
||||
using t_dev = View<typename traits::data_type, Arg1Type, Arg2Type, Arg3Type>;
|
||||
|
||||
/// \typedef t_host
|
||||
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
|
||||
typedef typename t_dev::HostMirror t_host;
|
||||
using t_host = typename t_dev::HostMirror;
|
||||
|
||||
//! The type of a const View on the device.
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef View<typename traits::const_data_type, Arg1Type, Arg2Type, Arg3Type>
|
||||
t_dev_const;
|
||||
using t_dev_const =
|
||||
View<typename traits::const_data_type, Arg1Type, Arg2Type, Arg3Type>;
|
||||
|
||||
/// \typedef t_host_const
|
||||
/// \brief The type of a const View host mirror of \c t_dev_const.
|
||||
typedef typename t_dev_const::HostMirror t_host_const;
|
||||
using t_host_const = typename t_dev_const::HostMirror;
|
||||
|
||||
//! The type of a const, random-access View on the device.
|
||||
typedef View<typename traits::const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> >
|
||||
t_dev_const_randomread;
|
||||
using t_dev_const_randomread =
|
||||
View<typename traits::const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> >;
|
||||
|
||||
/// \typedef t_host_const_randomread
|
||||
/// \brief The type of a const, random-access View host mirror of
|
||||
/// \c t_dev_const_randomread.
|
||||
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
|
||||
using t_host_const_randomread = typename t_dev_const_randomread::HostMirror;
|
||||
|
||||
//! The type of an unmanaged View on the device.
|
||||
typedef View<typename traits::data_type, typename traits::array_layout,
|
||||
typename traits::device_type, MemoryUnmanaged>
|
||||
t_dev_um;
|
||||
using t_dev_um =
|
||||
View<typename traits::data_type, typename traits::array_layout,
|
||||
typename traits::device_type, MemoryUnmanaged>;
|
||||
|
||||
//! The type of an unmanaged View host mirror of \c t_dev_um.
|
||||
typedef View<typename t_host::data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type, MemoryUnmanaged>
|
||||
t_host_um;
|
||||
using t_host_um =
|
||||
View<typename t_host::data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type, MemoryUnmanaged>;
|
||||
|
||||
//! The type of a const unmanaged View on the device.
|
||||
typedef View<typename traits::const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type, MemoryUnmanaged>
|
||||
t_dev_const_um;
|
||||
using t_dev_const_um =
|
||||
View<typename traits::const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type, MemoryUnmanaged>;
|
||||
|
||||
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
|
||||
typedef View<typename t_host::const_data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type, MemoryUnmanaged>
|
||||
t_host_const_um;
|
||||
using t_host_const_um =
|
||||
View<typename t_host::const_data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type, MemoryUnmanaged>;
|
||||
|
||||
//! The type of a const, random-access View on the device.
|
||||
typedef View<typename t_host::const_data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess> >
|
||||
t_dev_const_randomread_um;
|
||||
using t_dev_const_randomread_um =
|
||||
View<typename t_host::const_data_type, typename t_host::array_layout,
|
||||
typename t_host::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess> >;
|
||||
|
||||
/// \typedef t_host_const_randomread
|
||||
/// \brief The type of a const, random-access View host mirror of
|
||||
/// \c t_dev_const_randomread.
|
||||
typedef
|
||||
typename t_dev_const_randomread::HostMirror t_host_const_randomread_um;
|
||||
|
||||
//@}
|
||||
//! \name The two View instances.
|
||||
//@{
|
||||
|
||||
t_dev d_view;
|
||||
t_host h_view;
|
||||
using t_host_const_randomread_um =
|
||||
typename t_dev_const_randomread::HostMirror;
|
||||
|
||||
//@}
|
||||
//! \name Counters to keep track of changes ("modified" flags)
|
||||
//@{
|
||||
|
||||
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
protected:
|
||||
// modified_flags[0] -> host
|
||||
// modified_flags[1] -> device
|
||||
typedef View<unsigned int[2], LayoutLeft, Kokkos::HostSpace> t_modified_flags;
|
||||
using t_modified_flags = View<unsigned int[2], LayoutLeft, Kokkos::HostSpace>;
|
||||
t_modified_flags modified_flags;
|
||||
|
||||
public:
|
||||
#else
|
||||
typedef View<unsigned int[2], LayoutLeft, typename t_host::execution_space>
|
||||
t_modified_flags;
|
||||
typedef View<unsigned int, LayoutLeft, typename t_host::execution_space>
|
||||
t_modified_flag;
|
||||
t_modified_flags modified_flags;
|
||||
t_modified_flag modified_host, modified_device;
|
||||
#endif
|
||||
|
||||
//@}
|
||||
|
||||
// Moved this specifically after modified_flags to resolve an alignment issue
|
||||
// on MSVC/NVCC
|
||||
//! \name The two View instances.
|
||||
//@{
|
||||
t_dev d_view;
|
||||
t_host h_view;
|
||||
//@}
|
||||
|
||||
//! \name Constructors
|
||||
//@{
|
||||
|
||||
@ -201,14 +193,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
/// Both device and host View objects are constructed using their
|
||||
/// default constructors. The "modified" flags are both initialized
|
||||
/// to "unmodified."
|
||||
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
DualView() = default;
|
||||
#else
|
||||
DualView() : modified_flags(t_modified_flags("DualView::modified_flags")) {
|
||||
modified_host = t_modified_flag(modified_flags, 0);
|
||||
modified_device = t_modified_flag(modified_flags, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// \brief Constructor that allocates View objects on both host and device.
|
||||
///
|
||||
@ -228,15 +213,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
|
||||
: d_view(label, n0, n1, n2, n3, n4, n5, n6, n7),
|
||||
: modified_flags(t_modified_flags("DualView::modified_flags")),
|
||||
d_view(label, n0, n1, n2, n3, n4, n5, n6, n7),
|
||||
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
|
||||
,
|
||||
modified_flags(t_modified_flags("DualView::modified_flags")) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
modified_host = t_modified_flag(modified_flags, 0);
|
||||
modified_device = t_modified_flag(modified_flags, 1);
|
||||
#endif
|
||||
}
|
||||
{}
|
||||
|
||||
/// \brief Constructor that allocates View objects on both host and device.
|
||||
///
|
||||
@ -260,15 +240,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
|
||||
: d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
|
||||
: modified_flags(t_modified_flags("DualView::modified_flags")),
|
||||
d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
|
||||
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
|
||||
,
|
||||
modified_flags(t_modified_flags("DualView::modified_flags")) {
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
modified_host = t_modified_flag(modified_flags, 0);
|
||||
modified_device = t_modified_flag(modified_flags, 1);
|
||||
#endif
|
||||
}
|
||||
{}
|
||||
|
||||
explicit inline DualView(const ViewAllocateWithoutInitializing& arg_prop,
|
||||
const size_t arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
@ -288,30 +263,16 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
//! Copy constructor (shallow copy)
|
||||
template <class SS, class LS, class DS, class MS>
|
||||
DualView(const DualView<SS, LS, DS, MS>& src)
|
||||
: d_view(src.d_view),
|
||||
h_view(src.h_view),
|
||||
modified_flags(src.modified_flags)
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
,
|
||||
modified_host(src.modified_host),
|
||||
modified_device(src.modified_device)
|
||||
#endif
|
||||
{
|
||||
}
|
||||
: modified_flags(src.modified_flags),
|
||||
d_view(src.d_view),
|
||||
h_view(src.h_view) {}
|
||||
|
||||
//! Subview constructor
|
||||
template <class SD, class S1, class S2, class S3, class Arg0, class... Args>
|
||||
DualView(const DualView<SD, S1, S2, S3>& src, const Arg0& arg0, Args... args)
|
||||
: d_view(Kokkos::subview(src.d_view, arg0, args...)),
|
||||
h_view(Kokkos::subview(src.h_view, arg0, args...)),
|
||||
modified_flags(src.modified_flags)
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
,
|
||||
modified_host(src.modified_host),
|
||||
modified_device(src.modified_device)
|
||||
#endif
|
||||
{
|
||||
}
|
||||
: modified_flags(src.modified_flags),
|
||||
d_view(Kokkos::subview(src.d_view, arg0, args...)),
|
||||
h_view(Kokkos::subview(src.h_view, arg0, args...)) {}
|
||||
|
||||
/// \brief Create DualView from existing device and host View objects.
|
||||
///
|
||||
@ -324,9 +285,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
/// \param d_view_ Device View
|
||||
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
|
||||
DualView(const t_dev& d_view_, const t_host& h_view_)
|
||||
: d_view(d_view_),
|
||||
h_view(h_view_),
|
||||
modified_flags(t_modified_flags("DualView::modified_flags")) {
|
||||
: modified_flags(t_modified_flags("DualView::modified_flags")),
|
||||
d_view(d_view_),
|
||||
h_view(h_view_) {
|
||||
if (int(d_view.rank) != int(h_view.rank) ||
|
||||
d_view.extent(0) != h_view.extent(0) ||
|
||||
d_view.extent(1) != h_view.extent(1) ||
|
||||
@ -348,10 +309,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
"DualView constructed with incompatible views");
|
||||
}
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
modified_host = t_modified_flag(modified_flags, 0);
|
||||
modified_device = t_modified_flag(modified_flags, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
//@}
|
||||
@ -367,20 +324,25 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
///
|
||||
/// For example, suppose you create a DualView on Cuda, like this:
|
||||
/// \code
|
||||
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda>
|
||||
/// dual_view_type; dual_view_type DV ("my dual view", 100); \endcode If you
|
||||
/// want to get the CUDA device View, do this: \code typename
|
||||
/// dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> (); \endcode and if
|
||||
/// you want to get the host mirror of that View, do this: \code typedef
|
||||
/// typename Kokkos::HostSpace::execution_space host_device_type; typename
|
||||
/// dual_view_type::t_host hostView = DV.view<host_device_type> (); \endcode
|
||||
/// using dual_view_type =
|
||||
/// Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda>;
|
||||
/// dual_view_type DV ("my dual view", 100);
|
||||
/// \endcode
|
||||
/// If you want to get the CUDA device View, do this:
|
||||
/// \code
|
||||
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
|
||||
/// \endcode
|
||||
/// and if you want to get the host mirror of that View, do this:
|
||||
/// \code
|
||||
/// using host_device_type = typename Kokkos::HostSpace::execution_space;
|
||||
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
|
||||
/// \endcode
|
||||
template <class Device>
|
||||
KOKKOS_INLINE_FUNCTION const typename Impl::if_c<
|
||||
std::is_same<typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
t_dev, t_host>::type&
|
||||
view() const {
|
||||
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
constexpr bool device_is_memspace =
|
||||
std::is_same<Device, typename Device::memory_space>::value;
|
||||
constexpr bool device_is_execspace =
|
||||
@ -415,7 +377,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
(device_exec_is_t_dev_exec || device_exec_is_t_host_exec))),
|
||||
"Template parameter to .view() must exactly match one of the "
|
||||
"DualView's device types or one of the execution or memory spaces");
|
||||
#endif
|
||||
|
||||
return Impl::if_c<std::is_same<typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
@ -428,6 +389,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
t_dev view_device() const { return d_view; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return (d_view.is_allocated() && h_view.is_allocated());
|
||||
}
|
||||
|
||||
template <class Device>
|
||||
static int get_device_side() {
|
||||
constexpr bool device_is_memspace =
|
||||
@ -453,7 +418,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
std::is_same<typename Device::memory_space,
|
||||
typename t_host::device_type>::value;
|
||||
|
||||
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
static_assert(
|
||||
device_is_t_dev_device || device_is_t_host_device ||
|
||||
(device_is_memspace &&
|
||||
@ -465,13 +429,8 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
(device_exec_is_t_dev_exec || device_exec_is_t_host_exec))),
|
||||
"Template parameter to .sync() must exactly match one of the "
|
||||
"DualView's device types or one of the execution or memory spaces");
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
int dev = -1;
|
||||
#else
|
||||
int dev = 0;
|
||||
#endif
|
||||
if (device_is_t_dev_device)
|
||||
dev = 1;
|
||||
else if (device_is_t_host_device)
|
||||
@ -822,11 +781,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
//! \name Methods for getting capacity, stride, or dimension(s).
|
||||
//@{
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
//! The allocation size (same as Kokkos::View::capacity).
|
||||
size_t capacity() const { return d_view.span(); }
|
||||
#endif
|
||||
|
||||
//! The allocation size (same as Kokkos::View::span).
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return d_view.span(); }
|
||||
|
||||
@ -854,29 +808,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
|
||||
return static_cast<int>(d_view.extent(r));
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
/* Deprecate all 'dimension' functions in favor of
|
||||
* ISO/C++ vocabulary 'extent'.
|
||||
*/
|
||||
|
||||
/* \brief return size of dimension 0 */
|
||||
size_t dimension_0() const { return d_view.extent(0); }
|
||||
/* \brief return size of dimension 1 */
|
||||
size_t dimension_1() const { return d_view.extent(1); }
|
||||
/* \brief return size of dimension 2 */
|
||||
size_t dimension_2() const { return d_view.extent(2); }
|
||||
/* \brief return size of dimension 3 */
|
||||
size_t dimension_3() const { return d_view.extent(3); }
|
||||
/* \brief return size of dimension 4 */
|
||||
size_t dimension_4() const { return d_view.extent(4); }
|
||||
/* \brief return size of dimension 5 */
|
||||
size_t dimension_5() const { return d_view.extent(5); }
|
||||
/* \brief return size of dimension 6 */
|
||||
size_t dimension_6() const { return d_view.extent(6); }
|
||||
/* \brief return size of dimension 7 */
|
||||
size_t dimension_7() const { return d_view.extent(7); }
|
||||
#endif
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
@ -893,13 +824,12 @@ namespace Impl {
|
||||
|
||||
template <class D, class A1, class A2, class A3, class... Args>
|
||||
struct DualViewSubview {
|
||||
typedef typename Kokkos::Impl::ViewMapping<
|
||||
void, Kokkos::ViewTraits<D, A1, A2, A3>, Args...>::traits_type dst_traits;
|
||||
using dst_traits = typename Kokkos::Impl::ViewMapping<
|
||||
void, Kokkos::ViewTraits<D, A1, A2, A3>, Args...>::traits_type;
|
||||
|
||||
typedef Kokkos::DualView<
|
||||
using type = Kokkos::DualView<
|
||||
typename dst_traits::data_type, typename dst_traits::array_layout,
|
||||
typename dst_traits::device_type, typename dst_traits::memory_traits>
|
||||
type;
|
||||
typename dst_traits::device_type, typename dst_traits::memory_traits>;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
|
||||
@ -349,8 +349,8 @@ class ViewMapping<
|
||||
public:
|
||||
enum { is_assignable = is_assignable_value_type && is_assignable_layout };
|
||||
|
||||
typedef ViewMapping<DstTraits, typename DstTraits::specialize> DstType;
|
||||
typedef ViewMapping<SrcTraits, typename SrcTraits::specialize> SrcType;
|
||||
using DstType = ViewMapping<DstTraits, typename DstTraits::specialize>;
|
||||
using SrcType = ViewMapping<SrcTraits, typename SrcTraits::specialize>;
|
||||
|
||||
template <typename DT, typename... DP, typename ST, typename... SP>
|
||||
KOKKOS_INLINE_FUNCTION static void assign(
|
||||
@ -365,13 +365,13 @@ class ViewMapping<
|
||||
|
||||
// Removed dimension checks...
|
||||
|
||||
typedef typename DstType::offset_type dst_offset_type;
|
||||
using dst_offset_type = typename DstType::offset_type;
|
||||
dst.m_map.m_impl_offset = dst_offset_type(
|
||||
std::integral_constant<unsigned, 0>(),
|
||||
src.layout()); // Check this for integer input1 for padding, etc
|
||||
dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle<DstTraits>::assign(
|
||||
src.m_map.m_impl_handle, src.m_track);
|
||||
dst.m_track.assign(src.m_track, DstTraits::is_managed);
|
||||
src.m_map.m_impl_handle, src.m_track.m_tracker);
|
||||
dst.m_track.assign(src.m_track.m_tracker, DstTraits::is_managed);
|
||||
dst.m_rank = src.Rank;
|
||||
}
|
||||
};
|
||||
@ -415,16 +415,16 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
friend class Kokkos::Impl::ViewMapping;
|
||||
|
||||
public:
|
||||
typedef ViewTraits<DataType, Properties...> drvtraits;
|
||||
using drvtraits = ViewTraits<DataType, Properties...>;
|
||||
|
||||
typedef View<DataType*******, Properties...> view_type;
|
||||
using view_type = View<DataType*******, Properties...>;
|
||||
|
||||
typedef ViewTraits<DataType*******, Properties...> traits;
|
||||
using traits = ViewTraits<DataType*******, Properties...>;
|
||||
|
||||
private:
|
||||
typedef Kokkos::Impl::ViewMapping<traits, typename traits::specialize>
|
||||
map_type;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type;
|
||||
using map_type =
|
||||
Kokkos::Impl::ViewMapping<traits, typename traits::specialize>;
|
||||
using track_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
|
||||
track_type m_track;
|
||||
map_type m_map;
|
||||
@ -440,28 +440,24 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
// 7 data_type of the traits
|
||||
|
||||
/** \brief Compatible view of array of scalar types */
|
||||
typedef DynRankView<
|
||||
using array_type = DynRankView<
|
||||
typename drvtraits::scalar_array_type, typename drvtraits::array_layout,
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>
|
||||
array_type;
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>;
|
||||
|
||||
/** \brief Compatible view of const data type */
|
||||
typedef DynRankView<
|
||||
using const_type = DynRankView<
|
||||
typename drvtraits::const_data_type, typename drvtraits::array_layout,
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>
|
||||
const_type;
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>;
|
||||
|
||||
/** \brief Compatible view of non-const data type */
|
||||
typedef DynRankView<
|
||||
using non_const_type = DynRankView<
|
||||
typename drvtraits::non_const_data_type, typename drvtraits::array_layout,
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>
|
||||
non_const_type;
|
||||
typename drvtraits::device_type, typename drvtraits::memory_traits>;
|
||||
|
||||
/** \brief Compatible HostMirror view */
|
||||
typedef DynRankView<typename drvtraits::non_const_data_type,
|
||||
typename drvtraits::array_layout,
|
||||
typename drvtraits::host_mirror_space>
|
||||
HostMirror;
|
||||
using HostMirror = DynRankView<typename drvtraits::non_const_data_type,
|
||||
typename drvtraits::array_layout,
|
||||
typename drvtraits::host_mirror_space>;
|
||||
|
||||
//----------------------------------------
|
||||
// Domain rank and extents
|
||||
@ -493,42 +489,6 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
* ISO/C++ vocabulary 'extent'.
|
||||
*/
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
template <typename iType>
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
typename std::enable_if<std::is_integral<iType>::value, size_t>::type
|
||||
dimension(const iType& r) const {
|
||||
return extent(r);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const {
|
||||
return m_map.dimension_0();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const {
|
||||
return m_map.dimension_1();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const {
|
||||
return m_map.dimension_2();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const {
|
||||
return m_map.dimension_3();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const {
|
||||
return m_map.dimension_4();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const {
|
||||
return m_map.dimension_5();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const {
|
||||
return m_map.dimension_6();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const {
|
||||
return m_map.dimension_7();
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t size() const {
|
||||
return m_map.extent(0) * m_map.extent(1) * m_map.extent(2) *
|
||||
m_map.extent(3) * m_map.extent(4) * m_map.extent(5) *
|
||||
@ -568,8 +528,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
//----------------------------------------
|
||||
// Range span is the span which contains all members.
|
||||
|
||||
typedef typename map_type::reference_type reference_type;
|
||||
typedef typename map_type::pointer_type pointer_type;
|
||||
using reference_type = typename map_type::reference_type;
|
||||
using pointer_type = typename map_type::pointer_type;
|
||||
|
||||
enum {
|
||||
reference_type_is_lvalue_reference =
|
||||
@ -577,39 +537,18 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); }
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// Deprecated, use 'span()' instead
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const {
|
||||
return m_map.span();
|
||||
}
|
||||
#endif
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||
return m_map.span_is_contiguous();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const {
|
||||
return m_map.data();
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
// Deprecated, use 'span_is_contigous()' instead
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const {
|
||||
return m_map.span_is_contiguous();
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return (m_map.data() != nullptr);
|
||||
}
|
||||
// Deprecated, use 'data()' instead
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const {
|
||||
return m_map.data();
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------------
|
||||
// Allow specializations to query their specialized map
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const Kokkos::Impl::ViewMapping<traits, typename traits::specialize>&
|
||||
implementation_map() const {
|
||||
return m_map;
|
||||
}
|
||||
#endif
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const Kokkos::Impl::ViewMapping<traits, typename traits::specialize>&
|
||||
impl_map() const {
|
||||
@ -709,12 +648,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
const size_t dim_scalar = m_map.dimension_scalar();
|
||||
const size_t bytes = this->span() / dim_scalar;
|
||||
|
||||
typedef Kokkos::View<
|
||||
using tmp_view_type = Kokkos::View<
|
||||
DataType*, typename traits::array_layout, typename traits::device_type,
|
||||
Kokkos::MemoryTraits<traits::memory_traits::is_unmanaged |
|
||||
traits::memory_traits::is_random_access |
|
||||
traits::memory_traits::is_atomic> >
|
||||
tmp_view_type;
|
||||
traits::memory_traits::is_atomic> >;
|
||||
tmp_view_type rankone_view(this->data(), bytes, dim_scalar);
|
||||
return rankone_view(i0);
|
||||
}
|
||||
@ -1102,10 +1040,9 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
template <class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION DynRankView(const DynRankView<RT, RP...>& rhs)
|
||||
: m_track(rhs.m_track, traits::is_managed), m_map(), m_rank(rhs.m_rank) {
|
||||
typedef typename DynRankView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
typename traits::specialize>
|
||||
Mapping;
|
||||
using SrcTraits = typename DynRankView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
typename traits::specialize>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible DynRankView copy construction");
|
||||
Mapping::assign(m_map, rhs.m_map, rhs.m_track);
|
||||
@ -1114,10 +1051,9 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
template <class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION DynRankView& operator=(
|
||||
const DynRankView<RT, RP...>& rhs) {
|
||||
typedef typename DynRankView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
typename traits::specialize>
|
||||
Mapping;
|
||||
using SrcTraits = typename DynRankView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
typename traits::specialize>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible DynRankView copy construction");
|
||||
Mapping::assign(m_map, rhs.m_map, rhs.m_track);
|
||||
@ -1130,10 +1066,10 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
template <class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION DynRankView(const View<RT, RP...>& rhs)
|
||||
: m_track(), m_map(), m_rank(rhs.Rank) {
|
||||
typedef typename View<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
Kokkos::Impl::ViewToDynRankViewTag>
|
||||
Mapping;
|
||||
using SrcTraits = typename View<RT, RP...>::traits;
|
||||
using Mapping =
|
||||
Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
Kokkos::Impl::ViewToDynRankViewTag>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible View to DynRankView copy construction");
|
||||
Mapping::assign(*this, rhs);
|
||||
@ -1141,10 +1077,10 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
|
||||
template <class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION DynRankView& operator=(const View<RT, RP...>& rhs) {
|
||||
typedef typename View<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
Kokkos::Impl::ViewToDynRankViewTag>
|
||||
Mapping;
|
||||
using SrcTraits = typename View<RT, RP...>::traits;
|
||||
using Mapping =
|
||||
Kokkos::Impl::ViewMapping<traits, SrcTraits,
|
||||
Kokkos::Impl::ViewToDynRankViewTag>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible View to DynRankView copy assignment");
|
||||
Mapping::assign(*this, rhs);
|
||||
@ -1177,11 +1113,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
template computeRank<typename traits::array_layout, P...>(
|
||||
arg_prop, arg_layout)) {
|
||||
// Append layout and spaces if not input
|
||||
typedef Kokkos::Impl::ViewCtorProp<P...> alloc_prop_input;
|
||||
using alloc_prop_input = Kokkos::Impl::ViewCtorProp<P...>;
|
||||
|
||||
// use 'std::integral_constant<unsigned,I>' for non-types
|
||||
// to avoid duplicate class error.
|
||||
typedef Kokkos::Impl::ViewCtorProp<
|
||||
using alloc_prop = Kokkos::Impl::ViewCtorProp<
|
||||
P...,
|
||||
typename std::conditional<alloc_prop_input::has_label,
|
||||
std::integral_constant<unsigned, 0>,
|
||||
@ -1193,19 +1129,13 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
|
||||
typename std::conditional<
|
||||
alloc_prop_input::has_execution_space,
|
||||
std::integral_constant<unsigned, 2>,
|
||||
typename traits::device_type::execution_space>::type>
|
||||
alloc_prop;
|
||||
typename traits::device_type::execution_space>::type>;
|
||||
|
||||
static_assert(traits::is_managed,
|
||||
"View allocation constructor requires managed memory");
|
||||
|
||||
if (alloc_prop::initialize &&
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
!alloc_prop::execution_space::is_initialized()
|
||||
#else
|
||||
!alloc_prop::execution_space::impl_is_initialized()
|
||||
#endif
|
||||
) {
|
||||
!alloc_prop::execution_space::impl_is_initialized()) {
|
||||
// If initializing view data then
|
||||
// the execution space must be initialized.
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
@ -1499,36 +1429,34 @@ struct ViewMapping<
|
||||
unsigned(R4) + unsigned(R5) + unsigned(R6)
|
||||
};
|
||||
|
||||
typedef Kokkos::LayoutStride array_layout;
|
||||
using array_layout = Kokkos::LayoutStride;
|
||||
|
||||
typedef typename SrcTraits::value_type value_type;
|
||||
using value_type = typename SrcTraits::value_type;
|
||||
|
||||
typedef value_type******* data_type;
|
||||
using data_type = value_type*******;
|
||||
|
||||
public:
|
||||
typedef Kokkos::ViewTraits<data_type, array_layout,
|
||||
typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>
|
||||
traits_type;
|
||||
using traits_type = Kokkos::ViewTraits<data_type, array_layout,
|
||||
typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>;
|
||||
|
||||
typedef Kokkos::View<data_type, array_layout, typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>
|
||||
type;
|
||||
using type =
|
||||
Kokkos::View<data_type, array_layout, typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>;
|
||||
|
||||
template <class MemoryTraits>
|
||||
struct apply {
|
||||
static_assert(Kokkos::Impl::is_memory_traits<MemoryTraits>::value, "");
|
||||
|
||||
typedef Kokkos::ViewTraits<data_type, array_layout,
|
||||
typename SrcTraits::device_type, MemoryTraits>
|
||||
traits_type;
|
||||
using traits_type =
|
||||
Kokkos::ViewTraits<data_type, array_layout,
|
||||
typename SrcTraits::device_type, MemoryTraits>;
|
||||
|
||||
typedef Kokkos::View<data_type, array_layout,
|
||||
typename SrcTraits::device_type, MemoryTraits>
|
||||
type;
|
||||
using type = Kokkos::View<data_type, array_layout,
|
||||
typename SrcTraits::device_type, MemoryTraits>;
|
||||
};
|
||||
|
||||
typedef typename SrcTraits::dimension dimension;
|
||||
using dimension = typename SrcTraits::dimension;
|
||||
|
||||
template <class Arg0 = int, class Arg1 = int, class Arg2 = int,
|
||||
class Arg3 = int, class Arg4 = int, class Arg5 = int,
|
||||
@ -1544,18 +1472,17 @@ struct ViewMapping<
|
||||
}
|
||||
};
|
||||
|
||||
typedef Kokkos::DynRankView<value_type, array_layout,
|
||||
typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>
|
||||
ret_type;
|
||||
using ret_type = Kokkos::DynRankView<value_type, array_layout,
|
||||
typename SrcTraits::device_type,
|
||||
typename SrcTraits::memory_traits>;
|
||||
|
||||
template <typename T, class... P>
|
||||
KOKKOS_INLINE_FUNCTION static ret_type subview(
|
||||
const unsigned src_rank, Kokkos::DynRankView<T, P...> const& src,
|
||||
Args... args) {
|
||||
typedef ViewMapping<traits_type, typename traits_type::specialize> DstType;
|
||||
using DstType = ViewMapping<traits_type, typename traits_type::specialize>;
|
||||
|
||||
typedef typename std::conditional<
|
||||
using DstDimType = typename std::conditional<
|
||||
(rank == 0), ViewDimension<>,
|
||||
typename std::conditional<
|
||||
(rank == 1), ViewDimension<0>,
|
||||
@ -1570,10 +1497,10 @@ struct ViewMapping<
|
||||
typename std::conditional<
|
||||
(rank == 6), ViewDimension<0, 0, 0, 0, 0, 0>,
|
||||
ViewDimension<0, 0, 0, 0, 0, 0, 0> >::type>::
|
||||
type>::type>::type>::type>::type>::type DstDimType;
|
||||
type>::type>::type>::type>::type>::type;
|
||||
|
||||
typedef ViewOffset<DstDimType, Kokkos::LayoutStride> dst_offset_type;
|
||||
typedef typename DstType::handle_type dst_handle_type;
|
||||
using dst_offset_type = ViewOffset<DstDimType, Kokkos::LayoutStride>;
|
||||
using dst_handle_type = typename DstType::handle_type;
|
||||
|
||||
ret_type dst;
|
||||
|
||||
@ -1636,9 +1563,9 @@ subdynrankview(const Kokkos::DynRankView<D, P...>& src, Args... args) {
|
||||
"DynRankView");
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::ViewMapping<Kokkos::Impl::DynRankSubviewTag,
|
||||
Kokkos::ViewTraits<D*******, P...>, Args...>
|
||||
metafcn;
|
||||
using metafcn =
|
||||
Kokkos::Impl::ViewMapping<Kokkos::Impl::DynRankSubviewTag,
|
||||
Kokkos::ViewTraits<D*******, P...>, Args...>;
|
||||
|
||||
return metafcn::subview(src.rank(), src, args...);
|
||||
}
|
||||
@ -1659,8 +1586,8 @@ template <class LT, class... LP, class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION bool operator==(const DynRankView<LT, LP...>& lhs,
|
||||
const DynRankView<RT, RP...>& rhs) {
|
||||
// Same data, layout, dimensions
|
||||
typedef ViewTraits<LT, LP...> lhs_traits;
|
||||
typedef ViewTraits<RT, RP...> rhs_traits;
|
||||
using lhs_traits = ViewTraits<LT, LP...>;
|
||||
using rhs_traits = ViewTraits<RT, RP...>;
|
||||
|
||||
return std::is_same<typename lhs_traits::const_value_type,
|
||||
typename rhs_traits::const_value_type>::value &&
|
||||
@ -1691,7 +1618,7 @@ namespace Impl {
|
||||
|
||||
template <class OutputView, typename Enable = void>
|
||||
struct DynRankViewFill {
|
||||
typedef typename OutputView::traits::const_value_type const_value_type;
|
||||
using const_value_type = typename OutputView::traits::const_value_type;
|
||||
|
||||
const OutputView output;
|
||||
const_value_type input;
|
||||
@ -1722,15 +1649,11 @@ struct DynRankViewFill {
|
||||
|
||||
DynRankViewFill(const OutputView& arg_out, const_value_type& arg_in)
|
||||
: output(arg_out), input(arg_in) {
|
||||
typedef typename OutputView::execution_space execution_space;
|
||||
typedef Kokkos::RangePolicy<execution_space> Policy;
|
||||
using execution_space = typename OutputView::execution_space;
|
||||
using Policy = Kokkos::RangePolicy<execution_space>;
|
||||
|
||||
const Kokkos::Impl::ParallelFor<DynRankViewFill, Policy> closure(
|
||||
*this, Policy(0, output.extent(0)));
|
||||
|
||||
closure.execute();
|
||||
|
||||
execution_space().fence();
|
||||
Kokkos::parallel_for("Kokkos::DynRankViewFill", Policy(0, output.extent(0)),
|
||||
*this);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1770,11 +1693,9 @@ struct DynRankViewRemap {
|
||||
n5(std::min((size_t)arg_out.extent(5), (size_t)arg_in.extent(5))),
|
||||
n6(std::min((size_t)arg_out.extent(6), (size_t)arg_in.extent(6))),
|
||||
n7(std::min((size_t)arg_out.extent(7), (size_t)arg_in.extent(7))) {
|
||||
typedef Kokkos::RangePolicy<ExecSpace> Policy;
|
||||
const Kokkos::Impl::ParallelFor<DynRankViewRemap, Policy> closure(
|
||||
*this, Policy(0, n0));
|
||||
closure.execute();
|
||||
// ExecSpace().fence(); // ??
|
||||
using Policy = Kokkos::RangePolicy<ExecSpace>;
|
||||
|
||||
Kokkos::parallel_for("Kokkos::DynRankViewRemap", Policy(0, n0), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1814,7 +1735,9 @@ inline void deep_copy(
|
||||
typename ViewTraits<DT, DP...>::value_type>::value,
|
||||
"deep_copy requires non-const type");
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DynRankViewFill<DynRankView<DT, DP...> >(dst, value);
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
/** \brief Deep copy into a value in Host memory from a view. */
|
||||
@ -1828,10 +1751,12 @@ inline void deep_copy(
|
||||
Kokkos::abort("");
|
||||
}
|
||||
|
||||
typedef ViewTraits<ST, SP...> src_traits;
|
||||
typedef typename src_traits::memory_space src_memory_space;
|
||||
using src_traits = ViewTraits<ST, SP...>;
|
||||
using src_memory_space = typename src_traits::memory_space;
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
|
||||
sizeof(ST));
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -1851,13 +1776,13 @@ inline void deep_copy(
|
||||
typename DstType::traits::non_const_value_type>::value,
|
||||
"deep_copy requires non-const destination type");
|
||||
|
||||
typedef DstType dst_type;
|
||||
typedef SrcType src_type;
|
||||
using dst_type = DstType;
|
||||
using src_type = SrcType;
|
||||
|
||||
typedef typename dst_type::execution_space dst_execution_space;
|
||||
typedef typename src_type::execution_space src_execution_space;
|
||||
typedef typename dst_type::memory_space dst_memory_space;
|
||||
typedef typename src_type::memory_space src_memory_space;
|
||||
using dst_execution_space = typename dst_type::execution_space;
|
||||
using src_execution_space = typename src_type::execution_space;
|
||||
using dst_memory_space = typename dst_type::memory_space;
|
||||
using src_memory_space = typename src_type::memory_space;
|
||||
|
||||
enum {
|
||||
DstExecCanAccessSrc =
|
||||
@ -1878,9 +1803,11 @@ inline void deep_copy(
|
||||
// If same type, equal layout, equal dimensions, equal span, and contiguous
|
||||
// memory then can byte-wise copy
|
||||
if (rank(src) == 0 && rank(dst) == 0) {
|
||||
typedef typename dst_type::value_type value_type;
|
||||
using value_type = typename dst_type::value_type;
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
|
||||
dst.data(), src.data(), sizeof(value_type));
|
||||
Kokkos::fence();
|
||||
} else if (std::is_same<
|
||||
typename DstType::traits::value_type,
|
||||
typename SrcType::traits::non_const_value_type>::value &&
|
||||
@ -1902,9 +1829,10 @@ inline void deep_copy(
|
||||
dst.extent(6) == src.extent(6) &&
|
||||
dst.extent(7) == src.extent(7)) {
|
||||
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
|
||||
dst.data(), src.data(), nbytes);
|
||||
Kokkos::fence();
|
||||
} else if (std::is_same<
|
||||
typename DstType::traits::value_type,
|
||||
typename SrcType::traits::non_const_value_type>::value &&
|
||||
@ -1931,22 +1859,29 @@ inline void deep_copy(
|
||||
dst.stride_6() == src.stride_6() &&
|
||||
dst.stride_7() == src.stride_7()) {
|
||||
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
|
||||
dst.data(), src.data(), nbytes);
|
||||
Kokkos::fence();
|
||||
} else if (DstExecCanAccessSrc) {
|
||||
// Copying data between views in accessible memory spaces and either
|
||||
// non-contiguous or incompatible shape.
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DynRankViewRemap<dst_type, src_type>(dst, src);
|
||||
Kokkos::fence();
|
||||
} else if (SrcExecCanAccessDst) {
|
||||
// Copying data between views in accessible memory spaces and either
|
||||
// non-contiguous or incompatible shape.
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::DynRankViewRemap<dst_type, src_type, src_execution_space>(
|
||||
dst, src);
|
||||
Kokkos::fence();
|
||||
} else {
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
"deep_copy given views that would require a temporary allocation");
|
||||
}
|
||||
} else {
|
||||
Kokkos::fence();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1962,45 +1897,45 @@ namespace Impl {
|
||||
template <class Space, class T, class... P>
|
||||
struct MirrorDRViewType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::DynRankView<T, P...> src_view_type;
|
||||
using src_view_type = typename Kokkos::DynRankView<T, P...>;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
using memory_space = typename Space::memory_space;
|
||||
// Check whether it is the same memory space
|
||||
enum {
|
||||
is_same_memspace =
|
||||
std::is_same<memory_space, typename src_view_type::memory_space>::value
|
||||
};
|
||||
// The array_layout
|
||||
typedef typename src_view_type::array_layout array_layout;
|
||||
using array_layout = typename src_view_type::array_layout;
|
||||
// The data type (we probably want it non-const since otherwise we can't even
|
||||
// deep_copy to it.
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
using data_type = typename src_view_type::non_const_data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::DynRankView<data_type, array_layout, Space> dest_view_type;
|
||||
using dest_view_type = Kokkos::DynRankView<data_type, array_layout, Space>;
|
||||
// If it is the same memory_space return the existsing view_type
|
||||
// This will also keep the unmanaged trait if necessary
|
||||
typedef typename std::conditional<is_same_memspace, src_view_type,
|
||||
dest_view_type>::type view_type;
|
||||
using view_type = typename std::conditional<is_same_memspace, src_view_type,
|
||||
dest_view_type>::type;
|
||||
};
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
struct MirrorDRVType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::DynRankView<T, P...> src_view_type;
|
||||
using src_view_type = typename Kokkos::DynRankView<T, P...>;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
using memory_space = typename Space::memory_space;
|
||||
// Check whether it is the same memory space
|
||||
enum {
|
||||
is_same_memspace =
|
||||
std::is_same<memory_space, typename src_view_type::memory_space>::value
|
||||
};
|
||||
// The array_layout
|
||||
typedef typename src_view_type::array_layout array_layout;
|
||||
using array_layout = typename src_view_type::array_layout;
|
||||
// The data type (we probably want it non-const since otherwise we can't even
|
||||
// deep_copy to it.
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
using data_type = typename src_view_type::non_const_data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::DynRankView<data_type, array_layout, Space> view_type;
|
||||
using view_type = Kokkos::DynRankView<data_type, array_layout, Space>;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
@ -2012,8 +1947,8 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
|
||||
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
|
||||
Kokkos::LayoutStride>::value>::type* = nullptr) {
|
||||
typedef DynRankView<T, P...> src_type;
|
||||
typedef typename src_type::HostMirror dst_type;
|
||||
using src_type = DynRankView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
return dst_type(std::string(src.label()).append("_mirror"),
|
||||
Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
@ -2026,8 +1961,8 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
|
||||
std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
|
||||
Kokkos::LayoutStride>::value>::type* = 0) {
|
||||
typedef DynRankView<T, P...> src_type;
|
||||
typedef typename src_type::HostMirror dst_type;
|
||||
using src_type = DynRankView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
return dst_type(std::string(src.label()).append("_mirror"),
|
||||
Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
@ -2066,7 +2001,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror_view(
|
||||
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<T, P...>::HostMirror::data_type>::
|
||||
value)>::type* = 0) {
|
||||
value)>::type* = nullptr) {
|
||||
return Kokkos::create_mirror(src);
|
||||
}
|
||||
|
||||
@ -2085,7 +2020,8 @@ template <class Space, class T, class... P>
|
||||
typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view(
|
||||
const Space&, const Kokkos::DynRankView<T, P...>& src,
|
||||
typename std::enable_if<
|
||||
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
|
||||
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
|
||||
nullptr) {
|
||||
return typename Impl::MirrorDRViewType<Space, T, P...>::view_type(
|
||||
src.label(), Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
}
|
||||
@ -2112,7 +2048,8 @@ create_mirror_view_and_copy(
|
||||
const Space&, const Kokkos::DynRankView<T, P...>& src,
|
||||
std::string const& name = "",
|
||||
typename std::enable_if<
|
||||
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
|
||||
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
|
||||
nullptr) {
|
||||
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
|
||||
std::string label = name.empty() ? src.label() : name;
|
||||
auto mirror = Mirror(Kokkos::ViewAllocateWithoutInitializing(label),
|
||||
@ -2139,7 +2076,7 @@ inline void resize(DynRankView<T, P...>& v,
|
||||
const size_t n5 = KOKKOS_INVALID_INDEX,
|
||||
const size_t n6 = KOKKOS_INVALID_INDEX,
|
||||
const size_t n7 = KOKKOS_INVALID_INDEX) {
|
||||
typedef DynRankView<T, P...> drview_type;
|
||||
using drview_type = DynRankView<T, P...>;
|
||||
|
||||
static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
|
||||
"Can only resize managed views");
|
||||
@ -2163,7 +2100,7 @@ inline void realloc(DynRankView<T, P...>& v,
|
||||
const size_t n5 = KOKKOS_INVALID_INDEX,
|
||||
const size_t n6 = KOKKOS_INVALID_INDEX,
|
||||
const size_t n7 = KOKKOS_INVALID_INDEX) {
|
||||
typedef DynRankView<T, P...> drview_type;
|
||||
using drview_type = DynRankView<T, P...>;
|
||||
|
||||
static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
|
||||
"Can only realloc managed views");
|
||||
|
||||
@ -85,13 +85,13 @@ struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> {
|
||||
template <typename DataType, typename... P>
|
||||
class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
public:
|
||||
typedef Kokkos::ViewTraits<DataType, P...> traits;
|
||||
using traits = Kokkos::ViewTraits<DataType, P...>;
|
||||
|
||||
private:
|
||||
template <class, class...>
|
||||
friend class DynamicView;
|
||||
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type;
|
||||
using track_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
|
||||
static_assert(traits::rank == 1 && traits::rank_dynamic == 1,
|
||||
"DynamicView must be rank-one");
|
||||
@ -118,8 +118,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
|
||||
private:
|
||||
track_type m_track;
|
||||
typename traits::value_type**
|
||||
m_chunks; // array of pointers to 'chunks' of memory
|
||||
typename traits::value_type** m_chunks =
|
||||
nullptr; // array of pointers to 'chunks' of memory
|
||||
unsigned m_chunk_shift; // ceil(log2(m_chunk_size))
|
||||
unsigned m_chunk_mask; // m_chunk_size - 1
|
||||
unsigned m_chunk_max; // number of entries in the chunk array - each pointing
|
||||
@ -130,38 +130,36 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
/** \brief Compatible view of array of scalar types */
|
||||
typedef DynamicView<typename traits::data_type, typename traits::device_type>
|
||||
array_type;
|
||||
using array_type =
|
||||
DynamicView<typename traits::data_type, typename traits::device_type>;
|
||||
|
||||
/** \brief Compatible view of const data type */
|
||||
typedef DynamicView<typename traits::const_data_type,
|
||||
typename traits::device_type>
|
||||
const_type;
|
||||
using const_type = DynamicView<typename traits::const_data_type,
|
||||
typename traits::device_type>;
|
||||
|
||||
/** \brief Compatible view of non-const data type */
|
||||
typedef DynamicView<typename traits::non_const_data_type,
|
||||
typename traits::device_type>
|
||||
non_const_type;
|
||||
using non_const_type = DynamicView<typename traits::non_const_data_type,
|
||||
typename traits::device_type>;
|
||||
|
||||
/** \brief Must be accessible everywhere */
|
||||
typedef DynamicView HostMirror;
|
||||
using HostMirror = DynamicView;
|
||||
|
||||
/** \brief Unified types */
|
||||
typedef Kokkos::Device<typename traits::device_type::execution_space,
|
||||
Kokkos::AnonymousSpace>
|
||||
uniform_device;
|
||||
typedef array_type uniform_type;
|
||||
typedef const_type uniform_const_type;
|
||||
typedef array_type uniform_runtime_type;
|
||||
typedef const_type uniform_runtime_const_type;
|
||||
typedef DynamicView<typename traits::data_type, uniform_device>
|
||||
uniform_nomemspace_type;
|
||||
typedef DynamicView<typename traits::const_data_type, uniform_device>
|
||||
uniform_const_nomemspace_type;
|
||||
typedef DynamicView<typename traits::data_type, uniform_device>
|
||||
uniform_runtime_nomemspace_type;
|
||||
typedef DynamicView<typename traits::const_data_type, uniform_device>
|
||||
uniform_runtime_const_nomemspace_type;
|
||||
using uniform_device =
|
||||
Kokkos::Device<typename traits::device_type::execution_space,
|
||||
Kokkos::AnonymousSpace>;
|
||||
using uniform_type = array_type;
|
||||
using uniform_const_type = const_type;
|
||||
using uniform_runtime_type = array_type;
|
||||
using uniform_runtime_const_type = const_type;
|
||||
using uniform_nomemspace_type =
|
||||
DynamicView<typename traits::data_type, uniform_device>;
|
||||
using uniform_const_nomemspace_type =
|
||||
DynamicView<typename traits::const_data_type, uniform_device>;
|
||||
using uniform_runtime_nomemspace_type =
|
||||
DynamicView<typename traits::data_type, uniform_device>;
|
||||
using uniform_runtime_const_nomemspace_type =
|
||||
DynamicView<typename traits::const_data_type, uniform_device>;
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
@ -193,17 +191,6 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
return r == 0 ? size() : 1;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
KOKKOS_INLINE_FUNCTION size_t dimension_0() const { return size(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return 1; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return 1; }
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return 0; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return 0; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return 0; }
|
||||
@ -231,8 +218,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
//----------------------------------------------------------------------
|
||||
// Range span is the span which contains all members.
|
||||
|
||||
typedef typename traits::value_type& reference_type;
|
||||
typedef typename traits::value_type* pointer_type;
|
||||
using reference_type = typename traits::value_type&;
|
||||
using pointer_type = typename traits::value_type*;
|
||||
|
||||
enum {
|
||||
reference_type_is_lvalue_reference =
|
||||
@ -299,8 +286,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
typename Impl::ChunkArraySpace<
|
||||
typename traits::memory_space>::memory_space>::accessible>::type
|
||||
resize_serial(IntType const& n) {
|
||||
typedef typename traits::value_type local_value_type;
|
||||
typedef local_value_type* value_pointer_type;
|
||||
using local_value_type = typename traits::value_type;
|
||||
using value_pointer_type = local_value_type*;
|
||||
|
||||
const uintptr_t NC =
|
||||
(n + m_chunk_mask) >>
|
||||
@ -332,6 +319,17 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
*(pc + 1) = n;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION bool is_allocated() const {
|
||||
if (m_chunks == nullptr) {
|
||||
return false;
|
||||
} else {
|
||||
// *m_chunks[m_chunk_max] stores the current number of chunks being used
|
||||
uintptr_t* const pc =
|
||||
reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
|
||||
return (*(pc + 1) > 0);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
~DynamicView() = default;
|
||||
@ -349,8 +347,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
m_chunk_mask(rhs.m_chunk_mask),
|
||||
m_chunk_max(rhs.m_chunk_max),
|
||||
m_chunk_size(rhs.m_chunk_size) {
|
||||
typedef typename DynamicView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
|
||||
using SrcTraits = typename DynamicView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible DynamicView copy construction");
|
||||
}
|
||||
@ -373,9 +371,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
}
|
||||
|
||||
void execute(bool arg_destroy) {
|
||||
typedef Kokkos::RangePolicy<typename HostSpace::execution_space> Range;
|
||||
// typedef Kokkos::RangePolicy< typename Impl::ChunkArraySpace< typename
|
||||
// traits::memory_space >::memory_space::execution_space > Range ;
|
||||
using Range = Kokkos::RangePolicy<typename HostSpace::execution_space>;
|
||||
|
||||
m_destroy = arg_destroy;
|
||||
|
||||
@ -431,12 +427,11 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
|
||||
m_chunk_shift) // max num pointers-to-chunks in array
|
||||
,
|
||||
m_chunk_size(2 << (m_chunk_shift - 1)) {
|
||||
typedef typename Impl::ChunkArraySpace<
|
||||
typename traits::memory_space>::memory_space chunk_array_memory_space;
|
||||
using chunk_array_memory_space = typename Impl::ChunkArraySpace<
|
||||
typename traits::memory_space>::memory_space;
|
||||
// A functor to deallocate all of the chunks upon final destruction
|
||||
typedef Kokkos::Impl::SharedAllocationRecord<chunk_array_memory_space,
|
||||
Destroy>
|
||||
record_type;
|
||||
using record_type =
|
||||
Kokkos::Impl::SharedAllocationRecord<chunk_array_memory_space, Destroy>;
|
||||
|
||||
// Allocate chunk pointers and allocation counter
|
||||
record_type* const record =
|
||||
@ -471,11 +466,11 @@ create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
template <class T, class... DP, class... SP>
|
||||
inline void deep_copy(const View<T, DP...>& dst,
|
||||
const Kokkos::Experimental::DynamicView<T, SP...>& src) {
|
||||
typedef View<T, DP...> dst_type;
|
||||
typedef Kokkos::Experimental::DynamicView<T, SP...> src_type;
|
||||
using dst_type = View<T, DP...>;
|
||||
using src_type = Kokkos::Experimental::DynamicView<T, SP...>;
|
||||
|
||||
typedef typename ViewTraits<T, DP...>::execution_space dst_execution_space;
|
||||
typedef typename ViewTraits<T, SP...>::memory_space src_memory_space;
|
||||
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
|
||||
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;
|
||||
|
||||
enum {
|
||||
DstExecCanAccessSrc =
|
||||
@ -496,11 +491,11 @@ inline void deep_copy(const View<T, DP...>& dst,
|
||||
template <class T, class... DP, class... SP>
|
||||
inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst,
|
||||
const View<T, SP...>& src) {
|
||||
typedef Kokkos::Experimental::DynamicView<T, SP...> dst_type;
|
||||
typedef View<T, DP...> src_type;
|
||||
using dst_type = Kokkos::Experimental::DynamicView<T, SP...>;
|
||||
using src_type = View<T, DP...>;
|
||||
|
||||
typedef typename ViewTraits<T, DP...>::execution_space dst_execution_space;
|
||||
typedef typename ViewTraits<T, SP...>::memory_space src_memory_space;
|
||||
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
|
||||
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;
|
||||
|
||||
enum {
|
||||
DstExecCanAccessSrc =
|
||||
@ -522,10 +517,10 @@ namespace Impl {
|
||||
template <class Arg0, class... DP, class... SP>
|
||||
struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
|
||||
Kokkos::Experimental::DynamicView<SP...>, 1, Arg0> {
|
||||
typedef Kokkos::Experimental::DynamicView<DP...> DstType;
|
||||
typedef Kokkos::Experimental::DynamicView<SP...> SrcType;
|
||||
typedef DstType dst_subview_type;
|
||||
typedef SrcType src_subview_type;
|
||||
using DstType = Kokkos::Experimental::DynamicView<DP...>;
|
||||
using SrcType = Kokkos::Experimental::DynamicView<SP...>;
|
||||
using dst_subview_type = DstType;
|
||||
using src_subview_type = SrcType;
|
||||
dst_subview_type dst_sub;
|
||||
src_subview_type src_sub;
|
||||
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& /*arg0*/)
|
||||
@ -535,9 +530,9 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
|
||||
template <class... DP, class SrcType, class Arg0>
|
||||
struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>, SrcType, 1,
|
||||
Arg0> {
|
||||
typedef Kokkos::Experimental::DynamicView<DP...> DstType;
|
||||
typedef DstType dst_subview_type;
|
||||
typedef typename Kokkos::Subview<SrcType, Arg0> src_subview_type;
|
||||
using DstType = Kokkos::Experimental::DynamicView<DP...>;
|
||||
using dst_subview_type = DstType;
|
||||
using src_subview_type = typename Kokkos::Subview<SrcType, Arg0>;
|
||||
dst_subview_type dst_sub;
|
||||
src_subview_type src_sub;
|
||||
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0)
|
||||
@ -547,9 +542,9 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>, SrcType, 1,
|
||||
template <class DstType, class... SP, class Arg0>
|
||||
struct CommonSubview<DstType, Kokkos::Experimental::DynamicView<SP...>, 1,
|
||||
Arg0> {
|
||||
typedef Kokkos::Experimental::DynamicView<SP...> SrcType;
|
||||
typedef typename Kokkos::Subview<DstType, Arg0> dst_subview_type;
|
||||
typedef SrcType src_subview_type;
|
||||
using SrcType = Kokkos::Experimental::DynamicView<SP...>;
|
||||
using dst_subview_type = typename Kokkos::Subview<DstType, Arg0>;
|
||||
using src_subview_type = SrcType;
|
||||
dst_subview_type dst_sub;
|
||||
src_subview_type src_sub;
|
||||
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0)
|
||||
@ -559,11 +554,11 @@ struct CommonSubview<DstType, Kokkos::Experimental::DynamicView<SP...>, 1,
|
||||
template <class... DP, class ViewTypeB, class Layout, class ExecSpace,
|
||||
typename iType>
|
||||
struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>, ViewTypeB, Layout,
|
||||
ExecSpace, 1, iType, false> {
|
||||
ExecSpace, 1, iType> {
|
||||
Kokkos::Experimental::DynamicView<DP...> a;
|
||||
ViewTypeB b;
|
||||
|
||||
typedef Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>> policy_type;
|
||||
using policy_type = Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>>;
|
||||
|
||||
ViewCopy(const Kokkos::Experimental::DynamicView<DP...>& a_,
|
||||
const ViewTypeB& b_)
|
||||
@ -580,11 +575,11 @@ template <class... DP, class... SP, class Layout, class ExecSpace,
|
||||
typename iType>
|
||||
struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>,
|
||||
Kokkos::Experimental::DynamicView<SP...>, Layout, ExecSpace, 1,
|
||||
iType, false> {
|
||||
iType> {
|
||||
Kokkos::Experimental::DynamicView<DP...> a;
|
||||
Kokkos::Experimental::DynamicView<SP...> b;
|
||||
|
||||
typedef Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>> policy_type;
|
||||
using policy_type = Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>>;
|
||||
|
||||
ViewCopy(const Kokkos::Experimental::DynamicView<DP...>& a_,
|
||||
const Kokkos::Experimental::DynamicView<SP...>& b_)
|
||||
|
||||
@ -56,9 +56,9 @@ namespace Experimental {
|
||||
template <typename ReportType, typename DeviceType>
|
||||
class ErrorReporter {
|
||||
public:
|
||||
typedef ReportType report_type;
|
||||
typedef DeviceType device_type;
|
||||
typedef typename device_type::execution_space execution_space;
|
||||
using report_type = ReportType;
|
||||
using device_type = DeviceType;
|
||||
using execution_space = typename device_type::execution_space;
|
||||
|
||||
ErrorReporter(int max_results)
|
||||
: m_numReportsAttempted(""),
|
||||
@ -103,10 +103,10 @@ class ErrorReporter {
|
||||
}
|
||||
|
||||
private:
|
||||
typedef Kokkos::View<report_type *, execution_space> reports_view_t;
|
||||
typedef Kokkos::DualView<report_type *, execution_space> reports_dualview_t;
|
||||
using reports_view_t = Kokkos::View<report_type *, execution_space>;
|
||||
using reports_dualview_t = Kokkos::DualView<report_type *, execution_space>;
|
||||
|
||||
typedef typename reports_dualview_t::host_mirror_space host_mirror_space;
|
||||
using host_mirror_space = typename reports_dualview_t::host_mirror_space;
|
||||
Kokkos::View<int, execution_space> m_numReportsAttempted;
|
||||
reports_dualview_t m_reports;
|
||||
Kokkos::DualView<int *, execution_space> m_reporters;
|
||||
|
||||
@ -52,10 +52,10 @@ namespace Kokkos {
|
||||
|
||||
template <typename T>
|
||||
struct pod_hash {
|
||||
typedef T argument_type;
|
||||
typedef T first_argument_type;
|
||||
typedef uint32_t second_argument_type;
|
||||
typedef uint32_t result_type;
|
||||
using argument_type = T;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = uint32_t;
|
||||
using result_type = uint32_t;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
uint32_t operator()(T const& t) const {
|
||||
@ -70,9 +70,9 @@ struct pod_hash {
|
||||
|
||||
template <typename T>
|
||||
struct pod_equal_to {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const {
|
||||
@ -82,9 +82,9 @@ struct pod_equal_to {
|
||||
|
||||
template <typename T>
|
||||
struct pod_not_equal_to {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const {
|
||||
@ -94,9 +94,9 @@ struct pod_not_equal_to {
|
||||
|
||||
template <typename T>
|
||||
struct equal_to {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a == b; }
|
||||
@ -104,9 +104,9 @@ struct equal_to {
|
||||
|
||||
template <typename T>
|
||||
struct not_equal_to {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a != b; }
|
||||
@ -114,9 +114,9 @@ struct not_equal_to {
|
||||
|
||||
template <typename T>
|
||||
struct greater {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a > b; }
|
||||
@ -124,9 +124,9 @@ struct greater {
|
||||
|
||||
template <typename T>
|
||||
struct less {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a < b; }
|
||||
@ -134,9 +134,9 @@ struct less {
|
||||
|
||||
template <typename T>
|
||||
struct greater_equal {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a >= b; }
|
||||
@ -144,9 +144,9 @@ struct greater_equal {
|
||||
|
||||
template <typename T>
|
||||
struct less_equal {
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef bool result_type;
|
||||
using first_argument_type = T;
|
||||
using second_argument_type = T;
|
||||
using result_type = bool;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool operator()(T const& a, T const& b) const { return a <= b; }
|
||||
|
||||
@ -51,10 +51,10 @@ namespace Impl {
|
||||
|
||||
template <class ViewType>
|
||||
struct GetOffsetViewTypeFromViewType {
|
||||
typedef OffsetView<
|
||||
typename ViewType::data_type, typename ViewType::array_layout,
|
||||
typename ViewType::device_type, typename ViewType::memory_traits>
|
||||
type;
|
||||
using type =
|
||||
OffsetView<typename ViewType::data_type, typename ViewType::array_layout,
|
||||
typename ViewType::device_type,
|
||||
typename ViewType::memory_traits>;
|
||||
};
|
||||
|
||||
template <unsigned, class MapType, class BeginsType>
|
||||
@ -180,7 +180,7 @@ void runtime_check_rank_device(const size_t rank_dynamic, const size_t rank,
|
||||
template <class DataType, class... Properties>
|
||||
class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
public:
|
||||
typedef ViewTraits<DataType, Properties...> traits;
|
||||
using traits = ViewTraits<DataType, Properties...>;
|
||||
|
||||
private:
|
||||
template <class, class...>
|
||||
@ -190,12 +190,12 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
template <class, class...>
|
||||
friend class Kokkos::Impl::ViewMapping;
|
||||
|
||||
typedef Kokkos::Impl::ViewMapping<traits, void> map_type;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type;
|
||||
using map_type = Kokkos::Impl::ViewMapping<traits, void>;
|
||||
using track_type = Kokkos::Impl::SharedAllocationTracker;
|
||||
|
||||
public:
|
||||
enum { Rank = map_type::Rank };
|
||||
typedef Kokkos::Array<int64_t, Rank> begins_type;
|
||||
using begins_type = Kokkos::Array<int64_t, Rank>;
|
||||
|
||||
template <
|
||||
typename iType,
|
||||
@ -223,28 +223,27 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
public:
|
||||
//----------------------------------------
|
||||
/** \brief Compatible view of array of scalar types */
|
||||
typedef OffsetView<
|
||||
typename traits::scalar_array_type, typename traits::array_layout,
|
||||
typename traits::device_type, typename traits::memory_traits>
|
||||
array_type;
|
||||
using array_type =
|
||||
OffsetView<typename traits::scalar_array_type,
|
||||
typename traits::array_layout, typename traits::device_type,
|
||||
typename traits::memory_traits>;
|
||||
|
||||
/** \brief Compatible view of const data type */
|
||||
typedef OffsetView<
|
||||
typename traits::const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type, typename traits::memory_traits>
|
||||
const_type;
|
||||
using const_type =
|
||||
OffsetView<typename traits::const_data_type,
|
||||
typename traits::array_layout, typename traits::device_type,
|
||||
typename traits::memory_traits>;
|
||||
|
||||
/** \brief Compatible view of non-const data type */
|
||||
typedef OffsetView<
|
||||
typename traits::non_const_data_type, typename traits::array_layout,
|
||||
typename traits::device_type, typename traits::memory_traits>
|
||||
non_const_type;
|
||||
using non_const_type =
|
||||
OffsetView<typename traits::non_const_data_type,
|
||||
typename traits::array_layout, typename traits::device_type,
|
||||
typename traits::memory_traits>;
|
||||
|
||||
/** \brief Compatible HostMirror view */
|
||||
typedef OffsetView<typename traits::non_const_data_type,
|
||||
typename traits::array_layout,
|
||||
typename traits::host_mirror_space>
|
||||
HostMirror;
|
||||
using HostMirror = OffsetView<typename traits::non_const_data_type,
|
||||
typename traits::array_layout,
|
||||
typename traits::host_mirror_space>;
|
||||
|
||||
//----------------------------------------
|
||||
// Domain rank and extents
|
||||
@ -335,8 +334,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
//----------------------------------------
|
||||
// Range span is the span which contains all members.
|
||||
|
||||
typedef typename map_type::reference_type reference_type;
|
||||
typedef typename map_type::pointer_type pointer_type;
|
||||
using reference_type = typename map_type::reference_type;
|
||||
using pointer_type = typename map_type::pointer_type;
|
||||
|
||||
enum {
|
||||
reference_type_is_lvalue_reference =
|
||||
@ -347,6 +346,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const {
|
||||
return m_map.span_is_contiguous();
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return m_map.data() != nullptr;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const {
|
||||
return m_map.data();
|
||||
}
|
||||
@ -841,10 +843,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
|
||||
// interoperability with View
|
||||
private:
|
||||
typedef View<typename traits::scalar_array_type,
|
||||
typename traits::array_layout, typename traits::device_type,
|
||||
typename traits::memory_traits>
|
||||
view_type;
|
||||
using view_type =
|
||||
View<typename traits::scalar_array_type, typename traits::array_layout,
|
||||
typename traits::device_type, typename traits::memory_traits>;
|
||||
|
||||
public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -856,8 +857,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
template <class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview)
|
||||
: m_track(aview.impl_track()), m_map() {
|
||||
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
|
||||
using SrcTraits = typename OffsetView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible OffsetView copy construction");
|
||||
Mapping::assign(m_map, aview.impl_map(), m_track);
|
||||
@ -871,8 +872,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview,
|
||||
const index_list_type& minIndices)
|
||||
: m_track(aview.impl_track()), m_map() {
|
||||
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
|
||||
using SrcTraits = typename OffsetView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible OffsetView copy construction");
|
||||
Mapping::assign(m_map, aview.impl_map(), m_track);
|
||||
@ -894,8 +895,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview,
|
||||
const begins_type& beg)
|
||||
: m_track(aview.impl_track()), m_map(), m_begins(beg) {
|
||||
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
|
||||
using SrcTraits = typename OffsetView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible OffsetView copy construction");
|
||||
Mapping::assign(m_map, aview.impl_map(), m_track);
|
||||
@ -917,8 +918,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
: m_track(rhs.m_track, traits::is_managed),
|
||||
m_map(),
|
||||
m_begins(rhs.m_begins) {
|
||||
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
|
||||
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
|
||||
using SrcTraits = typename OffsetView<RT, RP...>::traits;
|
||||
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
|
||||
static_assert(Mapping::is_assignable,
|
||||
"Incompatible OffsetView copy construction");
|
||||
Mapping::assign(m_map, rhs.m_map, rhs.m_track); // swb what about assign?
|
||||
@ -1215,11 +1216,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
for (size_t i = 0; i < Rank; ++i) m_begins[i] = minIndices.begin()[i];
|
||||
|
||||
// Append layout and spaces if not input
|
||||
typedef Kokkos::Impl::ViewCtorProp<P...> alloc_prop_input;
|
||||
using alloc_prop_input = Kokkos::Impl::ViewCtorProp<P...>;
|
||||
|
||||
// use 'std::integral_constant<unsigned,I>' for non-types
|
||||
// to avoid duplicate class error.
|
||||
typedef Kokkos::Impl::ViewCtorProp<
|
||||
using alloc_prop = Kokkos::Impl::ViewCtorProp<
|
||||
P...,
|
||||
typename std::conditional<alloc_prop_input::has_label,
|
||||
std::integral_constant<unsigned, 0>,
|
||||
@ -1231,19 +1232,13 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
|
||||
typename std::conditional<
|
||||
alloc_prop_input::has_execution_space,
|
||||
std::integral_constant<unsigned, 2>,
|
||||
typename traits::device_type::execution_space>::type>
|
||||
alloc_prop;
|
||||
typename traits::device_type::execution_space>::type>;
|
||||
|
||||
static_assert(traits::is_managed,
|
||||
"OffsetView allocation constructor requires managed memory");
|
||||
|
||||
if (alloc_prop::initialize &&
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
!alloc_prop::execution_space::is_initialized()
|
||||
#else
|
||||
!alloc_prop::execution_space::impl_is_initialized()
|
||||
#endif
|
||||
) {
|
||||
!alloc_prop::execution_space::impl_is_initialized()) {
|
||||
// If initializing view data then
|
||||
// the execution space must be initialized.
|
||||
Kokkos::Impl::throw_runtime_exception(
|
||||
@ -1764,8 +1759,8 @@ template <class LT, class... LP, class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION bool operator==(const OffsetView<LT, LP...>& lhs,
|
||||
const OffsetView<RT, RP...>& rhs) {
|
||||
// Same data, layout, dimensions
|
||||
typedef ViewTraits<LT, LP...> lhs_traits;
|
||||
typedef ViewTraits<RT, RP...> rhs_traits;
|
||||
using lhs_traits = ViewTraits<LT, LP...>;
|
||||
using rhs_traits = ViewTraits<RT, RP...>;
|
||||
|
||||
return std::is_same<typename lhs_traits::const_value_type,
|
||||
typename rhs_traits::const_value_type>::value &&
|
||||
@ -1795,8 +1790,8 @@ template <class LT, class... LP, class RT, class... RP>
|
||||
KOKKOS_INLINE_FUNCTION bool operator==(const View<LT, LP...>& lhs,
|
||||
const OffsetView<RT, RP...>& rhs) {
|
||||
// Same data, layout, dimensions
|
||||
typedef ViewTraits<LT, LP...> lhs_traits;
|
||||
typedef ViewTraits<RT, RP...> rhs_traits;
|
||||
using lhs_traits = ViewTraits<LT, LP...>;
|
||||
using rhs_traits = ViewTraits<RT, RP...>;
|
||||
|
||||
return std::is_same<typename lhs_traits::const_value_type,
|
||||
typename rhs_traits::const_value_type>::value &&
|
||||
@ -1825,10 +1820,10 @@ KOKKOS_INLINE_FUNCTION bool operator==(const OffsetView<LT, LP...>& lhs,
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template <class DT, class... DP>
|
||||
inline void deep_copy(
|
||||
const OffsetView<DT, DP...>& dst,
|
||||
const Experimental::OffsetView<DT, DP...>& dst,
|
||||
typename ViewTraits<DT, DP...>::const_value_type& value,
|
||||
typename std::enable_if<std::is_same<
|
||||
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
|
||||
@ -1844,7 +1839,8 @@ inline void deep_copy(
|
||||
|
||||
template <class DT, class... DP, class ST, class... SP>
|
||||
inline void deep_copy(
|
||||
const OffsetView<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
|
||||
const Experimental::OffsetView<DT, DP...>& dst,
|
||||
const Experimental::OffsetView<ST, SP...>& value,
|
||||
typename std::enable_if<std::is_same<
|
||||
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
|
||||
nullptr) {
|
||||
@ -1858,7 +1854,8 @@ inline void deep_copy(
|
||||
}
|
||||
template <class DT, class... DP, class ST, class... SP>
|
||||
inline void deep_copy(
|
||||
const OffsetView<DT, DP...>& dst, const View<ST, SP...>& value,
|
||||
const Experimental::OffsetView<DT, DP...>& dst,
|
||||
const View<ST, SP...>& value,
|
||||
typename std::enable_if<std::is_same<
|
||||
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
|
||||
nullptr) {
|
||||
@ -1873,7 +1870,8 @@ inline void deep_copy(
|
||||
|
||||
template <class DT, class... DP, class ST, class... SP>
|
||||
inline void deep_copy(
|
||||
const View<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
|
||||
const View<DT, DP...>& dst,
|
||||
const Experimental::OffsetView<ST, SP...>& value,
|
||||
typename std::enable_if<std::is_same<
|
||||
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
|
||||
nullptr) {
|
||||
@ -1884,53 +1882,54 @@ inline void deep_copy(
|
||||
|
||||
Kokkos::deep_copy(dst, value.view());
|
||||
}
|
||||
|
||||
namespace Impl {
|
||||
|
||||
// Deduce Mirror Types
|
||||
template <class Space, class T, class... P>
|
||||
struct MirrorOffsetViewType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::Experimental::OffsetView<T, P...> src_view_type;
|
||||
using src_view_type = typename Kokkos::Experimental::OffsetView<T, P...>;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
using memory_space = typename Space::memory_space;
|
||||
// Check whether it is the same memory space
|
||||
enum {
|
||||
is_same_memspace =
|
||||
std::is_same<memory_space, typename src_view_type::memory_space>::value
|
||||
};
|
||||
// The array_layout
|
||||
typedef typename src_view_type::array_layout array_layout;
|
||||
using array_layout = typename src_view_type::array_layout;
|
||||
// The data type (we probably want it non-const since otherwise we can't even
|
||||
// deep_copy to it.
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
using data_type = typename src_view_type::non_const_data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::Experimental::OffsetView<data_type, array_layout, Space>
|
||||
dest_view_type;
|
||||
using dest_view_type =
|
||||
Kokkos::Experimental::OffsetView<data_type, array_layout, Space>;
|
||||
// If it is the same memory_space return the existsing view_type
|
||||
// This will also keep the unmanaged trait if necessary
|
||||
typedef typename std::conditional<is_same_memspace, src_view_type,
|
||||
dest_view_type>::type view_type;
|
||||
using view_type = typename std::conditional<is_same_memspace, src_view_type,
|
||||
dest_view_type>::type;
|
||||
};
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
struct MirrorOffsetType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::Experimental::OffsetView<T, P...> src_view_type;
|
||||
using src_view_type = typename Kokkos::Experimental::OffsetView<T, P...>;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
using memory_space = typename Space::memory_space;
|
||||
// Check whether it is the same memory space
|
||||
enum {
|
||||
is_same_memspace =
|
||||
std::is_same<memory_space, typename src_view_type::memory_space>::value
|
||||
};
|
||||
// The array_layout
|
||||
typedef typename src_view_type::array_layout array_layout;
|
||||
using array_layout = typename src_view_type::array_layout;
|
||||
// The data type (we probably want it non-const since otherwise we can't even
|
||||
// deep_copy to it.
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
using data_type = typename src_view_type::non_const_data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::Experimental::OffsetView<data_type, array_layout, Space>
|
||||
view_type;
|
||||
using view_type =
|
||||
Kokkos::Experimental::OffsetView<data_type, array_layout, Space>;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
@ -1942,8 +1941,8 @@ create_mirror(
|
||||
typename std::enable_if<
|
||||
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
|
||||
Kokkos::LayoutStride>::value>::type* = 0) {
|
||||
typedef OffsetView<T, P...> src_type;
|
||||
typedef typename src_type::HostMirror dst_type;
|
||||
using src_type = Experimental::OffsetView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
return dst_type(
|
||||
Kokkos::Impl::ViewCtorProp<std::string>(
|
||||
@ -1962,8 +1961,8 @@ create_mirror(
|
||||
typename std::enable_if<
|
||||
std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
|
||||
Kokkos::LayoutStride>::value>::type* = 0) {
|
||||
typedef OffsetView<T, P...> src_type;
|
||||
typedef typename src_type::HostMirror dst_type;
|
||||
using src_type = Experimental::OffsetView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
Kokkos::LayoutStride layout;
|
||||
|
||||
@ -1992,14 +1991,13 @@ create_mirror(
|
||||
|
||||
// Create a mirror in a new space (specialization for different space)
|
||||
template <class Space, class T, class... P>
|
||||
typename Kokkos::Experimental::Impl::MirrorOffsetType<Space, T, P...>::view_type
|
||||
typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type
|
||||
create_mirror(const Space&,
|
||||
const Kokkos::Experimental::OffsetView<T, P...>& src) {
|
||||
return typename Kokkos::Experimental::Impl::MirrorOffsetType<
|
||||
Space, T, P...>::view_type(src.label(), src.layout(),
|
||||
{src.begin(0), src.begin(1), src.begin(2),
|
||||
src.begin(3), src.begin(4), src.begin(5),
|
||||
src.begin(6), src.begin(7)});
|
||||
return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type(
|
||||
src.label(), src.layout(),
|
||||
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
|
||||
src.begin(5), src.begin(6), src.begin(7)});
|
||||
}
|
||||
|
||||
template <class T, class... P>
|
||||
@ -2031,13 +2029,12 @@ create_mirror_view(
|
||||
typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
|
||||
typename Kokkos::Experimental::OffsetView<
|
||||
T, P...>::HostMirror::data_type>::value)>::type* = 0) {
|
||||
return Kokkos::Experimental::create_mirror(src);
|
||||
return Kokkos::create_mirror(src);
|
||||
}
|
||||
|
||||
// Create a mirror view in a new space (specialization for same space)
|
||||
template <class Space, class T, class... P>
|
||||
typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space, T,
|
||||
P...>::view_type
|
||||
typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type
|
||||
create_mirror_view(const Space&,
|
||||
const Kokkos::Experimental::OffsetView<T, P...>& src,
|
||||
typename std::enable_if<Impl::MirrorOffsetViewType<
|
||||
@ -2047,17 +2044,15 @@ create_mirror_view(const Space&,
|
||||
|
||||
// Create a mirror view in a new space (specialization for different space)
|
||||
template <class Space, class T, class... P>
|
||||
typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space, T,
|
||||
P...>::view_type
|
||||
typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type
|
||||
create_mirror_view(const Space&,
|
||||
const Kokkos::Experimental::OffsetView<T, P...>& src,
|
||||
typename std::enable_if<!Impl::MirrorOffsetViewType<
|
||||
Space, T, P...>::is_same_memspace>::type* = 0) {
|
||||
return typename Kokkos::Experimental::Impl::MirrorOffsetViewType<
|
||||
Space, T, P...>::view_type(src.label(), src.layout(),
|
||||
{src.begin(0), src.begin(1), src.begin(2),
|
||||
src.begin(3), src.begin(4), src.begin(5),
|
||||
src.begin(6), src.begin(7)});
|
||||
return typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type(
|
||||
src.label(), src.layout(),
|
||||
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
|
||||
src.begin(5), src.begin(6), src.begin(7)});
|
||||
}
|
||||
//
|
||||
// // Create a mirror view and deep_copy in a new space (specialization for
|
||||
@ -2093,7 +2088,6 @@ create_mirror_view(const Space&,
|
||||
// return mirror;
|
||||
// }
|
||||
|
||||
} // namespace Experimental
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -57,7 +57,7 @@ namespace Kokkos {
|
||||
namespace Impl {
|
||||
template <class RowOffsetsType, class RowBlockOffsetsType>
|
||||
struct StaticCrsGraphBalancerFunctor {
|
||||
typedef typename RowOffsetsType::non_const_value_type int_type;
|
||||
using int_type = typename RowOffsetsType::non_const_value_type;
|
||||
RowOffsetsType row_offsets;
|
||||
RowBlockOffsetsType row_block_offsets;
|
||||
|
||||
@ -148,7 +148,7 @@ struct StaticCrsGraphBalancerFunctor {
|
||||
///
|
||||
/// Here is an example loop over the entries in the row:
|
||||
/// \code
|
||||
/// typedef typename GraphRowViewConst<MatrixType>::ordinal_type ordinal_type;
|
||||
/// using ordinal_type = typename GraphRowViewConst<MatrixType>::ordinal_type;
|
||||
///
|
||||
/// GraphRowView<GraphType> G_i = ...;
|
||||
/// const ordinal_type numEntries = G_i.length;
|
||||
@ -159,7 +159,7 @@ struct StaticCrsGraphBalancerFunctor {
|
||||
/// \endcode
|
||||
///
|
||||
/// GraphType must provide the \c data_type
|
||||
/// typedefs. In addition, it must make sense to use GraphRowViewConst to
|
||||
/// aliases. In addition, it must make sense to use GraphRowViewConst to
|
||||
/// view a row of GraphType. In particular, column
|
||||
/// indices of a row must be accessible using the <tt>entries</tt>
|
||||
/// resp. <tt>colidx</tt> arrays given to the constructor of this
|
||||
@ -170,7 +170,7 @@ struct StaticCrsGraphBalancerFunctor {
|
||||
template <class GraphType>
|
||||
struct GraphRowViewConst {
|
||||
//! The type of the column indices in the row.
|
||||
typedef const typename GraphType::data_type ordinal_type;
|
||||
using ordinal_type = const typename GraphType::data_type;
|
||||
|
||||
private:
|
||||
//! Array of (local) column indices in the row.
|
||||
@ -279,49 +279,33 @@ struct GraphRowViewConst {
|
||||
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
|
||||
/// </ul>
|
||||
template <class DataType, class Arg1Type, class Arg2Type = void,
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
typename SizeType =
|
||||
typename ViewTraits<DataType*, Arg1Type, Arg2Type>::size_type,
|
||||
class Arg3Type = void>
|
||||
#else
|
||||
class Arg3Type = void,
|
||||
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type,
|
||||
Arg3Type>::size_type>
|
||||
#endif
|
||||
class StaticCrsGraph {
|
||||
private:
|
||||
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, Arg3Type> traits;
|
||||
using traits = ViewTraits<DataType*, Arg1Type, Arg2Type, Arg3Type>;
|
||||
|
||||
public:
|
||||
typedef DataType data_type;
|
||||
typedef typename traits::array_layout array_layout;
|
||||
typedef typename traits::execution_space execution_space;
|
||||
typedef typename traits::device_type device_type;
|
||||
typedef typename traits::memory_traits memory_traits;
|
||||
typedef SizeType size_type;
|
||||
using data_type = DataType;
|
||||
using array_layout = typename traits::array_layout;
|
||||
using execution_space = typename traits::execution_space;
|
||||
using device_type = typename traits::device_type;
|
||||
using memory_traits = typename traits::memory_traits;
|
||||
using size_type = SizeType;
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
|
||||
staticcrsgraph_type;
|
||||
typedef StaticCrsGraph<data_type, array_layout,
|
||||
typename traits::host_mirror_space, size_type,
|
||||
memory_traits>
|
||||
HostMirror;
|
||||
#else
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
|
||||
staticcrsgraph_type;
|
||||
typedef StaticCrsGraph<data_type, array_layout,
|
||||
typename traits::host_mirror_space, memory_traits,
|
||||
size_type>
|
||||
HostMirror;
|
||||
#endif
|
||||
using staticcrsgraph_type =
|
||||
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
|
||||
using HostMirror = StaticCrsGraph<data_type, array_layout,
|
||||
typename traits::host_mirror_space,
|
||||
memory_traits, size_type>;
|
||||
|
||||
typedef View<const size_type*, array_layout, device_type, memory_traits>
|
||||
row_map_type;
|
||||
typedef View<data_type*, array_layout, device_type, memory_traits>
|
||||
entries_type;
|
||||
typedef View<const size_type*, array_layout, device_type, memory_traits>
|
||||
row_block_type;
|
||||
using row_map_type =
|
||||
View<const size_type*, array_layout, device_type, memory_traits>;
|
||||
using entries_type =
|
||||
View<data_type*, array_layout, device_type, memory_traits>;
|
||||
using row_block_type =
|
||||
View<const size_type*, array_layout, device_type, memory_traits>;
|
||||
|
||||
entries_type entries;
|
||||
row_map_type row_map;
|
||||
@ -370,6 +354,10 @@ class StaticCrsGraph {
|
||||
: static_cast<size_type>(0);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return (row_map.is_allocated() && entries.is_allocated());
|
||||
}
|
||||
|
||||
/// \brief Return a const view of row i of the graph.
|
||||
///
|
||||
/// If row i does not belong to the graph, return an empty view.
|
||||
@ -436,35 +424,19 @@ typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template <class DataType, class Arg1Type, class Arg2Type,
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
typename SizeType, class Arg3Type>
|
||||
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>::HostMirror
|
||||
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>& input);
|
||||
#else
|
||||
class Arg3Type, typename SizeType>
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
SizeType>::HostMirror
|
||||
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
SizeType>& input);
|
||||
#endif
|
||||
|
||||
template <class DataType, class Arg1Type, class Arg2Type,
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
typename SizeType, class Arg3Type>
|
||||
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>::HostMirror
|
||||
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>& input);
|
||||
#else
|
||||
class Arg3Type, typename SizeType>
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
SizeType>::HostMirror
|
||||
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
SizeType>& input);
|
||||
#endif
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -481,8 +453,8 @@ namespace Impl {
|
||||
|
||||
template <class GraphType>
|
||||
struct StaticCrsGraphMaximumEntry {
|
||||
typedef typename GraphType::execution_space execution_space;
|
||||
typedef typename GraphType::data_type value_type;
|
||||
using execution_space = typename GraphType::execution_space;
|
||||
using value_type = typename GraphType::data_type;
|
||||
|
||||
const typename GraphType::entries_type entries;
|
||||
|
||||
@ -505,22 +477,13 @@ struct StaticCrsGraphMaximumEntry {
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
|
||||
class Arg3Type>
|
||||
DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
|
||||
SizeType, Arg3Type>& graph) {
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
|
||||
GraphType;
|
||||
#else
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
|
||||
Arg3Type, SizeType>& graph) {
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
|
||||
GraphType;
|
||||
#endif
|
||||
typedef Impl::StaticCrsGraphMaximumEntry<GraphType> FunctorType;
|
||||
using GraphType =
|
||||
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
|
||||
using FunctorType = Impl::StaticCrsGraphMaximumEntry<GraphType>;
|
||||
|
||||
DataType result = 0;
|
||||
Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0),
|
||||
|
||||
@ -66,7 +66,7 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
enum { UnorderedMapInvalidIndex = ~0u };
|
||||
enum : unsigned { UnorderedMapInvalidIndex = ~0u };
|
||||
|
||||
/// \brief First element of the return value of UnorderedMap::insert().
|
||||
///
|
||||
@ -84,7 +84,7 @@ enum { UnorderedMapInvalidIndex = ~0u };
|
||||
|
||||
class UnorderedMapInsertResult {
|
||||
private:
|
||||
enum Status {
|
||||
enum Status : uint32_t {
|
||||
SUCCESS = 1u << 31,
|
||||
EXISTING = 1u << 30,
|
||||
FREED_EXISTING = 1u << 29,
|
||||
@ -206,42 +206,40 @@ template <typename Key, typename Value,
|
||||
pod_equal_to<typename std::remove_const<Key>::type> >
|
||||
class UnorderedMap {
|
||||
private:
|
||||
typedef typename ViewTraits<Key, Device, void, void>::host_mirror_space
|
||||
host_mirror_space;
|
||||
using host_mirror_space =
|
||||
typename ViewTraits<Key, Device, void, void>::host_mirror_space;
|
||||
|
||||
public:
|
||||
//! \name Public types and constants
|
||||
//@{
|
||||
|
||||
// key_types
|
||||
typedef Key declared_key_type;
|
||||
typedef typename std::remove_const<declared_key_type>::type key_type;
|
||||
typedef typename std::add_const<key_type>::type const_key_type;
|
||||
using declared_key_type = Key;
|
||||
using key_type = typename std::remove_const<declared_key_type>::type;
|
||||
using const_key_type = typename std::add_const<key_type>::type;
|
||||
|
||||
// value_types
|
||||
typedef Value declared_value_type;
|
||||
typedef typename std::remove_const<declared_value_type>::type value_type;
|
||||
typedef typename std::add_const<value_type>::type const_value_type;
|
||||
using declared_value_type = Value;
|
||||
using value_type = typename std::remove_const<declared_value_type>::type;
|
||||
using const_value_type = typename std::add_const<value_type>::type;
|
||||
|
||||
typedef Device device_type;
|
||||
typedef typename Device::execution_space execution_space;
|
||||
typedef Hasher hasher_type;
|
||||
typedef EqualTo equal_to_type;
|
||||
typedef uint32_t size_type;
|
||||
using device_type = Device;
|
||||
using execution_space = typename Device::execution_space;
|
||||
using hasher_type = Hasher;
|
||||
using equal_to_type = EqualTo;
|
||||
using size_type = uint32_t;
|
||||
|
||||
// map_types
|
||||
typedef UnorderedMap<declared_key_type, declared_value_type, device_type,
|
||||
hasher_type, equal_to_type>
|
||||
declared_map_type;
|
||||
typedef UnorderedMap<key_type, value_type, device_type, hasher_type,
|
||||
equal_to_type>
|
||||
insertable_map_type;
|
||||
typedef UnorderedMap<const_key_type, value_type, device_type, hasher_type,
|
||||
equal_to_type>
|
||||
modifiable_map_type;
|
||||
typedef UnorderedMap<const_key_type, const_value_type, device_type,
|
||||
hasher_type, equal_to_type>
|
||||
const_map_type;
|
||||
using declared_map_type =
|
||||
UnorderedMap<declared_key_type, declared_value_type, device_type,
|
||||
hasher_type, equal_to_type>;
|
||||
using insertable_map_type = UnorderedMap<key_type, value_type, device_type,
|
||||
hasher_type, equal_to_type>;
|
||||
using modifiable_map_type =
|
||||
UnorderedMap<const_key_type, value_type, device_type, hasher_type,
|
||||
equal_to_type>;
|
||||
using const_map_type = UnorderedMap<const_key_type, const_value_type,
|
||||
device_type, hasher_type, equal_to_type>;
|
||||
|
||||
static const bool is_set = std::is_same<void, value_type>::value;
|
||||
static const bool has_const_key =
|
||||
@ -254,43 +252,42 @@ class UnorderedMap {
|
||||
static const bool is_modifiable_map = has_const_key && !has_const_value;
|
||||
static const bool is_const_map = has_const_key && has_const_value;
|
||||
|
||||
typedef UnorderedMapInsertResult insert_result;
|
||||
using insert_result = UnorderedMapInsertResult;
|
||||
|
||||
typedef UnorderedMap<Key, Value, host_mirror_space, Hasher, EqualTo>
|
||||
HostMirror;
|
||||
using HostMirror =
|
||||
UnorderedMap<Key, Value, host_mirror_space, Hasher, EqualTo>;
|
||||
|
||||
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
|
||||
using histogram_type = Impl::UnorderedMapHistogram<const_map_type>;
|
||||
|
||||
//@}
|
||||
|
||||
private:
|
||||
enum { invalid_index = ~static_cast<size_type>(0) };
|
||||
enum : size_type { invalid_index = ~static_cast<size_type>(0) };
|
||||
|
||||
typedef typename Impl::if_c<is_set, int, declared_value_type>::type
|
||||
impl_value_type;
|
||||
using impl_value_type =
|
||||
typename Impl::if_c<is_set, int, declared_value_type>::type;
|
||||
|
||||
typedef typename Impl::if_c<
|
||||
using key_type_view = typename Impl::if_c<
|
||||
is_insertable_map, View<key_type *, device_type>,
|
||||
View<const key_type *, device_type, MemoryTraits<RandomAccess> > >::type
|
||||
key_type_view;
|
||||
View<const key_type *, device_type, MemoryTraits<RandomAccess> > >::type;
|
||||
|
||||
typedef typename Impl::if_c<is_insertable_map || is_modifiable_map,
|
||||
View<impl_value_type *, device_type>,
|
||||
View<const impl_value_type *, device_type,
|
||||
MemoryTraits<RandomAccess> > >::type
|
||||
value_type_view;
|
||||
using value_type_view =
|
||||
typename Impl::if_c<is_insertable_map || is_modifiable_map,
|
||||
View<impl_value_type *, device_type>,
|
||||
View<const impl_value_type *, device_type,
|
||||
MemoryTraits<RandomAccess> > >::type;
|
||||
|
||||
typedef typename Impl::if_c<
|
||||
using size_type_view = typename Impl::if_c<
|
||||
is_insertable_map, View<size_type *, device_type>,
|
||||
View<const size_type *, device_type, MemoryTraits<RandomAccess> > >::type
|
||||
size_type_view;
|
||||
View<const size_type *, device_type, MemoryTraits<RandomAccess> > >::type;
|
||||
|
||||
typedef typename Impl::if_c<is_insertable_map, Bitset<execution_space>,
|
||||
ConstBitset<execution_space> >::type bitset_type;
|
||||
using bitset_type =
|
||||
typename Impl::if_c<is_insertable_map, Bitset<execution_space>,
|
||||
ConstBitset<execution_space> >::type;
|
||||
|
||||
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
|
||||
enum { num_scalars = 3 };
|
||||
typedef View<int[num_scalars], LayoutLeft, device_type> scalars_view;
|
||||
using scalars_view = View<int[num_scalars], LayoutLeft, device_type>;
|
||||
|
||||
public:
|
||||
//! \name Public member functions
|
||||
@ -353,6 +350,11 @@ class UnorderedMap {
|
||||
{ Kokkos::deep_copy(m_scalars, 0); }
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return (m_keys.is_allocated() && m_values.is_allocated() &&
|
||||
m_scalars.is_allocated());
|
||||
}
|
||||
|
||||
/// \brief Change the capacity of the the map
|
||||
///
|
||||
/// If there are no failed inserts the current size of the map will
|
||||
@ -742,9 +744,9 @@ class UnorderedMap {
|
||||
|
||||
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
typename SDevice::memory_space>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
typename SDevice::memory_space>;
|
||||
|
||||
raw_deep_copy(tmp.m_hash_lists.data(), src.m_hash_lists.data(),
|
||||
sizeof(size_type) * src.m_hash_lists.extent(0));
|
||||
@ -768,25 +770,25 @@ class UnorderedMap {
|
||||
bool modified() const { return get_flag(modified_idx); }
|
||||
|
||||
void set_flag(int flag) const {
|
||||
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
Kokkos::HostSpace>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
Kokkos::HostSpace>;
|
||||
const int true_ = true;
|
||||
raw_deep_copy(m_scalars.data() + flag, &true_, sizeof(int));
|
||||
}
|
||||
|
||||
void reset_flag(int flag) const {
|
||||
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
Kokkos::HostSpace>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
|
||||
Kokkos::HostSpace>;
|
||||
const int false_ = false;
|
||||
raw_deep_copy(m_scalars.data() + flag, &false_, sizeof(int));
|
||||
}
|
||||
|
||||
bool get_flag(int flag) const {
|
||||
typedef Kokkos::Impl::DeepCopy<Kokkos::HostSpace,
|
||||
typename device_type::memory_space>
|
||||
raw_deep_copy;
|
||||
using raw_deep_copy =
|
||||
Kokkos::Impl::DeepCopy<Kokkos::HostSpace,
|
||||
typename device_type::memory_space>;
|
||||
int result = false;
|
||||
raw_deep_copy(&result, m_scalars.data() + flag, sizeof(int));
|
||||
return result;
|
||||
|
||||
@ -58,19 +58,19 @@ namespace Kokkos {
|
||||
template <class Scalar, class Arg1Type = void>
|
||||
class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
||||
public:
|
||||
typedef Scalar value_type;
|
||||
typedef Scalar* pointer;
|
||||
typedef const Scalar* const_pointer;
|
||||
typedef Scalar& reference;
|
||||
typedef const Scalar& const_reference;
|
||||
typedef Scalar* iterator;
|
||||
typedef const Scalar* const_iterator;
|
||||
typedef size_t size_type;
|
||||
using value_type = Scalar;
|
||||
using pointer = Scalar*;
|
||||
using const_pointer = const Scalar*;
|
||||
using reference = Scalar&;
|
||||
using const_reference = const Scalar&;
|
||||
using iterator = Scalar*;
|
||||
using const_iterator = const Scalar*;
|
||||
using size_type = size_t;
|
||||
|
||||
private:
|
||||
size_t _size;
|
||||
float _extra_storage;
|
||||
typedef DualView<Scalar*, LayoutLeft, Arg1Type> DV;
|
||||
using DV = DualView<Scalar*, LayoutLeft, Arg1Type>;
|
||||
|
||||
public:
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
@ -212,14 +212,17 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
||||
return begin() + start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||
return DV::is_allocated();
|
||||
}
|
||||
|
||||
size_type size() const { return _size; }
|
||||
size_type max_size() const { return 2000000000; }
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
size_type capacity() const { return DV::capacity(); }
|
||||
#endif
|
||||
size_type span() const { return DV::span(); }
|
||||
bool empty() const { return _size == 0; }
|
||||
|
||||
pointer data() const { return DV::h_view.data(); }
|
||||
|
||||
iterator begin() const { return DV::h_view.data(); }
|
||||
|
||||
iterator end() const {
|
||||
@ -310,7 +313,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
||||
|
||||
public:
|
||||
struct set_functor {
|
||||
typedef typename DV::t_dev::execution_space execution_space;
|
||||
using execution_space = typename DV::t_dev::execution_space;
|
||||
typename DV::t_dev _data;
|
||||
Scalar _val;
|
||||
|
||||
@ -321,7 +324,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
||||
};
|
||||
|
||||
struct set_functor_host {
|
||||
typedef typename DV::t_host::execution_space execution_space;
|
||||
using execution_space = typename DV::t_host::execution_space;
|
||||
typename DV::t_host _data;
|
||||
Scalar _val;
|
||||
|
||||
|
||||
@ -65,11 +65,11 @@ unsigned rotate_right(unsigned i, int r) {
|
||||
|
||||
template <typename Bitset>
|
||||
struct BitsetCount {
|
||||
typedef Bitset bitset_type;
|
||||
typedef
|
||||
typename bitset_type::execution_space::execution_space execution_space;
|
||||
typedef typename bitset_type::size_type size_type;
|
||||
typedef size_type value_type;
|
||||
using bitset_type = Bitset;
|
||||
using execution_space =
|
||||
typename bitset_type::execution_space::execution_space;
|
||||
using size_type = typename bitset_type::size_type;
|
||||
using value_type = size_type;
|
||||
|
||||
bitset_type m_bitset;
|
||||
|
||||
|
||||
@ -140,10 +140,10 @@ uint32_t MurmurHash3_x86_32(const void* key, int len, uint32_t seed) {
|
||||
template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION bool bitwise_equal(T const* const a_ptr,
|
||||
T const* const b_ptr) {
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64; // NOLINT(modernize-use-using)
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32; // NOLINT(modernize-use-using)
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16; // NOLINT(modernize-use-using)
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8; // NOLINT(modernize-use-using)
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
|
||||
@ -50,19 +50,6 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
|
||||
class Arg3Type>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>::HostMirror
|
||||
create_mirror_view(
|
||||
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
|
||||
view,
|
||||
typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
|
||||
Arg3Type>::is_hostspace>::type* = 0) {
|
||||
return view;
|
||||
}
|
||||
#else
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
@ -74,20 +61,7 @@ create_mirror_view(
|
||||
Arg3Type>::is_hostspace>::type* = 0) {
|
||||
return view;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
|
||||
class Arg3Type>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>::HostMirror
|
||||
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>& view) {
|
||||
// Force copy:
|
||||
// typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
|
||||
staticcrsgraph_type;
|
||||
#else
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
@ -95,10 +69,9 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
SizeType>& view) {
|
||||
// Force copy:
|
||||
// typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
||||
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
|
||||
staticcrsgraph_type;
|
||||
#endif
|
||||
// using alloc = Impl::ViewAssignment<Impl::ViewDefault>; // unused
|
||||
using staticcrsgraph_type =
|
||||
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
|
||||
|
||||
typename staticcrsgraph_type::HostMirror tmp;
|
||||
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map =
|
||||
@ -120,17 +93,6 @@ create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
return tmp;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
|
||||
class Arg3Type>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
|
||||
Arg3Type>::HostMirror
|
||||
create_mirror_view(
|
||||
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
|
||||
view,
|
||||
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
|
||||
Arg3Type>::is_hostspace>::type* = 0)
|
||||
#else
|
||||
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
|
||||
typename SizeType>
|
||||
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
|
||||
@ -139,9 +101,7 @@ create_mirror_view(
|
||||
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
|
||||
view,
|
||||
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
|
||||
Arg3Type>::is_hostspace>::type* = 0)
|
||||
#endif
|
||||
{
|
||||
Arg3Type>::is_hostspace>::type* = 0) {
|
||||
return create_mirror(view);
|
||||
}
|
||||
} // namespace Kokkos
|
||||
@ -154,16 +114,15 @@ namespace Kokkos {
|
||||
template <class StaticCrsGraphType, class InputSizeType>
|
||||
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
|
||||
const std::string& label, const std::vector<InputSizeType>& input) {
|
||||
typedef StaticCrsGraphType output_type;
|
||||
// typedef std::vector< InputSizeType > input_type ; // unused
|
||||
using output_type = StaticCrsGraphType;
|
||||
// using input_type = std::vector<InputSizeType>; // unused
|
||||
|
||||
typedef typename output_type::entries_type entries_type;
|
||||
using entries_type = typename output_type::entries_type;
|
||||
|
||||
typedef View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>
|
||||
work_type;
|
||||
using work_type = View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>;
|
||||
|
||||
output_type output;
|
||||
|
||||
@ -197,16 +156,15 @@ template <class StaticCrsGraphType, class InputSizeType>
|
||||
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
|
||||
const std::string& label,
|
||||
const std::vector<std::vector<InputSizeType> >& input) {
|
||||
typedef StaticCrsGraphType output_type;
|
||||
typedef typename output_type::entries_type entries_type;
|
||||
using output_type = StaticCrsGraphType;
|
||||
using entries_type = typename output_type::entries_type;
|
||||
|
||||
static_assert(entries_type::rank == 1, "Graph entries view must be rank one");
|
||||
|
||||
typedef View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>
|
||||
work_type;
|
||||
using work_type = View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>;
|
||||
|
||||
output_type output;
|
||||
|
||||
|
||||
@ -60,10 +60,10 @@ uint32_t find_hash_size(uint32_t size);
|
||||
|
||||
template <typename Map>
|
||||
struct UnorderedMapRehash {
|
||||
typedef Map map_type;
|
||||
typedef typename map_type::const_map_type const_map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
using map_type = Map;
|
||||
using const_map_type = typename map_type::const_map_type;
|
||||
using execution_space = typename map_type::execution_space;
|
||||
using size_type = typename map_type::size_type;
|
||||
|
||||
map_type m_dst;
|
||||
const_map_type m_src;
|
||||
@ -84,11 +84,11 @@ struct UnorderedMapRehash {
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapErase {
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
typedef typename map_type::key_type key_type;
|
||||
typedef typename map_type::impl_value_type value_type;
|
||||
using map_type = UMap;
|
||||
using execution_space = typename map_type::execution_space;
|
||||
using size_type = typename map_type::size_type;
|
||||
using key_type = typename map_type::key_type;
|
||||
using value_type = typename map_type::impl_value_type;
|
||||
|
||||
map_type m_map;
|
||||
|
||||
@ -140,12 +140,12 @@ struct UnorderedMapErase {
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapHistogram {
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
using map_type = UMap;
|
||||
using execution_space = typename map_type::execution_space;
|
||||
using size_type = typename map_type::size_type;
|
||||
|
||||
typedef View<int[100], execution_space> histogram_view;
|
||||
typedef typename histogram_view::HostMirror host_histogram_view;
|
||||
using histogram_view = View<int[100], execution_space>;
|
||||
using host_histogram_view = typename histogram_view::HostMirror;
|
||||
|
||||
map_type m_map;
|
||||
histogram_view m_length;
|
||||
@ -230,9 +230,9 @@ struct UnorderedMapHistogram {
|
||||
|
||||
template <typename UMap>
|
||||
struct UnorderedMapPrint {
|
||||
typedef UMap map_type;
|
||||
typedef typename map_type::execution_space execution_space;
|
||||
typedef typename map_type::size_type size_type;
|
||||
using map_type = UMap;
|
||||
using execution_space = typename map_type::execution_space;
|
||||
using size_type = typename map_type::size_type;
|
||||
|
||||
map_type m_map;
|
||||
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include <iostream>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Bitset.hpp>
|
||||
#include <array>
|
||||
|
||||
namespace Test {
|
||||
|
||||
@ -54,9 +55,9 @@ namespace Impl {
|
||||
|
||||
template <typename Bitset, bool Set>
|
||||
struct TestBitset {
|
||||
typedef Bitset bitset_type;
|
||||
typedef typename bitset_type::execution_space execution_space;
|
||||
typedef uint32_t value_type;
|
||||
using bitset_type = Bitset;
|
||||
using execution_space = typename bitset_type::execution_space;
|
||||
using value_type = uint32_t;
|
||||
|
||||
bitset_type m_bitset;
|
||||
|
||||
@ -95,9 +96,9 @@ struct TestBitset {
|
||||
|
||||
template <typename Bitset>
|
||||
struct TestBitsetTest {
|
||||
typedef Bitset bitset_type;
|
||||
typedef typename bitset_type::execution_space execution_space;
|
||||
typedef uint32_t value_type;
|
||||
using bitset_type = Bitset;
|
||||
using execution_space = typename bitset_type::execution_space;
|
||||
using value_type = uint32_t;
|
||||
|
||||
bitset_type m_bitset;
|
||||
|
||||
@ -127,9 +128,9 @@ struct TestBitsetTest {
|
||||
|
||||
template <typename Bitset, bool Set>
|
||||
struct TestBitsetAny {
|
||||
typedef Bitset bitset_type;
|
||||
typedef typename bitset_type::execution_space execution_space;
|
||||
typedef uint32_t value_type;
|
||||
using bitset_type = Bitset;
|
||||
using execution_space = typename bitset_type::execution_space;
|
||||
using value_type = uint32_t;
|
||||
|
||||
bitset_type m_bitset;
|
||||
|
||||
@ -181,16 +182,30 @@ struct TestBitsetAny {
|
||||
|
||||
template <typename Device>
|
||||
void test_bitset() {
|
||||
typedef Kokkos::Bitset<Device> bitset_type;
|
||||
typedef Kokkos::ConstBitset<Device> const_bitset_type;
|
||||
using bitset_type = Kokkos::Bitset<Device>;
|
||||
using const_bitset_type = Kokkos::ConstBitset<Device>;
|
||||
|
||||
// unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
|
||||
unsigned test_sizes[] = {1000u, 1u << 14, 1u << 16, 10000001};
|
||||
{
|
||||
unsigned ts = 100u;
|
||||
bitset_type b1;
|
||||
ASSERT_TRUE(b1.is_allocated());
|
||||
|
||||
for (int i = 0, end = sizeof(test_sizes) / sizeof(unsigned); i < end; ++i) {
|
||||
b1 = bitset_type(ts);
|
||||
bitset_type b2(b1);
|
||||
bitset_type b3(ts);
|
||||
|
||||
ASSERT_TRUE(b1.is_allocated());
|
||||
ASSERT_TRUE(b2.is_allocated());
|
||||
ASSERT_TRUE(b3.is_allocated());
|
||||
}
|
||||
|
||||
std::array<unsigned, 7> test_sizes = {
|
||||
{0u, 10u, 100u, 1000u, 1u << 14, 1u << 16, 10000001}};
|
||||
|
||||
for (const auto test_size : test_sizes) {
|
||||
// std::cout << "Bitset " << test_sizes[i] << std::endl;
|
||||
|
||||
bitset_type bitset(test_sizes[i]);
|
||||
bitset_type bitset(test_size);
|
||||
|
||||
// std::cout << " Check initial count " << std::endl;
|
||||
// nothing should be set
|
||||
@ -253,10 +268,7 @@ void test_bitset() {
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME_HIP deadlock
|
||||
#ifndef KOKKOS_ENABLE_HIP
|
||||
TEST(TEST_CATEGORY, bitset) { test_bitset<TEST_EXECSPACE>(); }
|
||||
#endif
|
||||
} // namespace Test
|
||||
|
||||
#endif // KOKKOS_TEST_BITSET_HPP
|
||||
|
||||
@ -55,13 +55,45 @@
|
||||
namespace Test {
|
||||
|
||||
namespace Impl {
|
||||
template <typename Scalar, class Device>
|
||||
struct test_dualview_alloc {
|
||||
using scalar_type = Scalar;
|
||||
using execution_space = Device;
|
||||
|
||||
template <typename ViewType>
|
||||
bool run_me(unsigned int n, unsigned int m) {
|
||||
if (n < 10) n = 10;
|
||||
if (m < 3) m = 3;
|
||||
|
||||
{
|
||||
ViewType b1;
|
||||
if (b1.is_allocated() == true) return false;
|
||||
|
||||
b1 = ViewType("B1", n, m);
|
||||
ViewType b2(b1);
|
||||
ViewType b3("B3", n, m);
|
||||
|
||||
if (b1.is_allocated() == false) return false;
|
||||
if (b2.is_allocated() == false) return false;
|
||||
if (b3.is_allocated() == false) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
|
||||
test_dualview_alloc(unsigned int size) {
|
||||
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
|
||||
size, 3);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar, class Device>
|
||||
struct test_dualview_combinations {
|
||||
typedef test_dualview_combinations<Scalar, Device> self_type;
|
||||
using self_type = test_dualview_combinations<Scalar, Device>;
|
||||
|
||||
typedef Scalar scalar_type;
|
||||
typedef Device execution_space;
|
||||
using scalar_type = Scalar;
|
||||
using execution_space = Device;
|
||||
|
||||
Scalar reference;
|
||||
Scalar result;
|
||||
@ -110,7 +142,7 @@ struct test_dualview_combinations {
|
||||
|
||||
template <typename Scalar, class ViewType>
|
||||
struct SumViewEntriesFunctor {
|
||||
typedef Scalar value_type;
|
||||
using value_type = Scalar;
|
||||
|
||||
ViewType fv;
|
||||
|
||||
@ -126,8 +158,8 @@ struct SumViewEntriesFunctor {
|
||||
|
||||
template <typename Scalar, class Device>
|
||||
struct test_dual_view_deep_copy {
|
||||
typedef Scalar scalar_type;
|
||||
typedef Device execution_space;
|
||||
using scalar_type = Scalar;
|
||||
using execution_space = Device;
|
||||
|
||||
template <typename ViewType>
|
||||
void run_me(int n, const int m, const bool use_templ_sync) {
|
||||
@ -153,8 +185,8 @@ struct test_dual_view_deep_copy {
|
||||
// Check device view is initialized as expected
|
||||
scalar_type a_d_sum = 0;
|
||||
// Execute on the execution_space associated with t_dev's memory space
|
||||
typedef typename ViewType::t_dev::memory_space::execution_space
|
||||
t_dev_exec_space;
|
||||
using t_dev_exec_space =
|
||||
typename ViewType::t_dev::memory_space::execution_space;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<t_dev_exec_space>(0, n),
|
||||
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
|
||||
@ -220,8 +252,8 @@ struct test_dual_view_deep_copy {
|
||||
|
||||
template <typename Scalar, class Device>
|
||||
struct test_dualview_resize {
|
||||
typedef Scalar scalar_type;
|
||||
typedef Device execution_space;
|
||||
using scalar_type = Scalar;
|
||||
using execution_space = Device;
|
||||
|
||||
template <typename ViewType>
|
||||
void run_me() {
|
||||
@ -244,8 +276,8 @@ struct test_dualview_resize {
|
||||
// Check device view is initialized as expected
|
||||
scalar_type a_d_sum = 0;
|
||||
// Execute on the execution_space associated with t_dev's memory space
|
||||
typedef typename ViewType::t_dev::memory_space::execution_space
|
||||
t_dev_exec_space;
|
||||
using t_dev_exec_space =
|
||||
typename ViewType::t_dev::memory_space::execution_space;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
|
||||
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
|
||||
@ -274,8 +306,8 @@ struct test_dualview_resize {
|
||||
// Check device view is initialized as expected
|
||||
a_d_sum = 0;
|
||||
// Execute on the execution_space associated with t_dev's memory space
|
||||
typedef typename ViewType::t_dev::memory_space::execution_space
|
||||
t_dev_exec_space;
|
||||
using t_dev_exec_space =
|
||||
typename ViewType::t_dev::memory_space::execution_space;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
|
||||
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
|
||||
@ -301,8 +333,8 @@ struct test_dualview_resize {
|
||||
|
||||
template <typename Scalar, class Device>
|
||||
struct test_dualview_realloc {
|
||||
typedef Scalar scalar_type;
|
||||
typedef Device execution_space;
|
||||
using scalar_type = Scalar;
|
||||
using execution_space = Device;
|
||||
|
||||
template <typename ViewType>
|
||||
void run_me() {
|
||||
@ -319,8 +351,8 @@ struct test_dualview_realloc {
|
||||
// Check device view is initialized as expected
|
||||
scalar_type a_d_sum = 0;
|
||||
// Execute on the execution_space associated with t_dev's memory space
|
||||
typedef typename ViewType::t_dev::memory_space::execution_space
|
||||
t_dev_exec_space;
|
||||
using t_dev_exec_space =
|
||||
typename ViewType::t_dev::memory_space::execution_space;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
|
||||
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
|
||||
@ -351,6 +383,12 @@ void test_dualview_combinations(unsigned int size, bool with_init) {
|
||||
ASSERT_EQ(test.result, 0);
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Device>
|
||||
void test_dualview_alloc(unsigned int size) {
|
||||
Impl::test_dualview_alloc<Scalar, Device> test(size);
|
||||
ASSERT_TRUE(test.result);
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Device>
|
||||
void test_dualview_deep_copy() {
|
||||
Impl::test_dual_view_deep_copy<Scalar, Device>();
|
||||
@ -370,6 +408,10 @@ TEST(TEST_CATEGORY, dualview_combination) {
|
||||
test_dualview_combinations<int, TEST_EXECSPACE>(10, true);
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, dualview_alloc) {
|
||||
test_dualview_alloc<int, TEST_EXECSPACE>(10);
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, dualview_combinations_without_init) {
|
||||
test_dualview_combinations<int, TEST_EXECSPACE>(10, false);
|
||||
}
|
||||
|
||||
@ -68,12 +68,12 @@ size_t allocation_count(const Kokkos::DynRankView<T, P...>& view) {
|
||||
|
||||
template <typename T, class DeviceType>
|
||||
struct TestViewOperator {
|
||||
typedef DeviceType execution_space;
|
||||
using execution_space = DeviceType;
|
||||
|
||||
static const unsigned N = 100;
|
||||
static const unsigned D = 3;
|
||||
|
||||
typedef Kokkos::DynRankView<T, execution_space> view_type;
|
||||
using view_type = Kokkos::DynRankView<T, execution_space>;
|
||||
|
||||
const view_type v1;
|
||||
const view_type v2;
|
||||
@ -101,11 +101,11 @@ struct TestViewOperator_LeftAndRight;
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -116,11 +116,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -186,11 +186,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -201,11 +201,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -268,11 +268,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -283,14 +283,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
|
||||
stride_view;
|
||||
using stride_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -363,11 +363,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -378,11 +378,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -438,11 +438,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -453,14 +453,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
|
||||
stride_view;
|
||||
using stride_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -536,11 +536,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -551,11 +551,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -616,11 +616,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
|
||||
|
||||
template <class DataType, class DeviceType>
|
||||
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::memory_space memory_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
using execution_space = DeviceType;
|
||||
using memory_space = typename execution_space::memory_space;
|
||||
using size_type = typename execution_space::size_type;
|
||||
|
||||
typedef int value_type;
|
||||
using value_type = int;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type& update,
|
||||
@ -631,14 +631,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(value_type& update) { update = 0; }
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
|
||||
left_view;
|
||||
using left_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
|
||||
right_view;
|
||||
using right_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
|
||||
|
||||
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
|
||||
stride_view;
|
||||
using stride_view =
|
||||
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
|
||||
|
||||
left_view left;
|
||||
right_view right;
|
||||
@ -689,22 +689,22 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
|
||||
template <typename T, class DeviceType>
|
||||
class TestDynViewAPI {
|
||||
public:
|
||||
typedef DeviceType device;
|
||||
using device = DeviceType;
|
||||
|
||||
enum { N0 = 1000, N1 = 3, N2 = 5, N3 = 7 };
|
||||
|
||||
typedef Kokkos::DynRankView<T, device> dView0;
|
||||
typedef Kokkos::DynRankView<const T, device> const_dView0;
|
||||
using dView0 = Kokkos::DynRankView<T, device>;
|
||||
using const_dView0 = Kokkos::DynRankView<const T, device>;
|
||||
|
||||
typedef Kokkos::DynRankView<T, device, Kokkos::MemoryUnmanaged>
|
||||
dView0_unmanaged;
|
||||
typedef typename dView0::host_mirror_space host_drv_space;
|
||||
using dView0_unmanaged =
|
||||
Kokkos::DynRankView<T, device, Kokkos::MemoryUnmanaged>;
|
||||
using host_drv_space = typename dView0::host_mirror_space;
|
||||
|
||||
typedef Kokkos::View<T, device> View0;
|
||||
typedef Kokkos::View<T*, device> View1;
|
||||
typedef Kokkos::View<T*******, device> View7;
|
||||
using View0 = Kokkos::View<T, device>;
|
||||
using View1 = Kokkos::View<T*, device>;
|
||||
using View7 = Kokkos::View<T*******, device>;
|
||||
|
||||
typedef typename View0::host_mirror_space host_view_space;
|
||||
using host_view_space = typename View0::host_mirror_space;
|
||||
|
||||
static void run_tests() {
|
||||
run_test_resize_realloc();
|
||||
@ -712,6 +712,7 @@ class TestDynViewAPI {
|
||||
run_test_mirror_and_copy();
|
||||
run_test_scalar();
|
||||
run_test();
|
||||
run_test_allocated();
|
||||
run_test_const();
|
||||
run_test_subview();
|
||||
run_test_subview_strided();
|
||||
@ -750,8 +751,8 @@ class TestDynViewAPI {
|
||||
}
|
||||
|
||||
static void run_test_mirror() {
|
||||
typedef Kokkos::DynRankView<int, host_drv_space> view_type;
|
||||
typedef typename view_type::HostMirror mirror_type;
|
||||
using view_type = Kokkos::DynRankView<int, host_drv_space>;
|
||||
using mirror_type = typename view_type::HostMirror;
|
||||
view_type a("a");
|
||||
mirror_type am = Kokkos::create_mirror_view(a);
|
||||
mirror_type ax = Kokkos::create_mirror(a);
|
||||
@ -851,8 +852,8 @@ class TestDynViewAPI {
|
||||
ASSERT_EQ(a_h.rank(), a_d.rank());
|
||||
}
|
||||
{
|
||||
typedef Kokkos::DynRankView<int, Kokkos::LayoutStride, Kokkos::HostSpace>
|
||||
view_stride_type;
|
||||
using view_stride_type =
|
||||
Kokkos::DynRankView<int, Kokkos::LayoutStride, Kokkos::HostSpace>;
|
||||
unsigned order[] = {6, 5, 4, 3, 2, 1, 0},
|
||||
dimen[] = {N0, N1, N2, 2, 2, 2, 2}; // LayoutRight equivalent
|
||||
view_stride_type a_h(
|
||||
@ -956,8 +957,8 @@ class TestDynViewAPI {
|
||||
}
|
||||
|
||||
static void run_test_scalar() {
|
||||
typedef typename dView0::HostMirror
|
||||
hView0; // HostMirror of DynRankView is a DynRankView
|
||||
using hView0 = typename dView0::HostMirror; // HostMirror of DynRankView is
|
||||
// a DynRankView
|
||||
|
||||
dView0 dx, dy;
|
||||
hView0 hx, hy;
|
||||
@ -1050,12 +1051,12 @@ class TestDynViewAPI {
|
||||
|
||||
static void run_test() {
|
||||
// mfh 14 Feb 2014: This test doesn't actually create instances of
|
||||
// these types. In order to avoid "declared but unused typedef"
|
||||
// these types. In order to avoid "unused type alias"
|
||||
// warnings, we declare empty instances of these types, with the
|
||||
// usual "(void)" marker to avoid compiler warnings for unused
|
||||
// variables.
|
||||
|
||||
typedef typename dView0::HostMirror hView0;
|
||||
using hView0 = typename dView0::HostMirror;
|
||||
|
||||
{
|
||||
hView0 thing;
|
||||
@ -1361,7 +1362,7 @@ class TestDynViewAPI {
|
||||
}
|
||||
}
|
||||
|
||||
typedef T DataType;
|
||||
using DataType = T;
|
||||
|
||||
static void check_auto_conversion_to_const(
|
||||
const Kokkos::DynRankView<const DataType, device>& arg_const,
|
||||
@ -1369,12 +1370,28 @@ class TestDynViewAPI {
|
||||
ASSERT_TRUE(arg_const == arg);
|
||||
}
|
||||
|
||||
static void run_test_allocated() {
|
||||
using device_type = Kokkos::DynRankView<DataType, device>;
|
||||
|
||||
const int N1 = 100;
|
||||
const int N2 = 10;
|
||||
|
||||
device_type d1;
|
||||
ASSERT_FALSE(d1.is_allocated());
|
||||
|
||||
d1 = device_type("d1", N1, N2);
|
||||
device_type d2(d1);
|
||||
device_type d3("d3", N1);
|
||||
ASSERT_TRUE(d1.is_allocated());
|
||||
ASSERT_TRUE(d2.is_allocated());
|
||||
ASSERT_TRUE(d3.is_allocated());
|
||||
}
|
||||
|
||||
static void run_test_const() {
|
||||
typedef Kokkos::DynRankView<DataType, device> typeX;
|
||||
typedef Kokkos::DynRankView<const DataType, device> const_typeX;
|
||||
typedef Kokkos::DynRankView<const DataType, device,
|
||||
Kokkos::MemoryRandomAccess>
|
||||
const_typeR;
|
||||
using typeX = Kokkos::DynRankView<DataType, device>;
|
||||
using const_typeX = Kokkos::DynRankView<const DataType, device>;
|
||||
using const_typeR =
|
||||
Kokkos::DynRankView<const DataType, device, Kokkos::MemoryRandomAccess>;
|
||||
typeX x("X", 2);
|
||||
const_typeX xc = x;
|
||||
const_typeR xr = x;
|
||||
@ -1398,10 +1415,10 @@ class TestDynViewAPI {
|
||||
}
|
||||
|
||||
static void run_test_subview() {
|
||||
typedef Kokkos::DynRankView<const T, device> cdView;
|
||||
typedef Kokkos::DynRankView<T, device> dView;
|
||||
using cdView = Kokkos::DynRankView<const T, device>;
|
||||
using dView = Kokkos::DynRankView<T, device>;
|
||||
// LayoutStride required for all returned DynRankView subdynrankview's
|
||||
typedef Kokkos::DynRankView<T, Kokkos::LayoutStride, device> sdView;
|
||||
using sdView = Kokkos::DynRankView<T, Kokkos::LayoutStride, device>;
|
||||
|
||||
dView0 d0("d0");
|
||||
cdView s0 = d0;
|
||||
@ -1452,7 +1469,7 @@ class TestDynViewAPI {
|
||||
ASSERT_EQ(dv6.rank(), 6);
|
||||
|
||||
// DynRankView with LayoutRight
|
||||
typedef Kokkos::DynRankView<T, Kokkos::LayoutRight, device> drView;
|
||||
using drView = Kokkos::DynRankView<T, Kokkos::LayoutRight, device>;
|
||||
drView dr5("dr5", N0, N1, N2, 2, 2);
|
||||
ASSERT_EQ(dr5.rank(), 5);
|
||||
|
||||
@ -1514,7 +1531,8 @@ class TestDynViewAPI {
|
||||
ASSERT_EQ(ds5.extent(4), ds5plus.extent(4));
|
||||
ASSERT_EQ(ds5.extent(5), ds5plus.extent(5));
|
||||
|
||||
#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_UVM)
|
||||
#if (!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_UVM)) && \
|
||||
!defined(KOKKOS_ENABLE_HIP)
|
||||
ASSERT_EQ(&ds5(1, 1, 1, 1, 0) - &ds5plus(1, 1, 1, 1, 0), 0);
|
||||
ASSERT_EQ(&ds5(1, 1, 1, 1, 0, 0) - &ds5plus(1, 1, 1, 1, 0, 0),
|
||||
0); // passing argument to rank beyond the view's rank is allowed
|
||||
@ -1538,12 +1556,12 @@ class TestDynViewAPI {
|
||||
}
|
||||
|
||||
static void run_test_subview_strided() {
|
||||
typedef Kokkos::DynRankView<int, Kokkos::LayoutLeft, host_drv_space>
|
||||
drview_left;
|
||||
typedef Kokkos::DynRankView<int, Kokkos::LayoutRight, host_drv_space>
|
||||
drview_right;
|
||||
typedef Kokkos::DynRankView<int, Kokkos::LayoutStride, host_drv_space>
|
||||
drview_stride;
|
||||
using drview_left =
|
||||
Kokkos::DynRankView<int, Kokkos::LayoutLeft, host_drv_space>;
|
||||
using drview_right =
|
||||
Kokkos::DynRankView<int, Kokkos::LayoutRight, host_drv_space>;
|
||||
using drview_stride =
|
||||
Kokkos::DynRankView<int, Kokkos::LayoutStride, host_drv_space>;
|
||||
|
||||
drview_left xl2("xl2", 100, 200);
|
||||
drview_right xr2("xr2", 100, 200);
|
||||
@ -1588,31 +1606,29 @@ class TestDynViewAPI {
|
||||
static void run_test_vector() {
|
||||
static const unsigned Length = 1000, Count = 8;
|
||||
|
||||
typedef typename Kokkos::DynRankView<T, Kokkos::LayoutLeft, host_drv_space>
|
||||
multivector_type;
|
||||
using multivector_type =
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutLeft, host_drv_space>;
|
||||
|
||||
typedef typename Kokkos::DynRankView<T, Kokkos::LayoutRight, host_drv_space>
|
||||
multivector_right_type;
|
||||
using multivector_right_type =
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutRight, host_drv_space>;
|
||||
|
||||
multivector_type mv = multivector_type("mv", Length, Count);
|
||||
multivector_right_type mv_right =
|
||||
multivector_right_type("mv", Length, Count);
|
||||
|
||||
typedef
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>
|
||||
svector_type;
|
||||
typedef
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>
|
||||
smultivector_type;
|
||||
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>
|
||||
const_svector_right_type;
|
||||
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>
|
||||
const_svector_type;
|
||||
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>
|
||||
const_smultivector_type;
|
||||
using svector_type =
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>;
|
||||
using smultivector_type =
|
||||
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>;
|
||||
using const_svector_right_type =
|
||||
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>;
|
||||
using const_svector_type =
|
||||
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>;
|
||||
using const_smultivector_type =
|
||||
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
|
||||
host_drv_space>;
|
||||
|
||||
svector_type v1 = Kokkos::subdynrankview(mv, Kokkos::ALL(), 0);
|
||||
svector_type v2 = Kokkos::subdynrankview(mv, Kokkos::ALL(), 1);
|
||||
|
||||
@ -44,10 +44,7 @@
|
||||
|
||||
#include <TestDynViewAPI.hpp>
|
||||
namespace Test {
|
||||
// FIXME_HIP attempt to access inaccessible memory space
|
||||
#ifndef KOKKOS_ENABLE_HIP
|
||||
TEST(TEST_CATEGORY, dyn_rank_view_api_generic) {
|
||||
TestDynViewAPI<double, TEST_EXECSPACE>::run_tests();
|
||||
}
|
||||
#endif
|
||||
} // namespace Test
|
||||
|
||||
@ -45,10 +45,7 @@
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
namespace Test {
|
||||
// FIXME_HIP failing with wrong value
|
||||
#ifndef KOKKOS_ENABLE_HIP
|
||||
TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) {
|
||||
TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345();
|
||||
}
|
||||
#endif
|
||||
} // namespace Test
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user