Merge branch 'master' into library-refactor

This commit is contained in:
Axel Kohlmeyer
2020-08-26 19:08:09 -04:00
1477 changed files with 21212 additions and 72605 deletions

View File

@ -15,75 +15,93 @@ if(BUILD_DOC)
endif()
set(VIRTUALENV ${Python3_EXECUTABLE} -m virtualenv -p ${Python3_EXECUTABLE})
endif()
find_package(Doxygen 1.8.10 REQUIRED)
file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
add_custom_command(
OUTPUT docenv
COMMAND ${VIRTUALENV} docenv
)
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static)
# configuration and static files are copied to binary dir to avoid collisions with parallel builds
set(DOC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/doc)
set(DOC_BUILD_CONFIG_FILE ${DOC_BUILD_DIR}/conf.py)
set(DOC_BUILD_STATIC_DIR ${DOC_BUILD_DIR}/_static)
set(DOXYGEN_BUILD_DIR ${DOC_BUILD_DIR}/doxygen)
set(DOXYGEN_XML_DIR ${DOXYGEN_BUILD_DIR}/xml)
# copy entire configuration folder to doc build directory
# files in _static are automatically copied during sphinx-build, so no need to copy them individually
file(COPY ${SPHINX_CONFIG_DIR}/ DESTINATION ${DOC_BUILD_DIR})
# configure paths in conf.py, since relative paths change when file is copied
configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
add_custom_command(
OUTPUT requirements.txt
DEPENDS docenv
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
OUTPUT ${DOC_BUILD_DIR}/requirements.txt
DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade pip
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r requirements.txt --upgrade
COMMAND ${DOCENV_BINARY_DIR}/pip install --use-feature=2020-resolver -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
)
# download mathjax distribution and unpack to folder "mathjax"
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5)
if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/mathjax/es5)
file(DOWNLOAD "https://github.com/mathjax/MathJax/archive/3.0.5.tar.gz"
"${CMAKE_CURRENT_BINARY_DIR}/mathjax.tar.gz"
EXPECTED_MD5 5d9d3799cce77a1a95eee6be04eb68e7)
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf mathjax.tar.gz WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
file(GLOB MATHJAX_VERSION_DIR ${CMAKE_CURRENT_BINARY_DIR}/MathJax-*)
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${CMAKE_CURRENT_BINARY_DIR}/mathjax)
execute_process(COMMAND ${CMAKE_COMMAND} -E rename ${MATHJAX_VERSION_DIR} ${DOC_BUILD_STATIC_DIR}/mathjax)
endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax)
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/mathjax/es5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/)
# for increased browser compatibility
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js)
if(NOT EXISTS ${DOC_BUILD_STATIC_DIR}/polyfill.js)
file(DOWNLOAD "https://polyfill.io/v3/polyfill.min.js?features=es6"
"${CMAKE_CURRENT_BINARY_DIR}/html/_static/polyfill.js")
"${DOC_BUILD_STATIC_DIR}/polyfill.js")
endif()
# note, this may run in parallel with other tasks, so we must not use multiple processes here
# set up doxygen and add targets to run it
file(MAKE_DIRECTORY ${DOXYGEN_BUILD_DIR})
# file(COPY) treats DESTINATION as a directory, so pass the build directory itself;
# the logo keeps its file name and ends up as ${DOXYGEN_BUILD_DIR}/lammps-logo.png
file(COPY ${LAMMPS_DOC_DIR}/doxygen/lammps-logo.png DESTINATION ${DOXYGEN_BUILD_DIR})
# fill in the @...@ placeholders of the Doxyfile template for this build tree
configure_file(${LAMMPS_DOC_DIR}/doxygen/Doxyfile.in ${DOXYGEN_BUILD_DIR}/Doxyfile)
# source files of the lammps target; used as dependencies of the doxygen run
get_target_property(LAMMPS_SOURCES lammps SOURCES)
add_custom_command(
OUTPUT html
DEPENDS ${DOC_SOURCES} docenv requirements.txt
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${LAMMPS_DOC_DIR}/src html
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${CMAKE_CURRENT_BINARY_DIR}/html/index.html
OUTPUT ${DOXYGEN_XML_DIR}/index.xml
DEPENDS ${DOC_SOURCES} ${LAMMPS_SOURCES}
COMMAND Doxygen::doxygen ${DOXYGEN_BUILD_DIR}/Doxyfile WORKING_DIRECTORY ${DOXYGEN_BUILD_DIR}
COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_XML_DIR}/run.stamp
)
# copy selected image files to html output tree
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/html/JPG)
set(HTML_EXTRA_IMAGES balance_nonuniform.jpg balance_rcb.jpg
balance_uniform.jpg bow_tutorial_01.png bow_tutorial_02.png
bow_tutorial_03.png bow_tutorial_04.png bow_tutorial_05.png
dump1.jpg dump2.jpg examples_mdpd.gif gran_funnel.png gran_mixer.png
hop1.jpg hop2.jpg saed_ewald_intersect.jpg saed_mesh.jpg
screenshot_atomeye.jpg screenshot_gl.jpg screenshot_pymol.jpg
screenshot_vmd.jpg sinusoid.jpg xrd_mesh.jpg)
set(HTML_IMAGE_TARGETS "")
foreach(_IMG ${HTML_EXTRA_IMAGES})
string(PREPEND _IMG JPG/)
list(APPEND HTML_IMAGE_TARGETS "${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}")
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/html/${_IMG}
DEPENDS ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_CURRENT_BINARY_DIR}/html/JPG
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/src/${_IMG} ${CMAKE_BINARY_DIR}/html/${_IMG}
)
endforeach()
if(EXISTS ${DOXYGEN_XML_DIR}/run.stamp)
set(SPHINX_EXTRA_OPTS "-E")
else()
set(SPHINX_EXTRA_OPTS "")
endif()
# Build the HTML manual with sphinx-build from the RST sources, using the
# configuration copied into ${DOC_BUILD_DIR} and the XML index produced by doxygen.
# The configured conf.py (DOC_BUILD_CONFIG_FILE) is a dependency, so regenerating it
# triggers a rebuild of the manual.
add_custom_command(
  OUTPUT html
  DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${DOC_BUILD_CONFIG_FILE}
  COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
  # make index.html an alias for the manual's entry page
  COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
  COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
  # remove the doxygen run stamp once sphinx has run
  COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
)
add_custom_target(
doc ALL
DEPENDS html ${CMAKE_CURRENT_BINARY_DIR}/html/_static/mathjax/es5 ${HTML_IMAGE_TARGETS}
DEPENDS html ${DOC_BUILD_STATIC_DIR}/mathjax/es5
SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
)
install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
install(DIRECTORY ${DOC_BUILD_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()

View File

@ -35,8 +35,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
ExternalProject_Add(kokkos_build
URL https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
URL_MD5 3ccb2100f7fc316891e7dad3bc33fa37
URL https://github.com/kokkos/kokkos/archive/3.2.00.tar.gz
URL_MD5 81569170fe232e5e64ab074f7cca5e50
CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
)
@ -50,7 +50,7 @@ if(DOWNLOAD_KOKKOS)
target_link_libraries(lammps PRIVATE LAMMPS::KOKKOS)
add_dependencies(LAMMPS::KOKKOS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.1.01 REQUIRED CONFIG)
find_package(Kokkos 3.2.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

7
doc/.gitignore vendored
View File

@ -1,6 +1,7 @@
/old
/html
/html-offline
/epub
/latex
/mathjax
/spelling
@ -10,3 +11,9 @@
/Developer.pdf
/doctrees
/docenv
/doxygen-warn.log
/utils/sphinx-config/conf.py
/doxygen/Doxyfile
*.el
/utils/sphinx-config/_static/mathjax
/utils/sphinx-config/_static/polyfill.js

View File

@ -1,21 +1,29 @@
# Makefile for LAMMPS documentation
SHELL = /bin/bash
BUILDDIR = ${CURDIR}
RSTDIR = $(BUILDDIR)/src
VENV = $(BUILDDIR)/docenv
MATHJAX = $(BUILDDIR)/mathjax
TXT2RST = $(VENV)/bin/txt2rst
ANCHORCHECK = $(VENV)/bin/rst_anchor_check
SHELL = /bin/bash
BUILDDIR = ${CURDIR}
RSTDIR = $(BUILDDIR)/src
VENV = $(BUILDDIR)/docenv
TXT2RST = $(VENV)/bin/txt2rst
ANCHORCHECK = $(VENV)/bin/rst_anchor_check
SPHINXCONFIG = $(BUILDDIR)/utils/sphinx-config
MATHJAX = $(SPHINXCONFIG)/_static/mathjax
POLYFILL = $(SPHINXCONFIG)/_static/polyfill.js
PYTHON = $(shell which python3)
PYTHON = $(shell which python3)
DOXYGEN = $(shell which doxygen)
VIRTUALENV = virtualenv
HAS_PYTHON3 = NO
HAS_VIRTUALENV = NO
HAS_DOXYGEN = NO
HAS_PDFLATEX = NO
ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
HAS_PYTHON3 = YES
HAS_PYTHON3 = YES
endif
ifeq ($(shell which doxygen >/dev/null 2>&1; echo $$?), 0)
HAS_DOXYGEN = YES
endif
ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
@ -33,9 +41,13 @@ HAS_PDFLATEX = YES
endif
SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())')
SPHINXEXTRA = -j $(shell $(PYTHON) -c 'import multiprocessing;print(multiprocessing.cpu_count())') $(shell test -f $(BUILDDIR)/doxygen/xml/run.stamp && printf -- "-E")
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check
# grab list of sources from doxygen config file.
# we only want to use explicitly listed files.
DOXYFILES = $(shell sed -n -e 's/\#.*$$//' -e '/^ *INPUT \+=/,/^[A-Z_]\+ \+=/p' doxygen/Doxyfile.in | sed -e 's/@LAMMPS_SOURCE_DIR@/..\/src/g' -e 's/\\//g' -e 's/ \+/ /' -e 's/[A-Z_]\+ \+= *\(YES\|NO\|\)//')
.PHONY: help clean-all clean clean-spelling epub mobi rst html pdf spelling anchor_check style_check xmlgen
# ------------------------------------------
@ -57,23 +69,32 @@ help:
# ------------------------------------------
clean-all: clean
rm -rf $(BUILDDIR)/docenv $(BUILDDIR)/doctrees $(BUILDDIR)/mathjax Manual.pdf Developer.pdf
rm -rf $(BUILDDIR)/docenv $(MATHJAX) $(BUILDDIR)/LAMMPS.mobi $(BUILDDIR)/LAMMPS.epub $(BUILDDIR)/Manual.pdf $(BUILDDIR)/Developer.pdf
clean: clean-spelling
rm -rf html epub latex
rm -rf $(BUILDDIR)/html $(BUILDDIR)/epub $(BUILDDIR)/latex $(BUILDDIR)/doctrees $(BUILDDIR)/doxygen/xml $(BUILDDIR)/doxygen-warn.log $(BUILDDIR)/doxygen/Doxyfile $(SPHINXCONFIG)/conf.py
clean-spelling:
rm -rf spelling
rm -rf $(BUILDDIR)/spelling
html: $(ANCHORCHECK) $(MATHJAX)
$(SPHINXCONFIG)/conf.py: $(SPHINXCONFIG)/conf.py.in
sed -e 's,@DOXYGEN_XML_DIR@,$(BUILDDIR)/doxygen/xml,g' \
-e 's,@LAMMPS_SOURCE_DIR@,$(BUILDDIR)/../src,g' \
-e 's,@LAMMPS_PYTHON_DIR@,$(BUILDDIR)/../python,g' \
-e 's,@LAMMPS_DOC_DIR@,$(BUILDDIR),g' $< > $@
html: xmlgen $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX) $(POLYFILL)
@$(MAKE) $(MFLAGS) -C graphviz all
@(\
. $(VENV)/bin/activate ;\
sphinx-build $(SPHINXEXTRA) -b html -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
sphinx-build $(SPHINXEXTRA) -b html -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) html ;\
ln -sf Manual.html html/index.html;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
echo "############################################" ;\
rst_anchor_check src/*.rst ;\
python utils/check-packages.py -s ../src -d src ;\
python $(BUILDDIR)/utils/check-packages.py -s ../src -d src ;\
env LC_ALL=C grep -n '[^ -~]' $(RSTDIR)/*.rst ;\
python utils/check-styles.py -s ../src -d src ;\
python $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
echo "############################################" ;\
deactivate ;\
)
@ -82,30 +103,28 @@ html: $(ANCHORCHECK) $(MATHJAX)
@rm -rf html/USER
@rm -rf html/JPG
@cp -r src/PDF html/PDF
@mkdir -p html/JPG
@cp `grep -A2 '\.\. .*\(image\|figure\)::' src/*.rst | grep ':target: JPG' | sed -e 's,.*:target: JPG/,src/JPG/,' | sort | uniq` html/JPG/
@rm -rf html/PDF/.[sg]*
@mkdir -p html/_static/mathjax
@cp -r $(MATHJAX)/es5 html/_static/mathjax/
@echo "Build finished. The HTML pages are in doc/html."
spelling: $(VENV) utils/sphinx-config/false_positives.txt
spelling: xmlgen $(VENV) $(SPHINXCONFIG)/false_positives.txt
@(\
. $(VENV)/bin/activate ;\
cp utils/sphinx-config/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
sphinx-build -b spelling -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
cp $(SPHINXCONFIG)/false_positives.txt $(RSTDIR)/ ; env PYTHONWARNINGS= \
sphinx-build -b spelling -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) spelling ;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
deactivate ;\
)
@echo "Spell check finished."
epub: $(VENV)
epub: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
@$(MAKE) $(MFLAGS) -C graphviz all
@mkdir -p epub/JPG
@rm -f LAMMPS.epub
@cp src/JPG/lammps-logo.png epub/
@cp src/JPG/*.* epub/JPG
@(\
. $(VENV)/bin/activate ;\
sphinx-build $(SPHINXEXTRA) -b epub -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
sphinx-build $(SPHINXEXTRA) -b epub -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) epub ;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
deactivate ;\
)
@mv epub/LAMMPS.epub .
@ -117,7 +136,8 @@ mobi: epub
@ebook-convert LAMMPS.epub LAMMPS.mobi
@echo "Conversion finished. The MOBI manual file is created."
pdf: $(ANCHORCHECK)
pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
@$(MAKE) $(MFLAGS) -C graphviz all
@if [ "$(HAS_PDFLATEX)" == "NO" ] ; then echo "PDFLaTeX was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
@(\
cd src/Developer; \
@ -127,8 +147,9 @@ pdf: $(ANCHORCHECK)
cd ../../; \
)
@(\
. $(VENV)/bin/activate ;\
sphinx-build $(SPHINXEXTRA) -b latex -c utils/sphinx-config -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
. $(VENV)/bin/activate ; env PYTHONWARNINGS= \
sphinx-build $(SPHINXEXTRA) -b latex -c $(SPHINXCONFIG) -d $(BUILDDIR)/doctrees $(RSTDIR) latex ;\
rm -f $(BUILDDIR)/doxygen/xml/run.stamp;\
echo "############################################" ;\
rst_anchor_check src/*.rst ;\
python utils/check-packages.py -s ../src -d src ;\
@ -185,21 +206,32 @@ package_check : $(VENV)
deactivate ;\
)
xmlgen : doxygen/xml/index.xml
doxygen/Doxyfile: doxygen/Doxyfile.in
sed -e 's/@LAMMPS_SOURCE_DIR@/..\/..\/src/g' $< > $@
doxygen/xml/index.xml : $(VENV) doxygen/Doxyfile $(DOXYFILES)
@(cd doxygen; $(DOXYGEN) Doxyfile && touch xml/run.stamp)
# ------------------------------------------
$(VENV):
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "python3 was not found! Please see README for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_DOXYGEN)" == "NO" ] ; then echo "doxygen was not found! Please see README for further instructions" 1>&2; exit 1; fi
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please see README for further instructions" 1>&2; exit 1; fi
@( \
$(VIRTUALENV) -p $(PYTHON) $(VENV); \
. $(VENV)/bin/activate; \
pip install --upgrade pip; \
pip install --use-feature=2020-resolver -r requirements.txt; \
pip install --use-feature=2020-resolver -r $(BUILDDIR)/utils/requirements.txt; \
deactivate;\
)
$(MATHJAX):
@git clone --depth 1 https://github.com/mathjax/MathJax.git mathjax
@git clone --depth 1 https://github.com/mathjax/MathJax.git $@
$(POLYFILL): $(MATHJAX)
@curl -s -o $@ "https://polyfill.io/v3/polyfill.min.js?features=es6"
$(TXT2RST) $(ANCHORCHECK): $(VENV)
@( \

View File

@ -1,97 +1,60 @@
LAMMPS Documentation
Depending on how you obtained LAMMPS, this directory has 2 or 3
sub-directories and optionally 2 PDF files and an ePUB file:
Depending on how you obtained LAMMPS and whether you have built
the manual yourself, this directory has a varying number of
sub-directories and files. Here is a list with descriptions:
src content files for LAMMPS documentation
html HTML version of the LAMMPS manual (see html/Manual.html)
utils utilities and settings for building the documentation
Manual.pdf large PDF version of entire manual
Developer.pdf small PDF with info about how LAMMPS is structured
LAMMPS.epub Manual in ePUB format
README this file
src content files for LAMMPS documentation
html HTML version of the LAMMPS manual (see html/Manual.html)
utils utilities and settings for building the documentation
Manual.pdf PDF version of entire manual
Developer.pdf PDF with info about how LAMMPS is structured
LAMMPS.epub Manual in ePUB format
LAMMPS.mobi Manual in MOBI (Kindle) format
lammps.1 man page for the lammps command
msi2lmp.1 man page for the msi2lmp command
mathjax code and fonts for rendering math in html
doctrees temporary data
docenv python virtual environment for generating the manual
doxygen Doxygen configuration and output
.gitignore list of files and folders to be ignored by git
doxygen-warn.log logfile with warnings from running doxygen
If you downloaded LAMMPS as a tarball from the web site, all these
directories and files should be included.
and:
If you downloaded LAMMPS from the public SVN or Git repositories, then
the HTML and PDF files are not included. Instead you need to create
them, in one of three ways:
github-development-workflow.md notes on the LAMMPS development workflow
include-file-conventions.md notes on LAMMPS' include file conventions
documentation_conventions.md notes on writing documentation for LAMMPS
If you downloaded a LAMMPS tarball from lammps.sandia.gov, then the html
folder and the PDF manual should be included. If you downloaded LAMMPS
from GitHub then you either need to download them or build them.
(a) You can "fetch" the current HTML and PDF files from the LAMMPS web
site. Just type "make fetch". This should create a html_www dir and
Manual_www.pdf/Developer_www.pdf files. Note that if new LAMMPS
features have been added more recently than the date of your version,
the fetched documentation will include those changes (but your source
code will not, unless you update your local repository).
Manual_www.pdf/Developer_www.pdf files. These files will always
represent the latest published patch/development version of LAMMPS.
(b) You can build the HTML and PDF files yourself, by typing "make
html" or by "make pdf", respectively. This requires various tools
including the Python documentation processing tool Sphinx, which the
build process will attempt to download and install on your system into
a python virtual environment, if not already available. The PDF file
will require a working LaTeX installation with several add-on packages
in addition to the Python/Sphinx setup. See more details below.
(b) You can build the HTML and PDF files yourself, by typing "make html"
or by "make pdf", respectively. This requires various tools and files.
Some of them have to be installed (more on that below). For the rest the
build process will attempt to download and install into a python virtual
environment and local folders.
----------------
The generation of all documentation is managed by the Makefile in this
dir.
Installing prerequisites for the documentation build
Options:
To run the HTML documentation build toolchain, python 3.x, doxygen, git,
and virtualenv have to be installed. Also internet access is initially
required to download external files and tools.
make html # generate HTML in html dir using Sphinx
make pdf # generate 2 PDF files (Manual.pdf,Developer.pdf)
# in this dir via Sphinx and PDFLaTeX
make fetch # fetch HTML doc pages and 2 PDF files from web site
# as a tarball and unpack into html dir and 2 PDFs
make epub # generate LAMMPS.epub in ePUB format using Sphinx
make clean # remove intermediate RST files created by HTML build
make clean-all # remove entire build folder and any cached data
----------------
Installing prerequisites for HTML build
To run the HTML documentation build toolchain, Python 3 and virtualenv
have to be installed. Here are instructions for common setups:
# Ubuntu
sudo apt-get install python-virtualenv
# Fedora (up to version 21)
# Red Hat Enterprise Linux or CentOS (up to version 7.x)
sudo yum install python3-virtualenv
# Fedora (since version 22)
sudo dnf install python3-virtualenv
# MacOS X
## Python 3
Download the latest Python 3 MacOS X package from
https://www.python.org and install it. This will install both Python
3 and pip3.
## virtualenv
Once Python 3 is installed, open a Terminal and type
pip3 install virtualenv
This will install virtualenv from the Python Package Index.
----------------
Installing prerequisites for PDF build
Same as for HTML plus a compatible LaTeX installation with
support for PDFLaTeX. Also the following LaTeX packages need
to be installed (e.g. from texlive):
Building the PDF format manual requires in addition a compatible LaTeX
installation with support for PDFLaTeX and several add-on LaTeX packages
installed. This includes:
- amsmath
- anysize
- babel
- capt-of
- cmap
@ -105,24 +68,13 @@ to be installed (e.g. from texlive):
- tabulary
- upquote
- wrapfig
Building the EPUB format requires a LaTeX installation with the same packages
as for the PDF format plus the 'dvipng' command to convert the embedded math
into images. The MOBI format is generated from the EPUB format file by using
the tool 'ebook-convert' from the 'calibre' e-book management software
(https://calibre-ebook.com).
----------------
Installing prerequisites for epub build
## ePUB
Same as for HTML. This uses the same tools and configuration
files as the HTML tree. The ePUB format conversion currently
does not support processing mathematical expressions via MathJAX,
so there will be limitations on some pages. For the time being
until this is resolved, building and using the PDF format file
is recommended instead.
For converting the generated ePUB file to a mobi format file
(for e-book readers like Kindle, that cannot read ePUB), you
also need to have the 'ebook-convert' tool from the "calibre"
software installed. http://calibre-ebook.com/
You first create the ePUB file with 'make epub' and then do:
ebook-convert LAMMPS.epub LAMMPS.mobi
More details on this can be found in the manual itself. The online
version is at: https://lammps.sandia.gov/doc/Manual_build.html

View File

@ -0,0 +1,93 @@
# Outline of LAMMPS documentation file conventions
The purpose of this document is to provide a point of reference
for LAMMPS developers and contributors as to what conventions
should be used to structure and format files in the LAMMPS manual.
Last change: 2020-04-23
## File format and tools
In fall 2019, the LAMMPS documentation file format has changed from
a home grown minimal markup designed to generate HTML format files
from a mostly plain text format to using the reStructuredText file
format. For a transition period all files in the old .txt format
were transparently converted to .rst and then processed. The txt2rst
tool is still included in the distribution to obtain an initial .rst
file for integration into the manual. Since the transition to
reStructuredText as source format, many of the artifacts of the
translation have been removed, though, and parts of the documentation
refactored and expanded to take advantage of the capabilities of
reStructuredText and associated tools. The conversion from the
source to the final formats (HTML, PDF, and optionally e-book
reader formats ePUB and MOBI) is mostly automated and controlled
by a Makefile in the `doc` folder. This makefile assumes that the
processing is done on a Unix-like machine and Python 3.5 or later
and a matching virtualenv module are available. Additional Python
packages (like the Sphinx tool and several extensions) are
transparently installed into a virtual environment over the
internet using the `pip` package manager. Further requirements
and details are discussed in the manual.
## Work in progress
The refactoring and improving of the documentation is an ongoing
process, so statements in this document may not always be fully
up-to-date. If in doubt, contact the LAMMPS developers.
## General structure
The layout and formatting of added files should follow the example
of the existing files. Since those are directly derived from their
former .txt format versions and the manual has been maintained in
that format for many years, there is a large degree of consistency
already, so comparison with similar files should give you a good
idea what kind of information and sections are needed.
## Formatting conventions
Filenames, folders, paths, (shell) commands, definitions, makefile
settings and similar should be formatted as "literals" with
double backward quotes bracketing the item: \`\`path/to/some/file\`\`
Keywords and options are formatted in italics: \*option\*
Mathematical expressions, equations, symbols are typeset using
either a `.. math::` block or the `:math:` role.
Groups of shell commands or LAMMPS input script or C/C++ source
code should be typeset into a `.. code-block::` section. A syntax
highlighting extension for LAMMPS input scripts is provided, so
`LAMMPS` can be used to indicate the language in the code block
in addition to `bash`, `c`, or `python`. When no syntax style
is indicated, no syntax highlighting is performed.
As an alternative, e.g. to typeset the syntax of file formats
a `.. parsed-literal::` block can be used, which allows some
formatting directives, which means that related characters need
to be escaped with a preceding backslash: `\*`.
Special remarks can be highlighted with a `.. note::` block and
strong warnings can be put into a `.. warning::` block.
## Required steps when adding a custom style to LAMMPS
When adding a new style (e.g. pair style or a compute or a fix)
or a new command, it is **required** to include the corresponding
documentation. Those are often new files that need to be added.
In order to be included in the documentation, those new files
need to be referenced in a `.. toctree::` block. Most of those
use patterns with wildcards, so the addition will be automatic.
However, those additions also need to be added to some lists of
styles or commands. The `make style\_check` command will perform
a test and report any missing entries and list the affected files.
Any references defined with `.. \_refname:` have to be unique
across all documentation files and this can be checked for with
`make anchor\_check`. Finally, a spell-check should be done,
which is triggered via `make spelling`. Any offenses need to
be corrected and false positives should be added to the file
`utils/sphinx-config/false\_positives.txt`.
## Required additional steps when adding a new package to LAMMPS
TODO

1
doc/doxygen/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/xml

522
doc/doxygen/Doxyfile.in Normal file
View File

@ -0,0 +1,522 @@
# Doxyfile 1.8.15 -*- makefile -*-
# Project-wide settings: identification of the documented project and
# general formatting of the extracted documentation.
DOXYFILE_ENCODING = UTF-8
# Title, version string, one-line summary and logo of the documented project.
PROJECT_NAME = "LAMMPS Programmer's Guide"
PROJECT_NUMBER = "24 August 2020"
PROJECT_BRIEF = "Documentation of the LAMMPS library interface and Python wrapper"
# NOTE(review): relative path; presumably resolved against the directory
# doxygen is started from - confirm against the build rule that runs doxygen.
PROJECT_LOGO = lammps-logo.png
# Keep all generated files in a single output directory.
CREATE_SUBDIRS = NO
ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
OUTPUT_TEXT_DIRECTION = LTR
# Show brief descriptions in member lists and repeat them in front of the
# detailed description.
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
# Refer to files by name only instead of by their full path.
FULL_PATH_NAMES = NO
# Undocumented members inherit the documentation of the member they reimplement.
INHERIT_DOCS = YES
# Number of spaces a tab character is replaced with in code fragments.
TAB_SIZE = 2
# When enabled doxygen tries to link words that correspond to documented
# classes, or namespaces to their corresponding documentation. Such a link can
# be prevented in individual cases by putting a % sign in front of the word or
# globally by setting AUTOLINK_SUPPORT to NO.
# The default value is: YES.
AUTOLINK_SUPPORT = YES
# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
# to include (a tag file for) the STL sources as input, then you should set this
# tag to YES in order to let doxygen match functions declarations and
# definitions whose arguments contain STL classes (e.g. func(std::string);
# versus func(std::string) {}). This also make the inheritance and collaboration
# diagrams that involve STL classes more complete and accurate.
# The default value is: NO.
BUILTIN_STL_SUPPORT = YES
IDL_PROPERTY_SUPPORT = NO
# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
# cache is used to resolve symbols given their name and scope. Since this can be
# an expensive process and often the same symbol appears multiple times in the
# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
# doxygen will become slower. If the cache is too large, memory is wasted. The
# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
# symbols. At the end of a run doxygen will report the cache usage and suggest
# the optimal cache size from a speed point of view.
# Minimum value: 0, maximum value: 9, default value: 0.
LOOKUP_CACHE_SIZE = 2
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
# documentation are documented, even if no documentation was available. Private
# class members and static file members will be hidden unless the
# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
# Note: This will also disable the warnings about undocumented members that are
# normally produced when WARNINGS is set to YES.
# The default value is: NO.
EXTRACT_ALL = NO
# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
# be included in the documentation.
# The default value is: NO.
EXTRACT_PRIVATE = YES
# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
# scope will be included in the documentation.
# The default value is: NO.
EXTRACT_PACKAGE = YES
# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
# included in the documentation.
# The default value is: NO.
EXTRACT_STATIC = YES
# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
# locally in source files will be included in the documentation. If set to NO,
# only classes defined in header files are included. Does not have any effect
# for Java sources.
# The default value is: YES.
EXTRACT_LOCAL_CLASSES = YES
# If this flag is set to YES, the members of anonymous namespaces will be
# extracted and appear in the documentation as a namespace called
# 'anonymous_namespace{file}', where file will be replaced with the base name of
# the file that contains the anonymous namespace. By default anonymous namespace
# are hidden.
# The default value is: NO.
EXTRACT_ANON_NSPACES = YES
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
# undocumented members inside documented classes or files. If set to NO these
# members will be included in the various overviews, but no documentation
# section is generated. This option has no effect if EXTRACT_ALL is enabled.
# The default value is: NO.
HIDE_UNDOC_MEMBERS = YES
# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
# undocumented classes that are normally visible in the class hierarchy. If set
# to NO, these classes will be included in the various overviews. This option
# has no effect if EXTRACT_ALL is enabled.
# The default value is: NO.
HIDE_UNDOC_CLASSES = YES
# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
# (class|struct|union) declarations. If set to NO, these declarations will be
# included in the documentation.
# The default value is: NO.
HIDE_FRIEND_COMPOUNDS = NO
# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
# documentation blocks found inside the body of a function. If set to NO, these
# blocks will be appended to the function's detailed documentation block.
# The default value is: NO.
HIDE_IN_BODY_DOCS = NO
# The INTERNAL_DOCS tag determines if documentation that is typed after a
# \internal command is included. If the tag is set to NO then the documentation
# will be excluded. Set it to YES to include the internal documentation.
# The default value is: NO.
INTERNAL_DOCS = NO
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
# names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# and Mac users are advised to set this option to NO.
# The default value is: system dependent.
CASE_SENSE_NAMES = YES
# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
# their full class and namespace scopes in the documentation. If set to YES, the
# scope will be hidden.
# The default value is: NO.
HIDE_SCOPE_NAMES = YES
# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
# append additional text to a page's title, such as Class Reference. If set to
# YES the compound reference will be hidden.
# The default value is: NO.
HIDE_COMPOUND_REFERENCE= NO
# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
# the files that are included by a file in the documentation of that file.
# The default value is: YES.
SHOW_INCLUDE_FILES = NO
# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
# grouped member an include statement to the documentation, telling the reader
# which file to include in order to use the member.
# The default value is: NO.
SHOW_GROUPED_MEMB_INC = NO
# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
# files with double quotes in the documentation rather than with sharp brackets.
# The default value is: NO.
FORCE_LOCAL_INCLUDES = NO
# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
# documentation for inline members.
# The default value is: YES.
INLINE_INFO = YES
# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
# (detailed) documentation of file and class members alphabetically by member
# name. If set to NO, the members will appear in declaration order.
# The default value is: YES.
SORT_MEMBER_DOCS = NO
# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
# descriptions of file, namespace and class members alphabetically by member
# name. If set to NO, the members will appear in declaration order. Note that
# this will also influence the order of the classes in the class list.
# The default value is: NO.
SORT_BRIEF_DOCS = NO
# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
# (brief and detailed) documentation of class members so that constructors and
# destructors are listed first. If set to NO the constructors will appear in the
# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
# member documentation.
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
# detailed member documentation.
# The default value is: NO.
SORT_MEMBERS_CTORS_1ST = NO
# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
# of group names into alphabetical order. If set to NO the group names will
# appear in their defined order.
# The default value is: NO.
SORT_GROUP_NAMES = NO
# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
# fully-qualified names, including namespaces. If set to NO, the class list will
# be sorted only by class name, not including the namespace part.
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
# Note: This option applies only to the class list, not to the alphabetical
# list.
# The default value is: NO.
SORT_BY_SCOPE_NAME = NO
# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
# type resolution of all parameters of a function it will reject a match between
# the prototype and the implementation of a member function even if there is
# only one candidate or it is obvious which candidate to choose by doing a
# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
# accept a match between prototype and implementation in such cases.
# The default value is: NO.
STRICT_PROTO_MATCHING = NO
# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
GENERATE_TODOLIST = YES
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
GENERATE_TESTLIST = YES
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
# The default value is: YES.
GENERATE_BUGLIST = YES
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
# the deprecated list. This list is created by putting \deprecated commands in
# the documentation.
# The default value is: YES.
GENERATE_DEPRECATEDLIST= YES
# The ENABLED_SECTIONS tag can be used to enable conditional documentation
# sections, marked by \if <section_label> ... \endif and \cond <section_label>
# ... \endcond blocks.
ENABLED_SECTIONS =
# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
# initial value of a variable or macro / define can have for it to appear in the
# documentation. If the initializer consists of more lines than specified here
# it will be hidden. Use a value of 0 to hide initializers completely. The
# appearance of the value of individual variables and macros / defines can be
# controlled using \showinitializer or \hideinitializer command in the
# documentation regardless of this setting.
# Minimum value: 0, maximum value: 10000, default value: 30.
MAX_INITIALIZER_LINES = 30
# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
# the bottom of the documentation of classes and structs. If set to YES, the
# list will mention the files that were used to generate the documentation.
# The default value is: YES.
SHOW_USED_FILES = YES
# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
# will remove the Files entry from the Quick Index and from the Folder Tree View
# (if specified).
# The default value is: YES.
SHOW_FILES = NO
# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
# page. This will remove the Namespaces entry from the Quick Index and from the
# Folder Tree View (if specified).
# The default value is: YES.
SHOW_NAMESPACES = YES
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from
# the version control system). Doxygen will invoke the program by executing (via
# popen()) the command command input-file, where command is the value of the
# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
# by doxygen. Whatever the program writes to standard output is used as the file
# version. For an example see the documentation.
FILE_VERSION_FILTER =
# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
# by doxygen. The layout file controls the global structure of the generated
# output files in an output format independent way. To create the layout file
# that represents doxygen's defaults, run doxygen with the -l option. You can
# optionally specify a file name after the option, if omitted DoxygenLayout.xml
# will be used as the name of the layout file.
#
# Note that if you run doxygen from a directory containing a file called
# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
# tag is left empty.
LAYOUT_FILE =
# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
# the reference definitions. This must be a list of .bib files. The .bib
# extension is automatically appended if omitted. This requires the bibtex tool
# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
# For LaTeX the style of the bibliography can be controlled using
# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
# search path. See also \cite for info how to create references.
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# Configuration options related to warning and progress messages
#---------------------------------------------------------------------------
# The QUIET tag can be used to turn on/off the messages that are generated to
# standard output by doxygen. If QUIET is set to YES this implies that the
# messages are off.
# The default value is: NO.
QUIET = NO
# The WARNINGS tag can be used to turn on/off the warning messages that are
# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
# this implies that the warnings are on.
#
# Tip: Turn warnings on while writing the documentation.
# The default value is: YES.
WARNINGS = YES
# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
# will automatically be disabled.
# The default value is: YES.
WARN_IF_UNDOCUMENTED = YES
# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
# potential errors in the documentation, such as not documenting some parameters
# in a documented function, or documenting parameters that don't exist or using
# markup commands wrongly.
# The default value is: YES.
WARN_IF_DOC_ERROR = YES
# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
# are documented, but have no documentation for their parameters or return
# value. If set to NO, doxygen will only warn about wrong or incomplete
# parameter documentation, but not about the absence of documentation. If
# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
# The default value is: NO.
WARN_NO_PARAMDOC = YES
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
# a warning is encountered.
# The default value is: NO.
WARN_AS_ERROR = NO
# The WARN_FORMAT tag determines the format of the warning messages that doxygen
# can produce. The string should contain the $file, $line, and $text tags, which
# will be replaced by the file and line number from which the warning originated
# and the warning text. Optionally the format may contain $version, which will
# be replaced by the version of the file (if it could be obtained via
# FILE_VERSION_FILTER)
# The default value is: $file:$line: $text.
WARN_FORMAT = "$file:$line: $text"
# The WARN_LOGFILE tag can be used to specify a file to which warning and error
# messages should be written. If left blank the output is written to standard
# error (stderr).
WARN_LOGFILE = "../doxygen-warn.log"
#---------------------------------------------------------------------------
# Configuration options related to the input files
#---------------------------------------------------------------------------
# The INPUT tag is used to specify the files and/or directories that contain
# documented source files. You may enter file names like myfile.cpp or
# directories like /usr/src/myproject. Separate the files or directories with
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
#
# Only the files listed here are parsed. @LAMMPS_SOURCE_DIR@ is a placeholder
# that is substituted when this template is configured.
# Every entry except the last one ends with a line continuation; the last
# entry must NOT have one, so that the list ends here regardless of what
# follows on the next line.
INPUT = @LAMMPS_SOURCE_DIR@/utils.cpp \
        @LAMMPS_SOURCE_DIR@/utils.h \
        @LAMMPS_SOURCE_DIR@/library.cpp \
        @LAMMPS_SOURCE_DIR@/library.h \
        @LAMMPS_SOURCE_DIR@/lammps.cpp \
        @LAMMPS_SOURCE_DIR@/lammps.h \
        @LAMMPS_SOURCE_DIR@/lmptype.h \
        @LAMMPS_SOURCE_DIR@/pointers.h \
        @LAMMPS_SOURCE_DIR@/atom.cpp \
        @LAMMPS_SOURCE_DIR@/atom.h \
        @LAMMPS_SOURCE_DIR@/input.cpp \
        @LAMMPS_SOURCE_DIR@/input.h
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.
# The default value is: NO.
EXCLUDE_SYMLINKS = YES
#---------------------------------------------------------------------------
# Configuration options related to output
#---------------------------------------------------------------------------
# Only XML output is produced; HTML and LaTeX generation are switched off.
GENERATE_HTML = NO
GENERATE_LATEX = NO
GENERATE_XML = YES
# Directory the XML files are written to. A relative path is taken relative
# to OUTPUT_DIRECTORY, which this section does not set.
# NOTE(review): presumably that resolves to the directory doxygen is run in - confirm.
XML_OUTPUT = xml
# Include the program listings (with cross-references) in the XML output.
XML_PROGRAMLISTING = YES
# Do not additionally list namespace members in the file scope of the XML output.
XML_NS_MEMB_FILE_SCOPE = NO
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
# C-preprocessor directives found in the sources and include files.
# The default value is: YES.
#ENABLE_PREPROCESSING = YES
ENABLE_PREPROCESSING = NO
# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
# in the source code. If set to NO, only conditional compilation will be
# performed. Macro expansion can be done in a controlled way by setting
# EXPAND_ONLY_PREDEF to YES.
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
# EXPAND_AS_DEFINED tags.
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_ONLY_PREDEF = NO
# If the SEARCH_INCLUDES tag is set to YES, the include files in the
# INCLUDE_PATH will be searched if a #include is found.
# The default value is: YES.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
SEARCH_INCLUDES = YES
# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by the
# preprocessor.
# This tag requires that the tag SEARCH_INCLUDES is set to YES.
INCLUDE_PATH =
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
# directories. If left blank, the patterns specified with FILE_PATTERNS will be
# used.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
INCLUDE_FILE_PATTERNS =
# The PREDEFINED tag can be used to specify one or more macro names that are
# defined before the preprocessor is started (similar to the -D option of e.g.
# gcc). The argument of the tag is a list of macros of the form: name or
# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
# is assumed. To prevent a macro definition from being undefined via #undef or
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED =
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
# macro definition that is found in the sources will be used. Use the PREDEFINED
# tag if you want to use a different macro definition that overrules the
# definition found in the source code.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_AS_DEFINED =
# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
# remove all references to function-like macros that are alone on a line, have
# an all uppercase name, and do not end with a semicolon. Such function macros
# are typically used for boiler-plate code, and will confuse the parser if not
# removed.
# The default value is: YES.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
SKIP_FUNCTION_MACROS = YES

BIN
doc/doxygen/lammps-logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

3
doc/graphviz/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/*.png
/*.svg
/*.pdf

30
doc/graphviz/Makefile Normal file
View File

@ -0,0 +1,30 @@
# Makefile for generating images with graphviz
#
# Converts every *.dot file in this directory into a PNG image with the same
# base name inside $(IMGDIR).

SHELL = /bin/bash
BUILDDIR = ${CURDIR}/..
IMGDIR = $(BUILDDIR)/src/JPG
IMGSRC = $(wildcard *.dot)
IMGPNG = $(IMGSRC:%.dot=$(IMGDIR)/%.png)

# Detect the graphviz "dot" executable: the shell snippet prints the exit
# status of "which dot", which is 0 only when the program was found.
HAS_DOT = NO
ifeq ($(shell which dot >/dev/null 2>&1; echo $$?), 0)
HAS_DOT = YES
endif

# "all" and "clean" do not name files. Declaring them phony keeps them
# working even if a file called "all" or "clean" exists in this directory.
.PHONY: all clean

all: $(IMGPNG)

# Remove the generated PNG images and editor backup files.
clean:
	rm -f $(IMGPNG) *~

# With graphviz available, render each graph description to PNG.
ifeq ($(HAS_DOT),YES)
$(IMGDIR)/%.png: %.dot
	dot -Tpng -o $@ $<
endif

# Without graphviz, only print a hint; no image is created.
ifeq ($(HAS_DOT),NO)
$(IMGDIR)/%.png: %.dot
	@echo '###################################################'
	@echo '# Need to install "graphviz" to regenerate graphs #'
	@echo '###################################################'
endif

View File

@ -0,0 +1,90 @@
// LAMMPS Class topology
// Directed graph rooted at the "LAMMPS" node and laid out from left to right.
// The short identifiers (La, At, ...) are only used inside this file; the
// text shown in the rendered image is the "label" attribute of each node.
// NOTE(review): node shape and color presumably encode the role of a class
// (e.g. core class vs. style base class vs. derived class) - confirm against
// the documentation page that embeds this figure.
digraph lammps {
rankdir="LR"
// Root node
La [shape=circle label="LAMMPS"]
// Nodes that are direct children of the root node (see the first edge below)
At [shape=box label="Atom" color=blue]
Ci [shape=box label="CiteMe"]
Co [shape=box label="Comm" color=blue]
Do [shape=box label="Domain" color=blue]
Er [shape=box label="Error" color=blue]
Fo [shape=box label="Force" color=blue]
Gr [shape=box label="Group" color=blue]
In [shape=box label="Input" color=blue]
Ko [shape=box label="KokkosLMP"]
Ak [shape=box label="AtomKK" color=blue]
Mk [shape=box label="MemoryKK" color=blue]
Me [shape=box label="Memory" color=blue]
Mo [shape=box label="Modify" color=blue]
Ne [shape=box label="Neighbor" color=blue]
Ou [shape=box label="Output" color=blue]
Py [shape=box label="Python" color=blue]
Up [shape=box label="Update" color=blue]
Un [shape=box label="Universe" color=blue]
Ti [shape=box label="Timer" color=blue]
// Nodes drawn with the default (elliptical) shape
Rg [label="Region" color=red]
Rb [shape=box label="RegionBlock"]
Rs [shape=box label="RegionSphere"]
Av [label="AtomVec" color=red]
It [label="Integrate" color=red]
Mi [label="Min" color=red]
Pa [label="Pair" color=red]
Bo [label="Bond" color=red]
An [label="Angle" color=red]
Di [label="Dihedral" color=red]
Im [label="Improper" color=red]
Ks [label="Kspace" color=red]
Du [label="Dump" color=red]
Fi [label="Fix" color=red]
Cp [label="Compute" color=red]
Th [label="Thermo"]
Va [label="Variable"]
// Nodes that appear further down the graph
Ew [shape=box label="Ewald"]
Pp [shape=box label="PPPM"]
Ff [label="FFT3d"]
Re [label="Remap"]
Gc [label="GridComm"]
Cb [shape=box label="CommBrick"]
Ct [shape=box label="CommTiled"]
Aa [shape=box label="AtomVecAtomic"]
Am [shape=box label="AtomVecMolecular"]
Lj [shape=box label="PairLJCut"]
Lo [shape=box label="PairLJCutOMP"]
Lg [shape=box label="PairLJCutGPU"]
Te [shape=box label="PairTersoff"]
Bh [shape=box label="BondHarmonic"]
Bf [shape=box label="BondFENE"]
Fa [shape=box label="FixAveTime"]
Fn [shape=box label="FixNVE"]
Fh [shape=box label="FixNH"]
Fp [shape=box label="FixNPT"]
Ft [shape=box label="FixNVT"]
Da [shape=box label="DumpAtom"]
Dc [shape=box label="DumpCustom"]
Dg [shape=box label="DumpCFG"]
Ve [shape=box label="Verlet"]
Rr [shape=box label="Respa"]
Po [shape=box label="PPPMOmp"]
// Edges: all are drawn with penwidth=2; some are solid, some dashed.
// NOTE(review): solid vs. dashed presumably distinguishes "contains an
// instance of" from "is derived from" - TODO confirm.
La -> {At Ci Co Do Er Fo Gr In Ko Ak Mk Me Mo Ne Ou Py Ti Up Un} [penwidth=2]
Do -> {Rg} [penwidth=2]
Co -> {Cb Ct} [style=dashed penwidth=2]
Rg -> {Rb Rs} [style=dashed penwidth=2]
In -> Va [penwidth=2]
Mo -> {Fi Cp} [penwidth=2]
Fo -> {Pa Bo An Di Im Ks} [penwidth=2]
Ks -> {Ew Pp} [style=dashed penwidth=2]
Pp -> {Ff Re Gc} [penwidth=2]
Pp -> {Po} [style=dashed penwidth=2]
Up -> {It Mi} [penwidth=2]
It -> {Ve Rr} [style=dashed penwidth=2]
Ou -> {Du Th} [penwidth=2]
Du -> {Da Dc} [style=dashed penwidth=2]
Dc -> {Dg} [style=dashed penwidth=2]
At -> Av [penwidth=2]
Av -> {Aa Am} [style=dashed penwidth=2]
Pa -> {Lj Te} [style=dashed penwidth=2]
Lj -> {Lo Lg} [style=dashed penwidth=2]
Bo -> {Bh Bf} [style=dashed penwidth=2]
Fi -> {Fa Fn Fh} [style=dashed penwidth=2]
Fh -> {Fp Ft} [style=dashed penwidth=2]
}

View File

@ -1,4 +0,0 @@
Sphinx
sphinxcontrib-spelling
breathe
Pygments

View File

@ -378,22 +378,22 @@ The images below illustrate how the data is presented.
.. list-table::
* - .. figure:: JPG/coverage-overview-top.png
:target: JPG/coverage-overview-top.png
:scale: 25%
Top of the overview page
- .. figure:: JPG/coverage-overview-manybody.png
:target: JPG/coverage-overview-manybody.png
:scale: 25%
Styles with good coverage
- .. figure:: JPG/coverage-file-top.png
:target: JPG/coverage-file-top.png
:scale: 25%
Top of individual source page
- .. figure:: JPG/coverage-file-branches.png
:target: JPG/coverage-file-branches.png
:scale: 25%
Source page with branches

View File

@ -361,9 +361,12 @@ be specified in uppercase.
* - AMDAVX
- HOST
- AMD 64-bit x86 CPU (AVX 1)
* - EPYC
* - ZEN
- HOST
- AMD EPYC Zen class CPU (AVX 2)
- AMD Zen class CPU (AVX 2)
* - ZEN2
- HOST
- AMD Zen2 class CPU (AVX 2)
* - ARMV80
- HOST
- ARMv8.0 Compatible CPU
@ -445,12 +448,18 @@ be specified in uppercase.
* - TURING75
- GPU
- NVIDIA Turing generation CC 7.5 GPU
* - AMPERE80
- GPU
- NVIDIA Ampere generation CC 8.0 GPU
* - VEGA900
- GPU
- AMD GPU MI25 GFX900
* - VEGA906
- GPU
- AMD GPU MI50/MI60 GFX906
* - INTEL_GEN
- GPU
- Intel GPUs Gen9+
Basic CMake build settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -502,10 +502,10 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
*Bond/react: Unknown section in map file*
Please ensure reaction map files are properly formatted.
*Bond/react: Atom affected by reaction too close to template edge*
*Bond/react: Atom/Bond type affected by reaction too close to template edge*
This means an atom which changes type or connectivity during the
reaction is too close to an 'edge' atom defined in the map
file. This could cause incorrect assignment of bonds, angle, etc.
file. This could cause incorrect assignment of bonds, angle, etc.
Generally, this means you must include more atoms in your templates,
such that there are at least two atoms between each atom involved in
the reaction and an edge atom.

View File

@ -191,19 +191,19 @@ You start the command ``ccmake ../cmake`` in the ``build`` folder.
.. list-table::
* - .. figure:: JPG/ccmake-initial.png
:target: JPG/ccmake-initial.png
:scale: 33%
:align: center
Initial ``ccmake`` screen
- .. figure:: JPG/ccmake-config.png
:target: JPG/ccmake-config.png
:scale: 33%
:align: center
Configure output of ``ccmake``
- .. figure:: JPG/ccmake-options.png
:target: JPG/ccmake-options.png
:scale: 33%
:align: center
Options screen of ``ccmake``
@ -236,19 +236,19 @@ not required, it can also be entered from the GUI.
.. list-table::
* - .. figure:: JPG/cmake-gui-initial.png
:target: JPG/cmake-gui-initial.png
:scale: 40%
:align: center
Initial ``cmake-gui`` screen
- .. figure:: JPG/cmake-gui-popup.png
:target: JPG/cmake-gui-popup.png
:scale: 60%
:align: center
Generator selection in ``cmake-gui``
- .. figure:: JPG/cmake-gui-options.png
:target: JPG/cmake-gui-options.png
:scale: 40%
:align: center
Options screen of ``cmake-gui``

Binary file not shown.

After

Width:  |  Height:  |  Size: 245 KiB

View File

@ -60,6 +60,19 @@ every LAMMPS command.
Errors
Manual_build
.. _programmer_documentation:
.. toctree::
:maxdepth: 2
:numbered: 3
:caption: Programmer Documentation
:name: progdoc
:includehidden:
pg_developer
.. pg_library
.. pg_modify
.. pg_base
.. toctree::
:caption: Index
:name: index

View File

@ -14,19 +14,22 @@ Syntax
react react-ID react-group-ID Nevery Rmin Rmax template-ID(pre-reacted) template-ID(post-reacted) map_file individual_keyword values ...
...
* ID, group-ID are documented in :doc:`fix <fix>` command. Group-ID is ignored.
* ID, group-ID are documented in :doc:`fix <fix>` command.
* bond/react = style name of this fix command
* the common keyword/values may be appended directly after 'bond/react'
* this applies to all reaction specifications (below)
* common_keyword = *stabilization*
* common_keyword = *stabilization* or *reset_mol_ids*
.. parsed-literal::
*stabilization* values = *no* or *yes* *group-ID* *xmax*
*no* = no reaction site stabilization
*no* = no reaction site stabilization (default)
*yes* = perform reaction site stabilization
*group-ID* = user-assigned prefix for the dynamic group of atoms not currently involved in a reaction
*xmax* = xmax value that is used by an internally-created :doc:`nve/limit <fix_nve_limit>` integrator
*reset_mol_ids* values = *yes* or *no*
*yes* = update molecule IDs based on new global topology (default)
*no* = do not update molecule IDs
* react = mandatory argument indicating new reaction specification
* react-ID = user-assigned name for the reaction
@ -50,9 +53,9 @@ Syntax
*stabilize_steps* value = timesteps
timesteps = number of timesteps to apply the internally-created :doc:`nve/limit <fix_nve_limit>` fix to reacting atoms
*update_edges* value = *none* or *charges* or *custom*
none = do not update topology near the edges of reaction templates
charges = update atomic charges of all atoms in reaction templates
custom = force the update of user-specified atomic charges
*none* = do not update topology near the edges of reaction templates
*charges* = update atomic charges of all atoms in reaction templates
*custom* = force the update of user-specified atomic charges
Examples
""""""""
@ -154,6 +157,13 @@ due to the internal dynamic grouping performed by fix bond/react.
If the group-ID is an existing static group, react-group-IDs
should also be specified as this static group, or a subset.
The *reset_mol_ids* keyword invokes the :doc:`reset_mol_ids <reset_mol_ids>`
command after a reaction occurs, to ensure that molecule IDs are
consistent with the new bond topology. The group-ID used for
:doc:`reset_mol_ids <reset_mol_ids>` is the group-ID for this fix.
Resetting molecule IDs is necessarily a global operation, and so can
be slow for very large systems.
The following comments pertain to each *react* argument (in other
words, can be customized for each reaction, or reaction step):
@ -203,9 +213,10 @@ surrounding topology. As described below, the bonding atom pairs of
the pre-reacted template are specified by atom ID in the map file. The
pre-reacted molecule template should contain as few atoms as possible
while still completely describing the topology of all atoms affected
by the reaction. For example, if the force field contains dihedrals,
the pre-reacted template should contain any atom within three bonds of
reacting atoms.
by the reaction (which includes all atoms that change atom type or
connectivity, and all bonds that change bond type). For example, if
the force field contains dihedrals, the pre-reacted template should
contain any atom within three bonds of reacting atoms.
Some atoms in the pre-reacted template that are not reacting may have
missing topology with respect to the simulation. For example, the
@ -554,7 +565,7 @@ Default
"""""""
The option defaults are stabilization = no, prob = 1.0, stabilize_steps = 60,
update_edges = none
reset_mol_ids = yes, update_edges = none
----------

View File

@ -93,7 +93,7 @@ on particle *i* due to contact with particle *j* is given by:
.. math::
\mathbf{F}_{ne, Hooke} = k_N \delta_{ij} \mathbf{n}
\mathbf{F}_{ne, Hooke} = k_n \delta_{ij} \mathbf{n}
Where :math:`\delta_{ij} = R_i + R_j - \|\mathbf{r}_{ij}\|` is the particle
overlap, :math:`R_i, R_j` are the particle radii, :math:`\mathbf{r}_{ij} = \mathbf{r}_i - \mathbf{r}_j` is the vector separating the two
@ -106,7 +106,7 @@ For the *hertz* model, the normal component of force is given by:
.. math::
\mathbf{F}_{ne, Hertz} = k_N R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
\mathbf{F}_{ne, Hertz} = k_n R_{eff}^{1/2}\delta_{ij}^{3/2} \mathbf{n}
Here, :math:`R_{eff} = \frac{R_i R_j}{R_i + R_j}` is the effective
radius, denoted for simplicity as *R* from here on. For *hertz*\ , the
@ -123,7 +123,7 @@ Here, :math:`E_{eff} = E = \left(\frac{1-\nu_i^2}{E_i} + \frac{1-\nu_j^2}{E_j}\r
modulus, with :math:`\nu_i, \nu_j` the Poisson ratios of the particles of
types *i* and *j*\ . Note that if the elastic modulus and the shear
modulus of the two particles are the same, the *hertz/material* model
is equivalent to the *hertz* model with :math:`k_N = 4/3 E_{eff}`
is equivalent to the *hertz* model with :math:`k_n = 4/3 E_{eff}`
The *dmt* model corresponds to the
:ref:`(Derjaguin-Muller-Toporov) <DMT1975>` cohesive model, where the force
@ -140,7 +140,7 @@ where the force is computed as:
\mathbf{F}_{ne, jkr} = \left(\frac{4Ea^3}{3R} - 2\pi a^2\sqrt{\frac{4\gamma E}{\pi a}}\right)\mathbf{n}
Here, *a* is the radius of the contact zone, related to the overlap
Here, :math:`a` is the radius of the contact zone, related to the overlap
:math:`\delta` according to:
.. math::
@ -167,7 +167,7 @@ following general form:
\mathbf{F}_{n,damp} = -\eta_n \mathbf{v}_{n,rel}
Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n} \mathbf{n}` is the component of relative velocity along
Here, :math:`\mathbf{v}_{n,rel} = (\mathbf{v}_j - \mathbf{v}_i) \cdot \mathbf{n}\ \mathbf{n}` is the component of relative velocity along
:math:`\mathbf{n}`.
The optional *damping* keyword to the *pair_coeff* command followed by
@ -259,7 +259,9 @@ tangential model choices and their expected parameters are as follows:
1. *linear_nohistory* : :math:`x_{\gamma,t}`, :math:`\mu_s`
2. *linear_history* : :math:`k_t`, :math:`x_{\gamma,t}`, :math:`\mu_s`
3. *mindlin* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
4. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
4. *mindlin/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
5. *mindlin_rescale* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
6. *mindlin_rescale/force* : :math:`k_t` or NULL, :math:`x_{\gamma,t}`, :math:`\mu_s`
Here, :math:`x_{\gamma,t}` is a dimensionless multiplier for the normal
damping :math:`\eta_n` that determines the magnitude of the tangential
@ -268,11 +270,11 @@ coefficient, and :math:`k_t` is the tangential stiffness coefficient.
For *tangential linear_nohistory*, a simple velocity-dependent Coulomb
friction criterion is used, which mimics the behavior of the *pair
gran/hooke* style. The tangential force (\mathbf{F}_t\) is given by:
gran/hooke* style. The tangential force :math:`\mathbf{F}_t` is given by:
.. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
The tangential damping force :math:`\mathbf{F}_\mathrm{t,damp}` is given by:
@ -294,8 +296,8 @@ keyword also affects the tangential damping. The parameter
literature use :math:`x_{\gamma,t} = 1` (:ref:`Marshall <Marshall2009>`,
:ref:`Tsuji et al <Tsuji1992>`, :ref:`Silbert et al <Silbert2001>`). The relative
tangential velocity at the point of contact is given by
:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\Omega_i + R_j\Omega_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}{n}`,
:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i`.
:math:`\mathbf{v}_{t, rel} = \mathbf{v}_{t} - (R_i\mathbf{\Omega}_i + R_j\mathbf{\Omega}_j) \times \mathbf{n}`, where :math:`\mathbf{v}_{t} = \mathbf{v}_r - \mathbf{v}_r\cdot\mathbf{n}\ \mathbf{n}`,
:math:`\mathbf{v}_r = \mathbf{v}_j - \mathbf{v}_i` .
The direction of the applied force is :math:`\mathbf{t} = \mathbf{v_{t,rel}}/\|\mathbf{v_{t,rel}}\|` .
The normal force value :math:`F_{n0}` used to compute the critical force
@ -314,21 +316,24 @@ form:
.. math::
F_{n0} = \|\mathbf{F}_ne + 2 F_{pulloff}\|
F_{n0} = \|\mathbf{F}_{ne} + 2 F_{pulloff}\|
Where :math:`F_{pulloff} = 3\pi \gamma R` for *jkr*\ , and
:math:`F_{pulloff} = 4\pi \gamma R` for *dmt*\ .
The remaining tangential options all use accumulated tangential
displacement (i.e. contact history). This is discussed below in the
context of the *linear_history* option, but the same treatment of the
accumulated displacement applies to the other options as well.
displacement (i.e. contact history), except for the options
*mindlin/force* and *mindlin_rescale/force*, which use accumulated
tangential force instead and are discussed further below.
The accumulated tangential displacement is discussed in detail below
in the context of the *linear_history* option. The same treatment of
the accumulated displacement applies to the other options as well.
For *tangential linear_history*, the tangential force is given by:
.. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t\mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
Here, :math:`\mathbf{\xi}` is the tangential displacement accumulated
during the entire duration of the contact:
@ -356,7 +361,7 @@ work:
.. math::
\mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'}\| - \mathbf{n}\cdot\mathbf{\xi'}}
\mathbf{\xi} = \left(\mathbf{\xi'} - (\mathbf{n} \cdot \mathbf{\xi'})\mathbf{n}\right) \frac{\|\mathbf{\xi'}\|}{\|\mathbf{\xi'} - (\mathbf{n}\cdot\mathbf{\xi'})\mathbf{n}\|}
Here, :math:`\mathbf{\xi'}` is the accumulated displacement prior to the
current time step and :math:`\mathbf{\xi}` is the corrected
@ -372,7 +377,7 @@ discussion):
.. math::
\mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}\right)
\mathbf{\xi} = -\frac{1}{k_t}\left(\mu_t F_{n0}\mathbf{t} - \mathbf{F}_{t,damp}\right)
The tangential force is added to the total normal force (elastic plus
damping) to produce the total force on the particle. The tangential
@ -387,27 +392,68 @@ overlap region) to induce a torque on each particle according to:
\mathbf{\tau}_j = -(R_j - 0.5 \delta) \mathbf{n} \times \mathbf{F}_t
For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution is used, which differs from the *linear_history*
option by an additional factor of *a*\ , the radius of the contact region. The tangential force is given by:
For *tangential mindlin*\ , the :ref:`Mindlin <Mindlin1949>` no-slip solution
is used which differs from the *linear_history* option by an additional factor
of :math:`a`, the radius of the contact region. The tangential force is given by:
.. math::
\mathbf{F}_t = -min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|-k_t a \mathbf{\xi} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
Here, *a* is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
Here, :math:`a` is the radius of the contact region, given by :math:`a =\sqrt{R\delta}`
for all normal contact models, except for *jkr*\ , where it is given
implicitly by :math:`\delta = a^2/R - 2\sqrt{\pi \gamma a/E}`, see
discussion above. To match the Mindlin solution, one should set :math:`k_t = 4G/(2-\nu)`, where :math:`G` is the shear modulus, related to Young's modulus
:math:`E` by :math:`G = E/(2(1+\nu))`, where :math:`\nu` is Poisson's ratio. This
can also be achieved by specifying *NULL* for :math:`k_t`, in which case a
discussion above. To match the Mindlin solution, one should set
:math:`k_t = 8G_{eff}`, where :math:`G_{eff}` is the effective shear modulus given by:
.. math::
G_{eff} = \left(\frac{2-\nu_i}{G_i} + \frac{2-\nu_j}{G_j}\right)^{-1}
where :math:`G` is the shear modulus, related to Young's modulus :math:`E`
and Poisson's ratio :math:`\nu` by :math:`G = E/(2(1+\nu))`. This can also be
achieved by specifying *NULL* for :math:`k_t`, in which case a
normal contact model that specifies material parameters :math:`E` and
:math:`\nu` is required (e.g. *hertz/material*\ , *dmt* or *jkr*\ ). In this
case, mixing of the shear modulus for different particle types *i* and
*j* is done according to:
*j* is done according to the formula above.
.. note::
The radius of the contact region :math:`a` depends on the normal overlap.
As a result, the tangential force for *mindlin* can change due to
a variation in normal overlap, even with no change in tangential displacement.
For *tangential mindlin/force*, the accumulated elastic tangential force
characterizes the contact history, instead of the accumulated tangential
displacement. This prevents the dependence of the tangential force on the
normal overlap as noted above. The tangential force is given by:
.. math::
1/G = 2(2-\nu_i)(1+\nu_i)/E_i + 2(2-\nu_j)(1+\nu_j)/E_j
\mathbf{F}_t = -\min(\mu_t F_{n0}, \|\mathbf{F}_{te} + \mathbf{F}_\mathrm{t,damp}\|) \mathbf{t}
The increment of the elastic component of the tangential force
:math:`\mathbf{F}_{te}` is given by:
.. math::
\mathrm{d}\mathbf{F}_{te} = -k_t a \mathbf{v}_{t,rel} \mathrm{d}\tau
The changes in frame of reference of the contacting pair of particles during
contact are accounted for by the same formula as above, replacing the
accumulated tangential displacement :math:`\xi`, by the accumulated tangential
elastic force :math:`F_{te}`. When the tangential force exceeds the critical
force, the tangential force is directly re-scaled to match the value for
the critical force:
.. math::
\mathbf{F}_{te} = - \mu_t F_{n0}\mathbf{t} + \mathbf{F}_{t,damp}
The same rules as those described for *mindlin* apply regarding the tangential
stiffness and mixing of the shear modulus for different particle types.
The *mindlin_rescale* option uses the same form as *mindlin*\ , but the
magnitude of the tangential displacement is re-scaled as the contact
@ -421,9 +467,32 @@ Here, :math:`t_{n-1}` indicates the value at the previous time
step. This rescaling accounts for the fact that a decrease in the
contact area upon unloading leads to the contact being unable to
support the previous tangential loading, and spurious energy is
created without the rescaling above (:ref:`Walton <WaltonPC>` ). See also
discussion in :ref:`Thornton et al, 2013 <Thornton2013>` , particularly
equation 18(b) of that work and associated discussion.
created without the rescaling above (:ref:`Walton <WaltonPC>` ).
.. note::
For *mindlin*, a decrease in the tangential force already occurs as the
contact unloads, due to the dependence of the tangential force on the normal
force described above. By re-scaling :math:`\xi`, *mindlin_rescale*
effectively re-scales the tangential force twice, i.e., proportionally to
:math:`a^2`. This peculiar behavior results from use of the accumulated
tangential displacement to characterize the contact history. Although
*mindlin_rescale* remains available for historic reasons and backward
compatibility purposes, it should be avoided in favor of *mindlin_rescale/force*.
The *mindlin_rescale/force* option uses the same form as *mindlin/force*,
but the magnitude of the tangential elastic force is re-scaled as the contact
unloads, i.e. if :math:`a < a_{t_{n-1}}`:
.. math::
\mathbf{F}_{te} = \mathbf{F}_{te, t_{n-1}} \frac{a}{a_{t_{n-1}}}
This approach provides a better approximation of the :ref:`Mindlin-Deresiewicz <Mindlin1953>`
laws and is more consistent than *mindlin_rescale*. See discussions in
:ref:`Thornton et al, 2013 <Thornton2013>`, particularly equation 18(b) of that
work and associated discussion, and :ref:`Agnolin and Roux, 2007 <AgnolinRoux2007>`,
particularly Appendix A.
----------
@ -460,7 +529,7 @@ exceeds a critical value:
.. math::
\mathbf{F}_{roll} = min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
\mathbf{F}_{roll} = \min(\mu_{roll} F_{n,0}, \|\mathbf{F}_{roll,0}\|)\mathbf{k}
Here, :math:`\mathbf{k} = \mathbf{v}_{roll}/\|\mathbf{v}_{roll}\|` is the direction of
the pseudo-force. As with tangential displacement, the rolling
@ -512,7 +581,7 @@ is then truncated according to:
.. math::
\tau_{twist} = min(\mu_{twist} F_{n,0}, \tau_{twist,0})
\tau_{twist} = \min(\mu_{twist} F_{n,0}, \tau_{twist,0})
Similar to the sliding and rolling displacement, the angular
displacement is rescaled so that it corresponds to the critical value
@ -763,3 +832,15 @@ Technology, 233, 30-46.
.. _WaltonPC:
**(Otis R. Walton)** Walton, O.R., Personal Communication
.. _Mindlin1953:
**(Mindlin and Deresiewicz, 1953)** Mindlin, R.D., & Deresiewicz, H (1953).
Elastic Spheres in Contact under Varying Oblique Force.
J. Appl. Mech., ASME 20, 327-344.
.. _AgnolinRoux2007:
**(Agnolin and Roux 2007)** Agnolin, I. & Roux, J-N. (2007).
Internal states of model isotropic granular packings.
I. Assembling process, geometry, and contact networks. Phys. Rev. E, 76, 061302.

View File

@ -250,8 +250,12 @@ from :ref:`(Li2013_POF) <Li2013_POF>`. The short mDPD run (about 2 minutes
on a single core) generates a particle trajectory which can
be visualized as follows.
.. only:: html
.. image:: JPG/examples_mdpd.gif
:align: center
.. image:: JPG/examples_mdpd_first.jpg
:target: JPG/examples_mdpd.gif
:align: center
.. image:: JPG/examples_mdpd_last.jpg

120
doc/src/pg_developer.rst Normal file
View File

@ -0,0 +1,120 @@
LAMMPS Developer Guide
**********************
This section describes the internal structure and basic algorithms
of the LAMMPS code. This is a work in progress and additional
information will be added incrementally depending on availability
of time and requests from the LAMMPS user community.
LAMMPS source files
===================
The source files of the LAMMPS code are distributed across two
directories of the distribution. The core of the code is located in the
``src`` folder and its sub-directories. Almost all of those are C++ files
(implementation files have a ``.cpp`` extension and headers a
``.h``). A sizable number of these files are in the ``src`` directory
itself, but there are plenty of :doc:`packages <Packages>`, which can be
included or excluded when LAMMPS is built. See the :doc:`Include
packages in build <Build_package>` section of the manual for more
information about that part of the build process. LAMMPS currently
supports building with :doc:`conventional makefiles <Build_make>` and
through :doc:`CMake <Build_cmake>` which differ in how packages are
enabled or disabled for a LAMMPS binary. The source files for each
package are in all-uppercase sub-directories of the ``src`` folder, for
example ``src/MOLECULE`` or ``src/USER-MISC``. The ``src/STUBS``
sub-directory is not a package but contains a dummy MPI library, that is
used when building a serial version of the code. The ``src/MAKE``
directory contains makefiles with settings and flags for a variety of
configurations and machines for the build process with traditional
makefiles.
The ``lib`` directory contains the source code for several supporting
libraries or files with configuration settings to use globally installed
libraries, that are required by some of the optional packages.
Each sub-directory, like ``lib/poems`` or ``lib/gpu``, contains the
source files, some of which are in different languages such as Fortran
or CUDA. These libraries are linked to during a LAMMPS build, if the
corresponding package is installed.
LAMMPS C++ source files almost always come in pairs, such as
``src/run.cpp`` and ``src/run.h``. The pair of files defines a C++
class, for example the :cpp:class:`LAMMPS_NS::Run` class which contains
the code invoked by the :doc:`run <run>` command in a LAMMPS input script.
As this example illustrates, source file and class names often have a
one-to-one correspondence with a command used in a LAMMPS input script.
Some source files and classes do not have a corresponding input script
command, e.g. ``src/force.cpp`` and the :cpp:class:`LAMMPS_NS::Force`
class. They are discussed in the next section.
Overview of LAMMPS class topology
=================================
Though LAMMPS has a lot of source files and classes, its class topology
is relatively flat, as outlined in the :ref:`class-topology` figure. Each
name refers to a class and has a pair of associated source files in the
``src`` folder, for example the class :cpp:class:`LAMMPS_NS::Memory`
corresponds to the files ``memory.cpp`` and ``memory.h``, or the class
:cpp:class:`LAMMPS_NS::AtomVec` corresponds to the files
``atom_vec.cpp`` and ``atom_vec.h``. Full lines in the figure represent
compositing: that is the class to the left holds a pointer to an
instance of the class to the right. Dashed lines instead represent
inheritance: the class to the right is derived from the class on the
left. Classes with a red boundary are not instantiated directly, but
they represent the base classes for "styles". Those "styles" make up
the bulk of the LAMMPS code and only a few typical examples are included
in the figure for demonstration purposes.
.. _class-topology:
.. figure:: JPG/lammps-classes.png
LAMMPS class topology
This figure shows some of the relations of the base classes of the
LAMMPS simulation package. Full lines indicate that a class holds an
instance of the class it is pointing to; dashed lines point to
derived classes that are given as examples of what classes may be
instantiated during a LAMMPS run based on the input commands and
accessed through the API defined by their respective base classes. At
the core is the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class, which
holds pointers to class instances with specific purposes. Those may
hold instances of other classes, sometimes directly, or only
temporarily, sometimes as derived classes or derived classes of
derived classes, which may also hold instances of other classes.
The :cpp:class:`LAMMPS_NS::LAMMPS` class is the topmost class and
represents what is referred to as an "instance" of LAMMPS. It is a
composite holding references to instances of other core classes
providing the core functionality of the MD engine in LAMMPS and through
them abstractions of the required operations. The constructor of the
LAMMPS class will instantiate those instances, process the command line
flags, initialize MPI (if not already done) and set up file pointers for
input and output. The destructor will shut everything down and free all
associated memory. Thus code for the standalone LAMMPS executable in
``main.cpp`` simply initializes MPI, instantiates a single instance of
LAMMPS, and passes it the command line flags and input script. It
deletes the LAMMPS instance after the method reading the input returns
and shuts down the MPI environment before it exits the executable.
The :cpp:class:`LAMMPS_NS::Pointers` class is not shown in the
:ref:`class-topology` figure; it holds references to members of the
:cpp:class:`LAMMPS_NS::LAMMPS` class, so that all classes derived from
:cpp:class:`LAMMPS_NS::Pointers` have direct access to those references.
From the class topology all classes with blue boundary are referenced in
this class and all classes in the second and third columns, that are not
listed as derived classes are instead derived from
:cpp:class:`LAMMPS_NS::Pointers`.
Since all storage is encapsulated, the LAMMPS class can also be
instantiated multiple times by a calling code, and that can be either
simultaneously or consecutively. When running in parallel with MPI,
care has to be taken, that suitable communicators are used to not
create conflicts between different instances.
The LAMMPS class currently holds instances of 19 classes representing
different core functionalities.
There are a handful of virtual parent classes in LAMMPS that define
what LAMMPS calls ``styles``. They are shaded red in the
:ref:`class-topology` figure. Each of these is the parent of a number of child
classes that implement the interface defined by the parent class.

View File

@ -1 +1,5 @@
Sphinx
sphinxcontrib-spelling
sphinx-fortran
breathe
Pygments

View File

@ -7,3 +7,10 @@
display: block;
margin-bottom: 0.809em;
}
.lammps_release {
text-align: center;
font-size: 11px;
display: block;
margin-bottom: 0.405em;
}

View File

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -103,6 +103,12 @@
{%- endif %}
{%- endblock %}
{%- block extrahead %} {% endblock %}
{# Keep modernizr in head - http://modernizr.com/docs/#installing #}
<script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
{# for improved browser compatibility #}
<script src="{{ pathto('_static/polyfill.js', 1) }}"></script>
</head>
<body class="wy-body-for-nav">
@ -135,9 +141,8 @@
{%- set nav_version = current_version %}
{% endif %}
{% if nav_version %}
<div class="version">
{{ nav_version }}
</div>
<div class="lammps_version">Version: <b>{{ nav_version }}</b></div>
<div class="lammps_release">git info: {{ release }}</div>
{% endif %}
{% endif %}

View File

@ -23,11 +23,16 @@ try:
except:
pass
LAMMPS_DOC_DIR = '@LAMMPS_DOC_DIR@'
LAMMPS_SOURCE_DIR = '@LAMMPS_SOURCE_DIR@'
LAMMPS_PYTHON_DIR = '@LAMMPS_PYTHON_DIR@'
LAMMPS_DOXYGEN_XML_DIR = '@DOXYGEN_XML_DIR@'
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.append(os.path.join(os.path.dirname(__file__), '../../src/_ext'))
sys.path.append(os.path.join(LAMMPS_DOC_DIR, 'src', '_ext'))
# -- General configuration ------------------------------------------------
@ -41,7 +46,9 @@ extensions = [
'sphinx.ext.mathjax',
'sphinx.ext.imgmath',
'sphinx.ext.autodoc',
'sphinxfortran.fortran_domain',
'table_from_list',
'breathe',
]
# 2017-12-07: commented out, since this package is broken with Sphinx 16.x
# yet we can no longer use Sphinx 15.x, since that breaks with
@ -72,12 +79,24 @@ copyright = '2003-2020 Sandia Corporation'
def get_lammps_version():
import os
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, '../../../src/version.h'), 'r') as f:
with open(os.path.join(LAMMPS_SOURCE_DIR, 'version.h'), 'r') as f:
line = f.readline()
start_pos = line.find('"')+1
end_pos = line.find('"', start_pos)
return line[start_pos:end_pos]
def get_git_info():
    """Return a human-readable ``git describe`` string for the docs.

    Runs ``git describe`` in the current working directory and returns its
    output with underscores replaced by spaces and surrounding whitespace
    (including the trailing newline git prints) removed.

    Returns an empty string when git is not available, cannot be launched,
    or exits with a non-zero status (e.g. outside of a git checkout), so
    that building the documentation never fails because of missing git
    information.
    """
    import subprocess
    git_n_date = ''
    try:
        gitinfo = subprocess.run(['git', 'describe'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        if gitinfo.returncode == 0:
            # strip() drops the trailing newline so it does not leak into
            # the "release" string shown in the generated pages
            git_n_date = gitinfo.stdout.decode().replace('_', ' ').strip()
    except Exception:
        # best effort only: a missing git executable or undecodable output
        # must not abort the build, but Ctrl-C / SystemExit still propagate
        pass
    return git_n_date
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
@ -85,7 +104,7 @@ def get_lammps_version():
# The short X.Y version.
version = get_lammps_version()
# The full version, including alpha/beta/rc tags.
release = ''
release = get_git_info()
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -153,7 +172,7 @@ html_title = "LAMMPS documentation"
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = 'lammps-logo.png'
html_logo = '_static/lammps-logo.png'
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
@ -314,7 +333,7 @@ texinfo_documents = [
epub_title = 'LAMMPS Documentation - ' + get_lammps_version()
epub_cover = ('lammps-logo.png', '')
epub_cover = ('_static/lammps-logo.png', '')
epub_description = """
This is the Manual for the LAMMPS software package.
@ -342,13 +361,29 @@ if spelling_spec and has_enchant:
spelling_lang='en_US'
spelling_word_list_filename='false_positives.txt'
sys.path.append(os.path.join(os.path.dirname(__file__), '.'))
conf_script_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(conf_script_dir, '.'))
import LAMMPSLexer
from sphinx.highlighting import lexers
lexers['LAMMPS'] = LAMMPSLexer.LAMMPSLexer(startinline=True)
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../python'))
sys.path.append(LAMMPS_PYTHON_DIR)
# avoid syntax highlighting in blocks that don't specify language
highlight_language = 'none'
# autodoc configuration
autodoc_member_order = 'bysource'
#autoclass_content = 'both'
# breathe configuration
breathe_projects = { 'progguide' : LAMMPS_DOXYGEN_XML_DIR }
breathe_default_project = 'progguide'
breathe_show_define_initializer = True
breathe_domain_by_extension = { 'h' : 'cpp',
'cpp' : 'cpp',
'c' : 'c',
}

View File

@ -43,6 +43,7 @@ Afshar
agilio
Agilio
agni
Agnolin
Ai
Aidan
aij
@ -436,6 +437,7 @@ Colvars
COLVARS
comID
Commun
compositing
compressibility
compressive
Comput
@ -599,6 +601,7 @@ Dequidt
der
dereference
derekt
Deresiewicz
Derjagin
Derjaguin
Derlet
@ -2219,6 +2222,7 @@ oxdna
oxrna
oxDNA
oxRNA
packings
padua
Padua
palegoldenrod

View File

@ -1 +0,0 @@
../../src/JPG/lammps-logo.png

View File

@ -709,7 +709,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++)
virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR];
__local int ijnum_shared[BLOCK_PAIR];
__syncthreads();
@ -789,14 +789,14 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK;
if (k == i) {
ijnum = nbor_k;
red_acc[m] = ijnum;
ijnum_shared[m] = ijnum;
break;
}
}
numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m];
if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum;

View File

@ -719,7 +719,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++)
virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR];
__local int ijnum_shared[BLOCK_PAIR];
__syncthreads();
@ -799,14 +799,14 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK;
if (k == i) {
ijnum = nbor_k;
red_acc[m] = ijnum;
ijnum_shared[m] = ijnum;
break;
}
}
numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m];
if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum;
@ -957,7 +957,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++)
virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR];
__local int ijnum_shared[BLOCK_PAIR];
__syncthreads();
@ -1037,14 +1037,14 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
k &= NEIGHMASK;
if (k == i) {
ijnum = nbor_k;
red_acc[m] = ijnum;
ijnum_shared[m] = ijnum;
break;
}
}
numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m];
if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum;

View File

@ -729,7 +729,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
for (int i=0; i<6; i++)
virial[i]=(acctyp)0;
__local int red_acc[BLOCK_PAIR];
__local int ijnum_shared[BLOCK_PAIR];
__syncthreads();
@ -809,14 +809,14 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
k &= NEIGHMASK;
if (k == i) {
ijnum = nbor_k;
red_acc[m] = ijnum;
ijnum_shared[m] = ijnum;
break;
}
}
numtyp r1 = ucl_sqrt(rsq1);
numtyp r1inv = ucl_rsqrt(rsq1);
if (ijnum < 0) ijnum = red_acc[m];
if (ijnum < 0) ijnum = ijnum_shared[m];
// idx to zetaij is shifted by n_stride relative to ijnum in dev_short_nbor
int idx = ijnum;

View File

@ -10,33 +10,45 @@ for C++. Applications heavily leveraging Kokkos are strongly encouraged to use
You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project.
Modern CMake is exceedingly simple at a high-level (with the devil in the details).
Once Kokkos is installed, in your `CMakeLists.txt` simply use:
````
````cmake
find_package(Kokkos REQUIRED)
````
Then for every executable or library in your project:
````
````cmake
target_link_libraries(myTarget Kokkos::kokkos)
````
That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
Kokkos propagates all the necessary flags to your project.
This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`.
project.
When configuring your project just set:
````bash
> cmake ${srcdir} \
-DKokkos_ROOT=${kokkos_install_prefix} \
-DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
````
Note: You may need the following if using some versions of CMake (e.g. 3.12):
````cmake
cmake_policy(SET CMP0074 NEW)
````
If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(myTarget Kokkos::kokkos)`.
The examples in `examples/cmake_build_installed` and `examples/cmake_build_in_tree` can help get you started.
## Configuring CMake
A very basic installation is done with:
````
cmake ${srcdir} \
A very basic installation of Kokkos is done with:
````bash
> cmake ${srcdir} \
-DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_INSTALL_PREFIX=${my_install_folder}
-DCMAKE_INSTALL_PREFIX=${kokkos_install_folder}
````
which builds and installs a default Kokkos when you run `make install`.
There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g.
````
cmake ${srcdir} \
````bash
> cmake ${srcdir} \
-DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_INSTALL_PREFIX=${my_install_folder} \
-DKokkos_ENABLE_OPENMP=On
-DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \
-DKokkos_ENABLE_OPENMP=ON
````
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
@ -50,16 +62,16 @@ which activates the OpenMP backend. All of the options controlling device backen
## Spack
An alternative to manually building with CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list:
````
spack repo add $path-to-kokkos-spack
````bash
> spack repo add $path-to-kokkos-spack
````
A basic installation would be done as:
````
spack install kokkos
````bash
> spack install kokkos
````
Spack allows options and compilers to be tuned in the install command.
````
spack install kokkos@3.0 %gcc@7.3.0 +openmp
````bash
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
````
This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -67,17 +79,17 @@ This example illustrates the three most common parameters to Spack:
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
For a complete list of Kokkos options, run:
````bash
> spack info kokkos
````
spack info kokkos
````
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
More details can be found in the [Spack README](Spack.md)
#### Spack Development
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
If you must know, you can locate Spack Kokkos installations with:
````
spack find -p kokkos ...
````bash
> spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
@ -102,8 +114,14 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
* Whether to build Pthread backend
* BOOL Default: OFF
* Kokkos_ENABLE_SERIAL
* Whether to build serial backend
* Whether to build serial backend
* BOOL Default: ON
* Kokkos_ENABLE_HIP (Experimental)
* Whether to build HIP backend
* BOOL Default: OFF
* Kokkos_ENABLE_OPENMPTARGET (Experimental)
* Whether to build the OpenMP target backend
* BOOL Default: OFF
## Enable Options
Options can be enabled by specifying `-DKokkos_ENABLE_X`.
@ -138,9 +156,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
* Debug check on dual views
* BOOL Default: OFF
* Kokkos_ENABLE_DEPRECATED_CODE
* Whether to enable deprecated code
* BOOL Default: OFF
* Kokkos_ENABLE_EXAMPLES
* Whether to enable building examples
* BOOL Default: OFF
@ -150,9 +165,6 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_LARGE_MEM_TESTS
* Whether to perform extra large memory tests
* BOOL Default: OFF
* Kokkos_ENABLE_PROFILING
* Whether to create bindings for profiling tools
* BOOL Default: ON
* Kokkos_ENABLE_PROFILING_LOAD_PRINT
* Whether to print information about which profiling tools got loaded
* BOOL Default: OFF
@ -235,8 +247,11 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
* Kokkos_ARCH_BGQ
* Whether to optimize for the BGQ architecture
* BOOL Default: OFF
* Kokkos_ARCH_EPYC
* Whether to optimize for the EPYC architecture
* Kokkos_ARCH_ZEN
* Whether to optimize for the Zen architecture
* BOOL Default: OFF
* Kokkos_ARCH_ZEN2
* Whether to optimize for the Zen2 architecture
* BOOL Default: OFF
* Kokkos_ARCH_HSW
* Whether to optimize for the HSW architecture

View File

@ -1,6 +1,113 @@
# Change Log
## [3.1.1](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00)
**Implemented enhancements:**
- HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163)
- HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154)
- HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137)
- HIP:Require HIP 3.5.0 or higher [\#3099](https://github.com/kokkos/kokkos/issues/3099)
- HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096)
- OpenMPTarget: Significant update to the new experimental backend. Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169)
- Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018)
- Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241)
- Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 1: [\#3202](https://github.com/kokkos/kokkos/issues/3202)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 2: [\#3203](https://github.com/kokkos/kokkos/issues/3203)
- Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable 3: [\#3196](https://github.com/kokkos/kokkos/issues/3196)
- Annotations for `DefaultExecutionSpace` and `DefaultHostExecutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189)
- Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187)
- Improve support for nvcc\_wrapper with exotic host compiler [\#3186](https://github.com/kokkos/kokkos/issues/3186)
- Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185)
- Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166)
- Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165)
- View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159)
- Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151)
- BuildSystem: Provide an explicit default CMAKE\_BUILD\_TYPE [\#3131](https://github.com/kokkos/kokkos/issues/3131)
- Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124)
- Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123)
- BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122)
- General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106)
- TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098)
- nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092)
- BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091)
- Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087)
- Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084)
- BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082)
- Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076)
- HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067)
- Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061)
- Extract and use get\_gpu [\#3048](https://github.com/kokkos/kokkos/issues/3048)
- Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059)
- Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052)
- Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051)
- Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034)
- BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021)
- HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020)
- Autodetect number of devices [\#3013](https://github.com/kokkos/kokkos/issues/3013)
**Fixed bugs:**
- Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255)
- Fix issue with C++ standard flags when using `nvcc_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254)
- Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208)
- Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176)
- Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170)
- BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161)
- ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162)
- BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127)
- OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101)
- BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085)
- HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080)
- Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072)
- Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069)
- Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035)
**Incompatibilities:**
- Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157)
- Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147)
- Remove core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146)
- Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115)
- Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066)
**Closed issues:**
- Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
- Remove KOKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
- Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
- In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
- Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)
- DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040)
- Add installation testing. [\#3037](https://github.com/kokkos/kokkos/issues/3037)
- nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017)
- CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996)
- Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975)
- Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974)
- Add ability to assign kokkos profile function from executable [\#2973](https://github.com/kokkos/kokkos/issues/2973)
- ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967)
- Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237)
- Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252)
- Error configuring as subproject [\#3140](https://github.com/kokkos/kokkos/issues/3140)
- CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207)
- PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125)
- Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167)
- RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192)
- MemorySpace::allocate needs to have memory pool counting. [\#3064](https://github.com/kokkos/kokkos/issues/3064)
- Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038)
- CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026)
- Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014)
- nvcc\_wrapper cant handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993)
- Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990)
- complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989)
- Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979)
- Rank 1 custom layouts dont work as expected. [\#2840](https://github.com/kokkos/kokkos/issues/2840)
## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1)
**Fixed bugs:**

View File

@ -1,4 +1,9 @@
# Refuse to configure when the build directory is the source directory itself,
# so that generated build files can never end up mixed into the source tree.
string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" KOKKOS_IN_SOURCE_BUILD)
if(KOKKOS_IN_SOURCE_BUILD)
  message(FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files.")
endif()
unset(KOKKOS_IN_SOURCE_BUILD)
# We want to determine if options are given with the wrong case
# In order to detect which arguments are given to compare against
# the list of valid arguments, at the beginning here we need to
@ -34,6 +39,9 @@ IF(COMMAND TRIBITS_PACKAGE_DECL)
ELSE()
SET(KOKKOS_HAS_TRILINOS OFF)
ENDIF()
# Is this build a subdirectory of another project
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
@ -75,16 +83,17 @@ IF(NOT KOKKOS_HAS_TRILINOS)
SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
SET(ENV{CXX} ${SPACK_CXX})
ENDIF()
ENDif()
IF(NOT DEFINED ${PROJECT_NAME})
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
ENDIF()
PROJECT(Kokkos CXX)
IF(Kokkos_ENABLE_HIP)
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
ENDIF()
# Always call the project command to define Kokkos_ variables
# and to make sure that C++ is an enabled language
PROJECT(Kokkos CXX)
# Provide an explicit default build type, but only when Kokkos is the
# top-level project (HAS_PARENT is empty); a parent project keeps control of
# its own build type.
IF(NOT HAS_PARENT)
  # CMAKE_BUILD_TYPE is only meaningful for single-config generators.
  # Multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) set
  # CMAKE_CONFIGURATION_TYPES and select the configuration at build time, so
  # forcing a build type into the cache there would be misleading.
  IF (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    SET(DEFAULT_BUILD_TYPE "RelWithDebInfo")
    MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
    # FORCE is required to replace the empty cache entry created by PROJECT();
    # it never overrides a user choice because this branch only runs when
    # CMAKE_BUILD_TYPE is empty.
    SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
      "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
      FORCE)
  ENDIF()
ENDIF()
ENDIF()
@ -102,8 +111,8 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 1)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION_MINOR 2)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -147,6 +156,7 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables
# to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
# The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES
# 2) Check the compiler ID (type and version)
@ -169,7 +179,6 @@ SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontain
SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY)
IF (KOKKOS_HAS_TRILINOS)
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
@ -203,7 +212,7 @@ IF (KOKKOS_HAS_TRILINOS)
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
@ -246,7 +255,7 @@ KOKKOS_PACKAGE_POSTPROCESS()
#We are ready to configure the header
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
IF (NOT KOKKOS_HAS_TRILINOS)
IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
ADD_LIBRARY(kokkos INTERFACE)
#Make sure in-tree projects can reference this as Kokkos::
#to match the installed target names
@ -262,8 +271,6 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
# If the argument of DESTINATION is a relative path, CMake computes it
# as relative to ${CMAKE_INSTALL_PATH}.
INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION ${CMAKE_INSTALL_BINDIR})
INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# Finally - if we are a subproject - make sure the enabled devices are visible
IF (HAS_PARENT)

View File

@ -11,20 +11,20 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 1
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION_MINOR = 2
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthread"
# Options:
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906
# AMD-CPUS: AMDAVX,EPYC
# AMD-CPUS: AMDAVX,Zen,Zen2
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -32,10 +32,8 @@ KOKKOS_DEBUG ?= "no"
KOKKOS_USE_TPLS ?= ""
# Options: c++11,c++14,c++1y,c++17,c++1z,c++2a,c++20
KOKKOS_CXX_STANDARD ?= "c++11"
# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests,disable_complex_align
# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
KOKKOS_OPTIONS ?= ""
# Option for setting ETI path
KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
KOKKOS_CMAKE ?= "no"
KOKKOS_TRIBITS ?= "no"
KOKKOS_STANDALONE_CMAKE ?= "no"
@ -74,6 +72,7 @@ KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20)
# Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
@ -83,9 +82,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code)
KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
@ -96,7 +93,6 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
@ -140,6 +136,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_DEVICELIST += OPENMPTARGET
  # The OpenMPTarget backend requires C++17 or newer. Sum the per-standard
  # enable flags (presumably 0 or 1 each, as produced by kokkos_has_string)
  # for every accepted spelling of a C++17-or-newer standard. c++1z is
  # included because this Makefile maps it to KOKKOS_ENABLE_CXX17, just like
  # c++17.
  KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX1Z) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX20) \
                                                    + $(KOKKOS_INTERNAL_ENABLE_CXX2A))
  # Fail only when none of the accepted standards was requested.
  ifeq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 0)
    $(error OpenMPTarget backend requires C++17 or newer)
  endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -281,7 +283,7 @@ endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
KOKKOS_INTERNAL_CXX14_FLAG := --c++14
#KOKKOS_INTERNAL_CXX17_FLAG := --c++17
KOKKOS_INTERNAL_CXX17_FLAG := --c++17
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
@ -338,35 +340,27 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
+ $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80))
#SEK: This seems like a bug to me
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
@ -394,19 +388,20 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -430,7 +425,7 @@ endif
KOKKOS_CPPFLAGS =
KOKKOS_LIBDIRS =
ifneq ($(KOKKOS_CMAKE), yes)
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
endif
KOKKOS_TPL_INCLUDE_DIRS =
KOKKOS_TPL_LIBRARY_DIRS =
@ -458,88 +453,91 @@ KOKKOS_CONFIG_HEADER=KokkosCore_config.h
# Functions for generating config header file
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
# assign hash sign to variable for compat. with make 4.3
H := \#
# Do not append first line
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
tmp := $(call kokkos_append_header,"$(shell date)")
tmp := $(call kokkos_append_header,"----------------------------------------------*/")
tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
tmp := $(call kokkos_append_header,'\#else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif')
tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
tmp := $(call kokkos_append_header,'$H''else')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'$H''endif')
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
endif
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"\#endif")
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"\#endif")
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"\#endif")
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"\#endif")
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"\#endif")
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
#only add the c++ standard flags if this is not CMake
@ -548,34 +546,39 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX11")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX14")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@ -585,20 +588,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
endif
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_COMPLEX_ALIGN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TUNING")
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LIBDL")
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(HWLOC_PATH),)
@ -611,11 +620,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_LIBS += -lhwloc
KOKKOS_TPL_LIBRARY_NAMES += hwloc
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt
KOKKOS_TPL_LIBRARY_NAMES += rt
endif
@ -632,50 +641,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_LIBS += -lmemkind -lnuma
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
endif
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
endif
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
endif
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
endif
tmp := $(call kokkos_append_header,"/* Cuda Settings */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
endif
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_CXXFLAGS += -fcuda-rdc
KOKKOS_LDFLAGS += -fcuda-rdc
@ -696,7 +691,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
else
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
@ -704,14 +699,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
endif
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 80; echo $$?),0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
else
$(warning Warning: Cuda relaxed constexpr support was requested but NVCC version is too low. This requires NVCC for Cuda version 8.0 or higher. Disabling relaxed constexpr support now.)
@ -719,25 +714,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_CONSTEXPR")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH")
endif
endif
# Add Architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -754,7 +749,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -770,9 +765,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx2
@ -783,9 +778,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx2
KOKKOS_LDFLAGS += -mavx2
else
KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -802,8 +810,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -820,7 +828,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSSE4.2
@ -842,7 +850,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx
@ -864,7 +872,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -876,7 +884,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -897,7 +905,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -918,7 +926,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -940,7 +948,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -962,7 +970,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xMIC-AVX512
@ -983,7 +991,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX512
@ -1004,7 +1012,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
@ -1022,8 +1030,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
KOKKOS_CXXFLAGS += -x cuda
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
KOKKOS_CXXFLAGS += -x cuda
else
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
endif
@ -1039,65 +1047,70 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
endif
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
@ -1121,13 +1134,13 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
# Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
endif
@ -1138,7 +1151,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXFLAGS+=-fgpu-rdc
KOKKOS_LDFLAGS+=-fgpu-rdc
else
@ -1171,9 +1184,6 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
ifneq ($(CUDA_PATH),)
KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include
@ -1211,9 +1221,6 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -1228,9 +1235,6 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
KOKKOS_LIBS += -lpthread
KOKKOS_TPL_LIBRARY_NAMES += pthread
@ -1279,9 +1283,6 @@ endif
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
# device to avoid a link warning.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
endif
endif
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))

View File

@ -26,21 +26,17 @@ Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spi
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,9 +46,6 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
@ -75,9 +68,6 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -85,9 +75,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -95,9 +82,6 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)

View File

@ -151,7 +151,7 @@ Full details are given in the [build instructions](BUILD.md). Basic setups are s
## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
````
````bash
cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \
-DCMAKE_INSTALL_PREFIX=$path_to_install \
@ -170,7 +170,7 @@ and run `make test` after completing the build.
For your CMake project using Kokkos, code such as the following:
````
````cmake
find_package(Kokkos)
...
target_link_libraries(myTarget Kokkos::kokkos)
@ -187,17 +187,15 @@ for the install location given above.
## Spack
An alternative to manually building with CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list:
````
spack repo add $path-to-kokkos-spack
To get started, download the Spack [repo](https://github.com/spack/spack).
````
A basic installation would be done as:
````
spack install kokkos
````bash
> spack install kokkos
````
Spack allows options and compilers to be tuned in the install command.
````
spack install kokkos@3.0 %gcc@7.3.0 +openmp
````bash
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
````
This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
@ -205,33 +203,33 @@ This example illustrates the three most common parameters to Spack:
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
For a complete list of Kokkos options, run:
````
spack info kokkos
````bash
> spack info kokkos
````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
````
spack find -p kokkos ...
````bash
> spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
Some more details can be found in the Kokkos spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest).
## Raw Makefile
A bash script is provided to generate raw makefiles.
To install Kokkos as a library create a build directory and run the following
````
$KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
````bash
> $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
````
Once the Makefile is generated, run:
````
make kokkoslib
make install
````bash
> make kokkoslib
> make install
````
To additionally run the unit tests:
````
make build-test
make test
````bash
> make build-test
> make test
````
Run `generate_makefile.bash --help` for more detailed options such as
changing the device type for which to build.
@ -274,7 +272,7 @@ more than a single GPU is used by a single process.
If you publish work which mentions Kokkos, please cite the following paper:
````
````BibTeX
@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ",

267
lib/kokkos/Spack.md Normal file
View File

@ -0,0 +1,267 @@
![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)
# Kokkos Spack
This gives instructions for using Spack to install Kokkos and developing packages that depend on Kokkos.
## Getting Started
Make sure you have downloaded [Spack](https://github.com/spack/spack).
The easiest way to configure the Spack environment is:
````bash
> source spack/share/spack/setup-env.sh
````
with other scripts available for other shells.
You can display information about how to install packages with:
````bash
> spack info kokkos
````
This will print all the information about how to install Kokkos with Spack.
For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io).
## Setting Up Spack: Avoiding the Package Cascade
By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA.
This package cascade can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be:
````yaml
packages:
cuda:
modules:
cuda@10.1.243: [cuda/10.1.243]
paths:
cuda@10.1.243:
/opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243
buildable: false
cmake:
modules:
cmake: [cmake/3.16.8]
paths:
cmake:
/opt/local/ppc64le/cmake/3.16.8
buildable: false
````
The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems).
The `buildable` flag is useful to make sure Spack crashes if there is a path error,
rather than having a typo cause Spack to rebuild everything because `cmake` isn't found.
You can verify your environment is set up correctly by running `spack graph` or `spack spec`.
For example:
````bash
> spack graph kokkos +cuda
o kokkos
|\
o | cuda
/
o cmake
````
Without the existing CUDA and CMake being identified in `packages.yaml`, a subset (!) of the output would be:
````bash
o kokkos
|\
| o cmake
| |\
| | | |\
| | | | | |\
| | | | | | | |\
| | | | | | | | | |\
| | | | | | | o | | | libarchive
| | | | | | | |\ \ \ \
| | | | | | | | | |\ \ \ \
| | | | | | | | | | | | |_|/
| | | | | | | | | | | |/| |
| | | | | | | | | | | | | o curl
| | |_|_|_|_|_|_|_|_|_|_|/|
| |/| | | |_|_|_|_|_|_|_|/
| | | | |/| | | | | | | |
| | | | o | | | | | | | | openssl
| |/| | | | | | | | | | |
| | | | | | | | | | o | | libxml2
| | |_|_|_|_|_|_|_|/| | |
| | | | | | | | | | |\ \ \
| o | | | | | | | | | | | | zlib
| / / / / / / / / / / / /
| o | | | | | | | | | | | xz
| / / / / / / / / / / /
| o | | | | | | | | | | rhash
| / / / / / / / / / /
| | | | o | | | | | | nettle
| | | | |\ \ \ \ \ \ \
| | | o | | | | | | | | libuv
| | | | o | | | | | | | autoconf
| | |_|/| | | | | | | |
| | | | |/ / / / / / /
| o | | | | | | | | | perl
| o | | | | | | | | | gdbm
| o | | | | | | | | | readline
````
## Configuring Kokkos as a Project Dependency
Say you have a project "SuperScience" which needs to use Kokkos.
In your `package.py` file, you would generally include something like:
````python
class SuperScience(CMakePackage):
    ...
    depends_on("kokkos")
````
Often projects want to tweak behavior when using certain features, e.g.
````python
depends_on("kokkos+cuda", when="+cuda")
````
if your project needs CUDA-specific logic to configure and build.
This illustrates the general principle in Spack of "flowing-up".
A user requests a feature in the final app:
````bash
> spack install superscience+cuda
````
This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build.
The downstream app (SuperScience) tells the upstream app (Kokkos) how to build.
Because Kokkos is a performance portability library, it somewhat inverts this principle.
Kokkos "flows-down", telling your application how best to configure for performance.
Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build,
a pre-built Kokkos should be telling the downstream app SuperScience what variants to use.
Kokkos works best when there is an "expert" configuration installed on your system.
Your build should simply request `-DKokkos_ROOT=<BEST_KOKKOS_FOR_MY_SYSTEM>` and configure appropriately based on the Kokkos it finds.
Kokkos has many, many build variants.
Where possible, projects should only depend on a general Kokkos, not specific variants.
Instead, for each system you build on, we recommend adding a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users).
For a Xeon + Volta system, this could look like:
````yaml
kokkos:
  variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70
  compiler: [gcc@7.2.0]
````
which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1.
It also enables support for CUDA Lambdas.
The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below).
Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture.
For a Haswell system, we use
````yaml
kokkos:
  variants: +openmp std=14 target=haswell
  compiler: [intel@18]
````
which uses the built-in microarchitecture variants of Spack.
Consult the Spack documentation for more details of Spack microarchitectures
and CUDA architectures.
Spack does not currently provide an AMD GPU microarchitecture option.
If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`.
````yaml
kokkos:
  variants: +hip amd_gpu_arch=vega900
````
Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want.
For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems).
If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project:
````bash
> spack install superscience
````
you may end up just getting the default Kokkos (i.e. Serial).
Some examples are included in the `config/yaml` folder for common platforms.
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
For example, with Kokkos Kernels:
````bash
kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512
^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512
^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512
^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512
^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512
````
The output can be very verbose, but we can verify the expected `kokkos`:
````bash
kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512
````
We see that we do have the expected variants, e.g. `+volta70` and `+wrapper`.
### Spack Environments
The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)).
Rather than installing packages one-at-a-time, you add packages to an environment.
After adding all packages, you concretize and install them all.
Using environments, one can explicitly add a desired Kokkos for the environment, e.g.
````bash
> spack add kokkos +cuda +cuda_lambda +volta70
> spack add my_project +my_variant
> ...
> spack install
````
All packages within the environment will build against the CUDA-enabled Kokkos,
even if they only request a default Kokkos.
## NVCC Wrapper
Kokkos is a C++ project, but often builds for the CUDA backend.
This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler.
Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler.
`nvcc` itself also uses an underlying host compiler, e.g. GCC.
In Spack, the underlying host compiler is specified as below, e.g.:
````bash
> spack install package %gcc@8.0.0
````
This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant.
````bash
> spack install kokkos +cuda +wrapper %gcc@7.2.0
````
Downstream projects depending on Kokkos need to override their compiler.
Kokkos provides the compiler in a `kokkos_cxx` variable,
which points to either `nvcc_wrapper` when needed or the regular compiler otherwise.
Spack projects already do this to use MPI compiler wrappers.
````python
def cmake_args(self):
    options = []
    ...
    options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx)
    ...
    return options
````
Note: `nvcc_wrapper` works with the MPI compiler wrappers.
If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`.
Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood.
````python
def cmake_args(self):
    options = []
    ...
    options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx)
    ...
    return options
````
To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI).
This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway.
This behavior is necessary for now, but will hopefully be removed later.
When using environments, if MPI is not needed, you can remove the MPI dependency with:
````bash
> spack add kokkos-nvcc-wrapper ~mpi
````
## Developing With Spack
Spack has historically been much more suited to *deployment* of mature packages than active testing or developing.
However, recent features have improved support for development.
Future releases are likely to make this even easier and incorporate Git integration.
The most common commands will do a full build and install of the packages.
If doing development, you may wish to merely set up a build environment.
This allows you to modify the source and re-build.
In this case, you can stop after configuring.
Suppose you have a Kokkos checkout in the folder `kokkos-src`:
````bash
> spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp
````
This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown):
Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases.
You are usually developing a feature branch that will merge into `develop`,
hence you are making a new `develop` branch.
````bash
> cd kokkos-src
> source spack-build-env.txt
> cd spack-build
> make
````
Before sourcing the Spack development environment, you may wish to save your current environment:
````bash
> declare -px > myenv.sh
````
When done with Spack, you can then restore your original environment:
````bash
> source myenv.sh
````

View File

@ -2,7 +2,9 @@
KOKKOS_SUBPACKAGE(Algorithms)
ADD_SUBDIRECTORY(src)
IF (NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src)
ENDIF()
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)

View File

@ -7,9 +7,15 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS *.hpp)
FILE(GLOB SOURCES *.cpp)
LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
FILE(GLOB ALGO_HEADERS *.hpp)
FILE(GLOB ALGO_SOURCES *.cpp)
LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION ${KOKKOS_HEADER_DIR}
FILES_MATCHING PATTERN "*.hpp"
)
#-----------------------------------------------------------------------------
@ -17,8 +23,8 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
# These will get ignored for standalone CMake and a true interface library made
KOKKOS_ADD_INTERFACE_LIBRARY(
kokkosalgorithms
HEADERS ${HEADERS}
SOURCES ${SOURCES}
HEADERS ${ALGO_HEADERS}
SOURCES ${ALGO_SOURCES}
)
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${KOKKOS_TOP_BUILD_DIR}

View File

@ -94,9 +94,9 @@ namespace Kokkos {
class Pool {
public:
//The Kokkos device type
typedef Device device_type;
using device_type = Device;
//The actual generator type
typedef Generator<Device> generator_type;
using generator_type = Generator<Device>;
//Default constructor: does not initialize a pool
Pool();
@ -124,7 +124,7 @@ namespace Kokkos {
class Generator {
public:
//The Kokkos device type
typedef DeviceType device_type;
using device_type = DeviceType;
//Max return values of respective [X]rand[S]() functions
enum {MAX_URAND = 0xffffffffU};
@ -138,75 +138,75 @@ namespace Kokkos {
KOKKOS_INLINE_FUNCTION
Generator (STATE_ARGUMENTS, int state_idx = 0);
//Draw a equidistributed uint32_t in the range (0,MAX_URAND]
//Draw a equidistributed uint32_t in the range [0,MAX_URAND)
KOKKOS_INLINE_FUNCTION
uint32_t urand();
//Draw a equidistributed uint64_t in the range (0,MAX_URAND64]
//Draw a equidistributed uint64_t in the range [0,MAX_URAND64)
KOKKOS_INLINE_FUNCTION
uint64_t urand64();
//Draw a equidistributed uint32_t in the range (0,range]
//Draw a equidistributed uint32_t in the range [0,range)
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range);
//Draw a equidistributed uint32_t in the range (start,end]
//Draw a equidistributed uint32_t in the range [start,end)
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end );
//Draw a equidistributed uint64_t in the range (0,range]
//Draw a equidistributed uint64_t in the range [0,range)
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range);
//Draw a equidistributed uint64_t in the range (start,end]
//Draw a equidistributed uint64_t in the range [start,end)
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end );
//Draw a equidistributed int in the range (0,MAX_RAND]
//Draw a equidistributed int in the range [0,MAX_RAND)
KOKKOS_INLINE_FUNCTION
int rand();
//Draw a equidistributed int in the range (0,range]
//Draw a equidistributed int in the range [0,range)
KOKKOS_INLINE_FUNCTION
int rand(const int& range);
//Draw a equidistributed int in the range (start,end]
//Draw a equidistributed int in the range [start,end)
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end );
//Draw a equidistributed int64_t in the range (0,MAX_RAND64]
//Draw a equidistributed int64_t in the range [0,MAX_RAND64)
KOKKOS_INLINE_FUNCTION
int64_t rand64();
//Draw a equidistributed int64_t in the range (0,range]
//Draw a equidistributed int64_t in the range [0,range)
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range);
//Draw a equidistributed int64_t in the range (start,end]
//Draw a equidistributed int64_t in the range [start,end)
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end );
//Draw a equidistributed float in the range (0,1.0]
//Draw a equidistributed float in the range [0,1.0)
KOKKOS_INLINE_FUNCTION
float frand();
//Draw a equidistributed float in the range (0,range]
//Draw a equidistributed float in the range [0,range)
KOKKOS_INLINE_FUNCTION
float frand(const float& range);
//Draw a equidistributed float in the range (start,end]
//Draw a equidistributed float in the range [start,end)
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end );
//Draw a equidistributed double in the range (0,1.0]
//Draw a equidistributed double in the range [0,1.0)
KOKKOS_INLINE_FUNCTION
double drand();
//Draw a equidistributed double in the range (0,range]
//Draw a equidistributed double in the range [0,range)
KOKKOS_INLINE_FUNCTION
double drand(const double& range);
//Draw a equidistributed double in the range (start,end]
//Draw a equidistributed double in the range [start,end)
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end );
@ -221,11 +221,11 @@ namespace Kokkos {
//Additional Functions:
//Fills view with random numbers in the range (0,range]
//Fills view with random numbers in the range [0,range)
template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool, ViewType::value_type range);
//Fills view with random numbers in the range (start,end]
//Fills view with random numbers in the range [start,end)
template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool,
ViewType::value_type start, ViewType::value_type end);
@ -381,7 +381,7 @@ struct rand<Generator, unsigned long> {
// NOTE (mfh 26 oct 2014) This is a partial specialization for long
// long, a C99 / C++11 signed type which is guaranteed to be at
// least 64 bits. Do NOT write a partial specialization for
// int64_t!!! This is just a typedef! It could be either long or
// int64_t!!! This is just an alias! It could be either long or
// long long. We don't know which a priori, and I've seen both.
// The types long and long long are guaranteed to differ, so it's
// always safe to specialize for both.
@ -413,7 +413,7 @@ struct rand<Generator, long long> {
// NOTE (mfh 26 oct 2014) This is a partial specialization for
// unsigned long long, a C99 / C++11 unsigned type which is
// guaranteed to be at least 64 bits. Do NOT write a partial
// specialization for uint64_t!!! This is just a typedef! It could
// specialization for uint64_t!!! This is just an alias! It could
// be either unsigned long or unsigned long long. We don't know
// which a priori, and I've seen both. The types unsigned long and
// unsigned long long are guaranteed to differ, so it's always safe
@ -604,11 +604,7 @@ struct Random_UniqueIndex {
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = ExecutionSpace::hardware_thread_id();
#else
const int i = ExecutionSpace::impl_hardware_thread_id();
#endif
return i;
#else
return 0;
@ -652,15 +648,13 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
const int i_offset =
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
hipThreadIdx_z;
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
hipBlockIdx_z) *
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
@ -687,7 +681,7 @@ class Random_XorShift64 {
friend class Random_XorShift64_Pool<DeviceType>;
public:
typedef DeviceType device_type;
using device_type = DeviceType;
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -805,11 +799,6 @@ class Random_XorShift64 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -817,7 +806,7 @@ class Random_XorShift64 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * sqrt(-2.0 * log(S) / S);
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -830,15 +819,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool {
private:
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<uint64_t*, DeviceType> state_data_type;
using locks_type = View<int*, execution_space>;
using state_data_type = View<uint64_t*, DeviceType>;
locks_type locks_;
state_data_type state_;
int num_states_;
public:
typedef Random_XorShift64<DeviceType> generator_type;
typedef DeviceType device_type;
using generator_type = Random_XorShift64<DeviceType>;
using device_type = DeviceType;
KOKKOS_INLINE_FUNCTION
Random_XorShift64_Pool() { num_states_ = 0; }
@ -923,8 +912,8 @@ class Random_XorShift1024 {
friend class Random_XorShift1024_Pool<DeviceType>;
public:
typedef Random_XorShift1024_Pool<DeviceType> pool_type;
typedef DeviceType device_type;
using pool_type = Random_XorShift1024_Pool<DeviceType>;
using device_type = DeviceType;
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
@ -1046,11 +1035,6 @@ class Random_XorShift1024 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -1058,7 +1042,7 @@ class Random_XorShift1024 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * sqrt(-2.0 * log(S) / S);
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -1071,9 +1055,9 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool {
private:
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<int*, DeviceType> int_view_type;
typedef View<uint64_t * [16], DeviceType> state_data_type;
using locks_type = View<int*, execution_space>;
using int_view_type = View<int*, DeviceType>;
using state_data_type = View<uint64_t * [16], DeviceType>;
locks_type locks_;
state_data_type state_;
@ -1082,9 +1066,9 @@ class Random_XorShift1024_Pool {
friend class Random_XorShift1024<DeviceType>;
public:
typedef Random_XorShift1024<DeviceType> generator_type;
using generator_type = Random_XorShift1024<DeviceType>;
typedef DeviceType device_type;
using device_type = DeviceType;
KOKKOS_INLINE_FUNCTION
Random_XorShift1024_Pool() { num_states_ = 0; }
@ -1176,14 +1160,13 @@ struct fill_random_functor_begin_end;
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1203,14 +1186,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 1, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1232,14 +1214,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 2, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1262,14 +1243,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 3, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1293,14 +1273,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 4, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1326,14 +1305,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 5, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1361,14 +1339,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 6, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1398,14 +1375,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 7, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_)
@ -1437,14 +1413,13 @@ struct fill_random_functor_range<ViewType, RandomPool, loops, 8, IndexType> {
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1466,14 +1441,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1497,14 +1471,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1529,14 +1502,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1562,14 +1534,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1597,14 +1568,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1634,14 +1604,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,
@ -1673,14 +1642,13 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
IndexType> {
typedef typename ViewType::execution_space execution_space;
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
typedef rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>
Rand;
using Rand = rand<typename RandomPool::generator_type,
typename ViewType::non_const_value_type>;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_,

View File

@ -95,9 +95,9 @@ class BinSort {
public:
template <class DstViewType, class SrcViewType>
struct copy_functor {
typedef typename SrcViewType::const_type src_view_type;
using src_view_type = typename SrcViewType::const_type;
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
DstViewType dst_values;
src_view_type src_values;
@ -120,17 +120,17 @@ class BinSort {
// If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type.
typedef typename std::conditional<
using src_view_type = typename std::conditional<
Kokkos::is_view<SrcViewType>::value,
Kokkos::View<typename SrcViewType::const_data_type,
typename SrcViewType::array_layout,
typename SrcViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
typename SrcViewType::const_type>::type src_view_type;
typename SrcViewType::const_type>::type;
typedef typename PermuteViewType::const_type perm_view_type;
using perm_view_type = typename PermuteViewType::const_type;
typedef Impl::CopyOp<DstViewType, src_view_type> copy_op;
using copy_op = Impl::CopyOp<DstViewType, src_view_type>;
DstViewType dst_values;
perm_view_type sort_order;
@ -151,8 +151,8 @@ class BinSort {
}
};
typedef typename Space::execution_space execution_space;
typedef BinSortOp bin_op_type;
using execution_space = typename Space::execution_space;
using bin_op_type = BinSortOp;
struct bin_count_tag {};
struct bin_offset_tag {};
@ -160,30 +160,30 @@ class BinSort {
struct bin_sort_bins_tag {};
public:
typedef SizeType size_type;
typedef size_type value_type;
using size_type = SizeType;
using value_type = size_type;
typedef Kokkos::View<size_type*, Space> offset_type;
typedef Kokkos::View<const int*, Space> bin_count_type;
using offset_type = Kokkos::View<size_type*, Space>;
using bin_count_type = Kokkos::View<const int*, Space>;
typedef typename KeyViewType::const_type const_key_view_type;
using const_key_view_type = typename KeyViewType::const_type;
// If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type.
typedef typename std::conditional<
using const_rnd_key_view_type = typename std::conditional<
Kokkos::is_view<KeyViewType>::value,
Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
const_key_view_type>::type const_rnd_key_view_type;
const_key_view_type>::type;
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
typedef typename KeyViewType::const_value_type const_key_scalar;
using non_const_key_scalar = typename KeyViewType::non_const_value_type;
using const_key_scalar = typename KeyViewType::const_value_type;
typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >
bin_count_atomic_type;
using bin_count_atomic_type =
Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> >;
private:
const_key_view_type keys;
@ -266,10 +266,10 @@ class BinSort {
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
typedef Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout,
typename ValuesViewType::device_type>
scratch_view_type;
using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout,
typename ValuesViewType::device_type>;
const size_t len = range_end - range_begin;
const size_t values_len = values_range_end - values_range_begin;
@ -278,13 +278,6 @@ class BinSort {
"BinSort::sort: values range length != permutation vector length");
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
scratch_view_type sorted_values(
ViewAllocateWithoutInitializing(
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
len, values.extent(1), values.extent(2), values.extent(3),
values.extent(4), values.extent(5), values.extent(6), values.extent(7));
#else
scratch_view_type sorted_values(
ViewAllocateWithoutInitializing(
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
@ -303,7 +296,6 @@ class BinSort {
: KOKKOS_IMPL_CTOR_DEFAULT_ARG,
values.rank_dynamic > 7 ? values.extent(7)
: KOKKOS_IMPL_CTOR_DEFAULT_ARG);
#endif
{
copy_permute_functor<scratch_view_type /* DstViewType */
@ -511,8 +503,8 @@ bool try_std_sort(ViewType view) {
template <class ViewType>
struct min_max_functor {
typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>
minmax_scalar;
using minmax_scalar =
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type>;
ViewType view;
min_max_functor(const ViewType& view_) : view(view_) {}
@ -531,7 +523,7 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view)) return;
}
typedef BinOp1D<ViewType> CompType;
using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
@ -548,8 +540,8 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) {
typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy;
typedef BinOp1D<ViewType> CompType;
using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);

View File

@ -20,14 +20,18 @@ KOKKOS_ADD_TEST_LIBRARY(
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
)
# Feature macros for the bundled GoogleTest sources.
# avoid deprecation warnings from MSVC
# Items are NAME=VALUE pairs without a -D prefix; compiler flags do not belong
# in TARGET_COMPILE_DEFINITIONS.
# NOTE(review): the former HIP branch passed --amdgpu-target=gfx906 through
# this command; presumably the GPU architecture is now selected by the
# toolchain configuration - confirm before relying on it.
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0)
# Request C++11 exactly once, skipping the CUDA-on-Windows combination.
IF(NOT (Kokkos_ENABLE_CUDA AND WIN32))
  TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
ENDIF()
# Suppress clang-tidy diagnostics on code that we do not have control over
IF(CMAKE_CXX_CLANG_TIDY)
  SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "")
ENDIF()
SET(SOURCES
UnitTestMain.cpp

View File

@ -111,10 +111,10 @@ struct RandomProperties {
template <class GeneratorPool, class Scalar>
struct test_random_functor {
typedef typename GeneratorPool::generator_type rnd_type;
using rnd_type = typename GeneratorPool::generator_type;
typedef RandomProperties value_type;
typedef typename GeneratorPool::device_type device_type;
using value_type = RandomProperties;
using device_type = typename GeneratorPool::device_type;
GeneratorPool rand_pool;
const double mean;
@ -125,12 +125,12 @@ struct test_random_functor {
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View types below.
typedef Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>
type_1d;
using type_1d =
Kokkos::View<int[HIST_DIM1D + 1], typename GeneratorPool::device_type>;
type_1d density_1d;
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
typename GeneratorPool::device_type>
type_3d;
using type_3d =
Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
typename GeneratorPool::device_type>;
type_3d density_3d;
test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
@ -200,9 +200,9 @@ struct test_random_functor {
template <class DeviceType>
struct test_histogram1d_functor {
typedef RandomProperties value_type;
typedef typename DeviceType::execution_space execution_space;
typedef typename DeviceType::memory_space memory_space;
using value_type = RandomProperties;
using execution_space = typename DeviceType::execution_space;
using memory_space = typename DeviceType::memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that
@ -210,7 +210,7 @@ struct test_histogram1d_functor {
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM1D + 1], memory_space> type_1d;
using type_1d = Kokkos::View<int[HIST_DIM1D + 1], memory_space>;
type_1d density_1d;
double mean;
@ -219,7 +219,7 @@ struct test_histogram1d_functor {
KOKKOS_INLINE_FUNCTION void operator()(
const typename memory_space::size_type i, RandomProperties& prop) const {
typedef typename memory_space::size_type size_type;
using size_type = typename memory_space::size_type;
const double count = density_1d(i);
prop.mean += count;
prop.variance += 1.0 * (count - mean) * (count - mean);
@ -234,9 +234,9 @@ struct test_histogram1d_functor {
template <class DeviceType>
struct test_histogram3d_functor {
typedef RandomProperties value_type;
typedef typename DeviceType::execution_space execution_space;
typedef typename DeviceType::memory_space memory_space;
using value_type = RandomProperties;
using execution_space = typename DeviceType::execution_space;
using memory_space = typename DeviceType::memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that
@ -244,9 +244,9 @@ struct test_histogram3d_functor {
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
memory_space>
type_3d;
using type_3d =
Kokkos::View<int[HIST_DIM3D + 1][HIST_DIM3D + 1][HIST_DIM3D + 1],
memory_space>;
type_3d density_3d;
double mean;
@ -255,7 +255,7 @@ struct test_histogram3d_functor {
KOKKOS_INLINE_FUNCTION void operator()(
const typename memory_space::size_type i, RandomProperties& prop) const {
typedef typename memory_space::size_type size_type;
using size_type = typename memory_space::size_type;
const double count = density_3d(
i / (HIST_DIM3D * HIST_DIM3D),
(i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D);
@ -276,7 +276,7 @@ struct test_histogram3d_functor {
//
template <class RandomGenerator, class Scalar>
struct test_random_scalar {
typedef typename RandomGenerator::generator_type rnd_type;
using rnd_type = typename RandomGenerator::generator_type;
int pass_mean, pass_var, pass_covar;
int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar;
@ -294,7 +294,7 @@ struct test_random_scalar {
cout << " -- Testing randomness properties" << endl;
RandomProperties result;
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
using functor_type = test_random_functor<RandomGenerator, Scalar>;
parallel_reduce(num_draws / 1024,
functor_type(pool, density_1d, density_3d), result);
@ -325,8 +325,8 @@ struct test_random_scalar {
cout << " -- Testing 1-D histogram" << endl;
RandomProperties result;
typedef test_histogram1d_functor<typename RandomGenerator::device_type>
functor_type;
using functor_type =
test_histogram1d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
@ -357,8 +357,8 @@ struct test_random_scalar {
cout << " -- Testing 3-D histogram" << endl;
RandomProperties result;
typedef test_histogram3d_functor<typename RandomGenerator::device_type>
functor_type;
using functor_type =
test_histogram3d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);

View File

@ -55,8 +55,8 @@ namespace Impl {
template <class ExecutionSpace, class Scalar>
struct is_sorted_struct {
typedef unsigned int value_type;
typedef ExecutionSpace execution_space;
using value_type = unsigned int;
using execution_space = ExecutionSpace;
Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -69,8 +69,8 @@ struct is_sorted_struct {
template <class ExecutionSpace, class Scalar>
struct sum {
typedef double value_type;
typedef ExecutionSpace execution_space;
using value_type = double;
using execution_space = ExecutionSpace;
Kokkos::View<Scalar*, ExecutionSpace> keys;
@ -81,8 +81,8 @@ struct sum {
template <class ExecutionSpace, class Scalar>
struct bin3d_is_sorted_struct {
typedef unsigned int value_type;
typedef ExecutionSpace execution_space;
using value_type = unsigned int;
using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -115,8 +115,8 @@ struct bin3d_is_sorted_struct {
template <class ExecutionSpace, class Scalar>
struct sum3D {
typedef double value_type;
typedef ExecutionSpace execution_space;
using value_type = double;
using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys;
@ -131,7 +131,7 @@ struct sum3D {
template <class ExecutionSpace, typename KeyType>
void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
KeyViewType keys("Keys", n);
// Test sorting array with all numbers equal
@ -166,7 +166,7 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
template <class ExecutionSpace, typename KeyType>
void test_3D_sort_impl(unsigned int n) {
typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>;
KeyViewType keys("Keys", n * n * n);
@ -186,7 +186,7 @@ void test_3D_sort_impl(unsigned int n) {
typename KeyViewType::value_type min[3] = {0, 0, 0};
typename KeyViewType::value_type max[3] = {100, 100, 100};
typedef Kokkos::BinOp3D<KeyViewType> BinOp;
using BinOp = Kokkos::BinOp3D<KeyViewType>;
BinOp bin_op(bin_max, min, max);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
Sorter.create_permute_vector();
@ -215,9 +215,9 @@ void test_3D_sort_impl(unsigned int n) {
template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort_impl(unsigned int n) {
typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
KeyDynamicViewType;
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
using KeyDynamicViewType =
Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
const size_t upper_bound = 2 * n;
const size_t min_chunk_size = 1024;
@ -305,8 +305,8 @@ void test_issue_1160_impl() {
Kokkos::deep_copy(x_, h_x);
Kokkos::deep_copy(v_, h_v);
typedef decltype(element_) KeyViewType;
typedef Kokkos::BinOp1D<KeyViewType> BinOp;
using KeyViewType = decltype(element_);
using BinOp = Kokkos::BinOp1D<KeyViewType>;
int begin = 3;
int end = 8;

View File

@ -5,6 +5,6 @@ build_script:
- cmd: >-
mkdir build &&
cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON &&
cmake --build . --target install &&
ctest -C Debug -V

View File

@ -69,13 +69,13 @@ int main(int argc, char* argv[]) {
return 0;
}
int L = atoi(argv[1]);
int N = atoi(argv[2]);
int M = atoi(argv[3]);
int D = atoi(argv[4]);
int K = atoi(argv[5]);
int R = atoi(argv[6]);
int type = atoi(argv[7]);
int L = std::stoi(argv[1]);
int N = std::stoi(argv[2]);
int M = std::stoi(argv[3]);
int D = std::stoi(argv[4]);
int K = std::stoi(argv[5]);
int R = std::stoi(argv[6]);
int type = std::stoi(argv[7]);
Kokkos::View<int*> offsets("Offsets", L, M);
Kokkos::Random_XorShift64_Pool<> pool(12371);

View File

@ -73,15 +73,15 @@ int main(int argc, char* argv[]) {
return 0;
}
int P = atoi(argv[1]);
int N = atoi(argv[2]);
int K = atoi(argv[3]);
int R = atoi(argv[4]);
int D = atoi(argv[5]);
int U = atoi(argv[6]);
int F = atoi(argv[7]);
int T = atoi(argv[8]);
int S = atoi(argv[9]);
int P = std::stoi(argv[1]);
int N = std::stoi(argv[2]);
int K = std::stoi(argv[3]);
int R = std::stoi(argv[4]);
int D = std::stoi(argv[5]);
int U = std::stoi(argv[6]);
int F = std::stoi(argv[7]);
int T = std::stoi(argv[8]);
int S = std::stoi(argv[9]);
if (U > 8) {
printf("U must be 1-8\n");

View File

@ -72,13 +72,13 @@ int main(int argc, char* argv[]) {
return 0;
}
int S = atoi(argv[1]);
int N = atoi(argv[2]);
int K = atoi(argv[3]);
int D = atoi(argv[4]);
int R = atoi(argv[5]);
int U = atoi(argv[6]);
int F = atoi(argv[7]);
int S = std::stoi(argv[1]);
int N = std::stoi(argv[2]);
int K = std::stoi(argv[3]);
int D = std::stoi(argv[4]);
int R = std::stoi(argv[5]);
int U = std::stoi(argv[6]);
int F = std::stoi(argv[7]);
if ((S != 1) && (S != 2) && (S != 4)) {
printf("S must be one of 1,2,4\n");

View File

@ -50,151 +50,152 @@
#define HLINE "-------------------------------------------------------------\n"
#if defined(KOKKOS_ENABLE_CUDA)
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray;
typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray;
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
#else
typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray;
typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray;
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
#endif
typedef int GUPSIndex;
using GUPSIndex = int;
// Returns the current time of day in seconds, as reported by gettimeofday(),
// combining the whole-second and microsecond fields into one double.
double now() {
  // Named `tv` so the local does not shadow this function's own name.
  struct timeval tv;
  gettimeofday(&tv, nullptr);

  return (double)tv.tv_sec + ((double)tv.tv_usec * 1.0e-6);
}
// Fills every entry of the host array `indices` with lrand48() % dataCount and
// then copies the host array into `dev_indices`.
// dataCount is used as a modulus, so it must be non-zero.
void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
                       const int64_t dataCount) {
  // Iterate in the extent's own unsigned type: a signed `int` counter compared
  // against extent(0) mixes signedness and cannot cover very large extents.
  const size_t count = indices.extent(0);
  for (size_t i = 0; i < count; ++i) {
    indices[i] = lrand48() % dataCount;
  }

  Kokkos::deep_copy(dev_indices, indices);
}
// Runs one update pass: for every position i of `indices`, XORs `datum` into
// data[indices[i]], then fences before returning.
// When performAtomics is true the update goes through
// Kokkos::atomic_fetch_xor; otherwise a plain ^= is used, so two entries of
// `indices` that name the same element are updated without protection.
void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
              const int64_t datum, const bool performAtomics) {
  if (performAtomics) {
    Kokkos::parallel_for(
        "bench-gups-atomic", indices.extent(0),
        KOKKOS_LAMBDA(const GUPSIndex i) {
          Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
        });
  } else {
    Kokkos::parallel_for(
        "bench-gups-non-atomic", indices.extent(0),
        KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
  }

  Kokkos::fence();
}
int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats,
const bool useAtomics) {
int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
const int repeats, const bool useAtomics) {
printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n");
printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n");
printf("Memory Sizes:\n");
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n",
static_cast<uint64_t>(dataCount),
1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n",
static_cast<uint64_t>(indicesCount),
1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No"));
printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
printf("Memory Sizes:\n");
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount),
1.0e-6 * ((double) dataCount * (double) sizeof(int64_t)));
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount),
1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t)));
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No") );
printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
printf(HLINE);
printf(HLINE);
GUPSDeviceArray dev_indices("indices", indicesCount);
GUPSDeviceArray dev_data("data", dataCount);
int64_t datum = -1;
GUPSDeviceArray dev_indices("indices", indicesCount);
GUPSDeviceArray dev_data("data", dataCount);
int64_t datum = -1;
GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
GUPSHostArray data = Kokkos::create_mirror_view(dev_data);
GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
GUPSHostArray data = Kokkos::create_mirror_view(dev_data);
double gupsTime = 0.0;
double gupsTime = 0.0;
printf("Initializing Views...\n");
printf("Initializing Views...\n");
#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
#else
Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
#endif
KOKKOS_LAMBDA(const int i) {
data[i] = 10101010101;
});
KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
#else
Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
#endif
KOKKOS_LAMBDA(const int i) {
KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
indices[i] = 0;
});
Kokkos::deep_copy(dev_data, data);
Kokkos::deep_copy(dev_indices, indices);
double start;
Kokkos::deep_copy(dev_data, data);
Kokkos::deep_copy(dev_indices, indices);
double start;
printf("Starting benchmarking...\n");
printf("Starting benchmarking...\n");
for (GUPSIndex k = 0; k < repeats; ++k) {
randomize_indices(indices, dev_indices, data.extent(0));
for( GUPSIndex k = 0; k < repeats; ++k ) {
randomize_indices(indices, dev_indices, data.extent(0));
start = now();
run_gups(dev_indices, dev_data, datum, useAtomics);
gupsTime += now() - start;
}
start = now();
run_gups(dev_indices, dev_data, datum, useAtomics);
gupsTime += now() - start;
}
Kokkos::deep_copy(indices, dev_indices);
Kokkos::deep_copy(data, dev_data);
Kokkos::deep_copy(indices, dev_indices);
Kokkos::deep_copy(data, dev_data);
printf(HLINE);
printf(
"GUP/s Random: %18.6f\n",
(1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
printf(HLINE);
printf(HLINE);
printf("GUP/s Random: %18.6f\n",
(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime);
printf(HLINE);
return 0;
return 0;
}
// Entry point: prints the banner, seeds the 48-bit random generator,
// initialises Kokkos, parses the command line and runs the benchmark.
// Recognised options:
//   --indices <n>   number of updates per repetition (default 8192)
//   --data <n>      number of data elements (default 33554432)
//   --repeats <n>   number of repetitions (default 10)
//   --atomics       use atomic updates
// Unrecognised arguments are ignored. Returns run_benchmark()'s result, or 1
// when a value-taking option is given without a value.
int main(int argc, char* argv[]) {
  printf(HLINE);
  printf("Kokkos GUPS Benchmark\n");
  printf(HLINE);

  srand48(1010101);

  Kokkos::initialize(argc, argv);

  int64_t indices = 8192;
  int64_t data    = 33554432;
  int64_t repeats = 10;
  bool useAtomics = false;

  for (int i = 1; i < argc; ++i) {
    // Destination for the option's value; stays null for flags and for
    // arguments this program does not recognise.
    int64_t* target = nullptr;

    if (strcmp(argv[i], "--indices") == 0) {
      target = &indices;
    } else if (strcmp(argv[i], "--data") == 0) {
      target = &data;
    } else if (strcmp(argv[i], "--repeats") == 0) {
      target = &repeats;
    } else if (strcmp(argv[i], "--atomics") == 0) {
      useAtomics = true;
    }

    if (target != nullptr) {
      // Reading argv[i + 1] is only valid when another argument follows.
      if (i + 1 >= argc) {
        fprintf(stderr, "Error: option %s requires a value.\n", argv[i]);
        Kokkos::finalize();
        return 1;
      }
      *target = std::atoll(argv[i + 1]);
      ++i;
    }
  }

  // NOTE(review): run_benchmark takes int-sized counts, so the 64-bit values
  // parsed above are narrowed at this call.
  const int rc = run_benchmark(indices, data, repeats, useAtomics);

  Kokkos::finalize();

  return rc;
}

View File

@ -94,22 +94,22 @@ int main(int argc, char* argv[]) {
return 0;
}
int team_range = atoi(argv[1]);
int thread_range = atoi(argv[2]);
int vector_range = atoi(argv[3]);
int team_range = std::stoi(argv[1]);
int thread_range = std::stoi(argv[2]);
int vector_range = std::stoi(argv[3]);
int outer_repeat = atoi(argv[4]);
int thread_repeat = atoi(argv[5]);
int vector_repeat = atoi(argv[6]);
int outer_repeat = std::stoi(argv[4]);
int thread_repeat = std::stoi(argv[5]);
int vector_repeat = std::stoi(argv[6]);
int team_size = atoi(argv[7]);
int vector_size = atoi(argv[8]);
int schedule = atoi(argv[9]);
int test_type = atoi(argv[10]);
int team_size = std::stoi(argv[7]);
int vector_size = std::stoi(argv[8]);
int schedule = std::stoi(argv[9]);
int test_type = std::stoi(argv[10]);
int disable_verbose_output = 0;
if (argc > 11) {
disable_verbose_output = atoi(argv[11]);
disable_verbose_output = std::stoi(argv[11]);
}
if (schedule != 1 && schedule != 2) {
@ -138,9 +138,9 @@ int main(int argc, char* argv[]) {
double& lval) { lval += 1; },
result);
typedef Kokkos::View<double*, Kokkos::LayoutRight> view_type_1d;
typedef Kokkos::View<double**, Kokkos::LayoutRight> view_type_2d;
typedef Kokkos::View<double***, Kokkos::LayoutRight> view_type_3d;
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
using view_type_2d = Kokkos::View<double**, Kokkos::LayoutRight>;
using view_type_3d = Kokkos::View<double***, Kokkos::LayoutRight>;
// Allocate view without initializing
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding

View File

@ -68,8 +68,8 @@ void test_policy(int team_range, int thread_range, int vector_range,
int team_size, int vector_size, int test_type, ViewType1& v1,
ViewType2& v2, ViewType3& v3, double& result,
double& result_expect, double& time) {
typedef Kokkos::TeamPolicy<ScheduleType, IndexType> t_policy;
typedef typename t_policy::member_type t_team;
using t_policy = Kokkos::TeamPolicy<ScheduleType, IndexType>;
using t_team = typename t_policy::member_type;
Kokkos::Timer timer;
for (int orep = 0; orep < outer_repeat; orep++) {

View File

@ -48,219 +48,224 @@
#include <sys/time.h>
#define STREAM_ARRAY_SIZE 100000000
#define STREAM_NTIMES 20
#define STREAM_NTIMES 20
#define HLINE "-------------------------------------------------------------\n"
#if defined(KOKKOS_ENABLE_CUDA)
typedef Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror StreamHostArray;
typedef Kokkos::View<double*, Kokkos::CudaSpace> StreamDeviceArray;
using StreamHostArray = Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::CudaSpace>;
#else
typedef Kokkos::View<double*, Kokkos::HostSpace>::HostMirror StreamHostArray;
typedef Kokkos::View<double*, Kokkos::HostSpace> StreamDeviceArray;
using StreamHostArray = Kokkos::View<double*, Kokkos::HostSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::HostSpace>;
#endif
typedef int StreamIndex;
using StreamIndex = int;
// Returns the current time of day in seconds, as reported by gettimeofday(),
// combining the whole-second and microsecond fields into one double.
double now() {
  // Named `tv` so the local does not shadow this function's own name.
  struct timeval tv;
  gettimeofday(&tv, nullptr);

  return (double)tv.tv_sec + ((double)tv.tv_usec * 1.0e-6);
}
// Copy kernel: c[i] = a[i] for every i in [0, a.extent(0)), followed by a
// fence. `b` is not used; it is accepted so that all kernels share one
// argument list.
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b,
                  StreamDeviceArray& c) {
  Kokkos::parallel_for(
      "copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i]; });

  Kokkos::fence();
}
// Scale kernel: b[i] = scalar * c[i] for every i in [0, a.extent(0)),
// followed by a fence. `a` only supplies the iteration count.
void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
                   StreamDeviceArray& c, const double scalar) {
  // Labelled "scale" so the kernel is distinguishable from perform_copy's
  // "copy" kernel wherever the label is reported.
  Kokkos::parallel_for(
      "scale", a.extent(0),
      KOKKOS_LAMBDA(const StreamIndex i) { b[i] = scalar * c[i]; });

  Kokkos::fence();
}
// Add kernel: c[i] = a[i] + b[i] for every i in [0, a.extent(0)), followed by
// a fence.
void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
                 StreamDeviceArray& c) {
  Kokkos::parallel_for(
      "add", a.extent(0),
      KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i] + b[i]; });

  Kokkos::fence();
}
// Triad kernel: a[i] = b[i] + scalar * c[i] for every i in [0, a.extent(0)),
// followed by a fence.
void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b,
                   StreamDeviceArray& c, const double scalar) {
  Kokkos::parallel_for(
      "triad", a.extent(0),
      KOKKOS_LAMBDA(const StreamIndex i) { a[i] = b[i] + scalar * c[i]; });

  Kokkos::fence();
}
// Checks the host copies of the three arrays against scalar reference values.
// The reference values start at a=1, b=2, c=0 and are advanced by replaying
// the copy / scale / add / triad updates on scalars. For each array the mean
// absolute deviation from its reference value, taken relative to that
// reference value, must not exceed 1e-13.
// Prints one message per failing array to stderr, or a success message to
// stdout when every array passes. Returns the number of failing arrays (0-3).
int perform_validation(StreamHostArray& a, StreamHostArray& b,
                       StreamHostArray& c, const StreamIndex arraySize,
                       const double scalar) {
  double ai = 1.0;
  double bi = 2.0;
  double ci = 0.0;

  // NOTE(review): the recurrence is replayed arraySize times; presumably it
  // should match the number of times the kernels were executed - confirm
  // against the benchmark driver.
  for (StreamIndex i = 0; i < arraySize; ++i) {
    ci = ai;
    bi = scalar * ci;
    ci = ai + bi;
    ai = bi + scalar * ci;
  }

  double aError = 0.0;
  double bError = 0.0;
  double cError = 0.0;

  // Accumulate the deviation over all elements; assigning here would leave
  // only the last element's deviation to be averaged below.
  for (StreamIndex i = 0; i < arraySize; ++i) {
    aError += std::abs(a[i] - ai);
    bError += std::abs(b[i] - bi);
    cError += std::abs(c[i] - ci);
  }

  const double aAvgError = aError / (double)arraySize;
  const double bAvgError = bError / (double)arraySize;
  const double cAvgError = cError / (double)arraySize;

  const double epsilon = 1.0e-13;
  int errorCount       = 0;

  if (std::abs(aAvgError / ai) > epsilon) {
    fprintf(stderr, "Error: validation check on View a failed.\n");
    errorCount++;
  }

  if (std::abs(bAvgError / bi) > epsilon) {
    fprintf(stderr, "Error: validation check on View b failed.\n");
    errorCount++;
  }

  if (std::abs(cAvgError / ci) > epsilon) {
    fprintf(stderr, "Error: validation check on View c failed.\n");
    errorCount++;
  }

  if (errorCount == 0) {
    printf("All solutions checked and verified.\n");
  }

  return errorCount;
}
int run_benchmark() {
printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n");
printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n");
printf("Memory Sizes:\n");
printf("- Array Size: %" PRIu64 "\n",
static_cast<uint64_t>(STREAM_ARRAY_SIZE));
printf("- Per Array: %12.2f MB\n",
1.0e-6 * (double)STREAM_ARRAY_SIZE * (double)sizeof(double));
printf("- Total: %12.2f MB\n",
3.0e-6 * (double)STREAM_ARRAY_SIZE * (double)sizeof(double));
printf("Memory Sizes:\n");
printf("- Array Size: %" PRIu64 "\n", static_cast<uint64_t>(STREAM_ARRAY_SIZE));
printf("- Per Array: %12.2f MB\n", 1.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
printf("- Total: %12.2f MB\n", 3.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
printf("Benchmark kernels will be performed for %d iterations.\n",
STREAM_NTIMES);
printf("Benchmark kernels will be performed for %d iterations.\n", STREAM_NTIMES);
printf(HLINE);
printf(HLINE);
StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE);
StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE);
StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE);
StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE);
StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE);
StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE);
StreamHostArray a = Kokkos::create_mirror_view(dev_a);
StreamHostArray b = Kokkos::create_mirror_view(dev_b);
StreamHostArray c = Kokkos::create_mirror_view(dev_c);
StreamHostArray a = Kokkos::create_mirror_view(dev_a);
StreamHostArray b = Kokkos::create_mirror_view(dev_b);
StreamHostArray c = Kokkos::create_mirror_view(dev_c);
const double scalar = 3.0;
const double scalar = 3.0;
double copyTime = std::numeric_limits<double>::max();
double scaleTime = std::numeric_limits<double>::max();
double addTime = std::numeric_limits<double>::max();
double triadTime = std::numeric_limits<double>::max();
double copyTime = std::numeric_limits<double>::max();
double scaleTime = std::numeric_limits<double>::max();
double addTime = std::numeric_limits<double>::max();
double triadTime = std::numeric_limits<double>::max();
printf("Initializing Views...\n");
printf("Initializing Views...\n");
#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
#else
Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
#endif
KOKKOS_LAMBDA(const int i) {
KOKKOS_LAMBDA(const int i) {
a[i] = 1.0;
b[i] = 2.0;
c[i] = 0.0;
});
a[i] = 1.0;
b[i] = 2.0;
c[i] = 0.0;
});
// Copy contents of a (from the host) to the dev_a (device)
Kokkos::deep_copy(dev_a, a);
Kokkos::deep_copy(dev_b, b);
Kokkos::deep_copy(dev_c, c);
// Copy contents of a (from the host) to the dev_a (device)
Kokkos::deep_copy(dev_a, a);
Kokkos::deep_copy(dev_b, b);
Kokkos::deep_copy(dev_c, c);
double start;
double start;
printf("Starting benchmarking...\n");
printf("Starting benchmarking...\n");
for (StreamIndex k = 0; k < STREAM_NTIMES; ++k) {
start = now();
perform_copy(dev_a, dev_b, dev_c);
copyTime = std::min(copyTime, (now() - start));
for( StreamIndex k = 0; k < STREAM_NTIMES; ++k ) {
start = now();
perform_copy(dev_a, dev_b, dev_c);
copyTime = std::min( copyTime, (now() - start) );
start = now();
perform_scale(dev_a, dev_b, dev_c, scalar);
scaleTime = std::min(scaleTime, (now() - start));
start = now();
perform_scale(dev_a, dev_b, dev_c, scalar);
scaleTime = std::min( scaleTime, (now() - start) );
start = now();
perform_add(dev_a, dev_b, dev_c);
addTime = std::min(addTime, (now() - start));
start = now();
perform_add(dev_a, dev_b, dev_c);
addTime = std::min( addTime, (now() - start) );
start = now();
perform_triad(dev_a, dev_b, dev_c, scalar);
triadTime = std::min(triadTime, (now() - start));
}
start = now();
perform_triad(dev_a, dev_b, dev_c, scalar);
triadTime = std::min( triadTime, (now() - start) );
}
Kokkos::deep_copy(a, dev_a);
Kokkos::deep_copy(b, dev_b);
Kokkos::deep_copy(c, dev_c);
Kokkos::deep_copy(a, dev_a);
Kokkos::deep_copy(b, dev_b);
Kokkos::deep_copy(c, dev_c);
printf("Performing validation...\n");
int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar);
printf("Performing validation...\n");
int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar);
printf(HLINE);
printf(HLINE);
printf("Copy %11.2f MB/s\n",
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
copyTime);
printf("Scale %11.2f MB/s\n",
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
scaleTime);
printf("Add %11.2f MB/s\n",
(1.0e-06 * 3.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
addTime);
printf("Triad %11.2f MB/s\n",
(1.0e-06 * 3.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
triadTime);
printf("Copy %11.2f MB/s\n",
( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / copyTime );
printf("Scale %11.2f MB/s\n",
( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / scaleTime );
printf("Add %11.2f MB/s\n",
( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / addTime );
printf("Triad %11.2f MB/s\n",
( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / triadTime );
printf(HLINE);
printf(HLINE);
return rc;
return rc;
}
int main(int argc, char* argv[]) {
printf(HLINE);
printf("Kokkos STREAM Benchmark\n");
printf(HLINE);
printf(HLINE);
printf("Kokkos STREAM Benchmark\n");
printf(HLINE);
Kokkos::initialize(argc, argv);
const int rc = run_benchmark();
Kokkos::finalize();
Kokkos::initialize(argc, argv);
const int rc = run_benchmark();
Kokkos::finalize();
return rc;
return rc;
}

View File

@ -19,6 +19,13 @@ default_arch="sm_35"
# The default C++ compiler.
#
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
# Default to whatever nvcc is found in the PATH. When CUDA_ROOT is set to a
# non-empty value, use the nvcc from that toolkit instead. The variable is
# quoted so that a CUDA_ROOT containing whitespace does not break the test.
nvcc_compiler=nvcc
if [ -n "${CUDA_ROOT:-}" ]; then
  nvcc_compiler="$CUDA_ROOT/bin/nvcc"
fi
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
@ -58,7 +65,7 @@ object_files_xlinker=""
shared_versioned_libraries_host=""
shared_versioned_libraries=""
# Does the User set the architecture
# Does the User set the architecture
arch_set=0
# Does the user overwrite the host compiler
@ -77,7 +84,7 @@ host_only_args=""
# Just run version on host compiler
get_host_version=0
# Enable workaround for CUDA 6.5 for pragma ident
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
# Mark first host compiler argument
@ -179,7 +186,7 @@ do
shift
;;
#Handle known nvcc args
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*|--fmad*)
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
@ -187,7 +194,7 @@ do
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad)
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart)
cuda_args="$cuda_args $1 $2"
shift
;;
@ -195,11 +202,11 @@ do
cuda_args="$cuda_args $1"
;;
#Handle unsupported standard flags
--std=c++1y|-std=c++1y|--std=c++1z|-std=c++1z|--std=gnu++1y|-std=gnu++1y|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a|--std=c++17|-std=c++17)
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
fallback_std_flag="-std=c++14"
# this is hopefully just occurring in a downstream project during CMake feature tests
# we really have no choice here but to accept the flag and change to an accepted C++ standard
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
if [ -n "$std_flag" ]; then
warn_std_flag
shared_args=${shared_args/ $std_flag/}
@ -216,7 +223,25 @@ do
fi
std_flag=$corrected_std_flag
shared_args="$shared_args $std_flag"
;;
;;
--std=c++17|-std=c++17)
  # A standard flag was already seen: call warn_std_flag and remove the
  # earlier flag from shared_args so that only one -std flag is passed on.
  if [ -n "$std_flag" ]; then
    warn_std_flag
    shared_args=${shared_args/ $std_flag/}
  fi
  # NVCC only has C++17 from version 11 on.
  # Extract the major version from the "V<major>..." token of `nvcc --version`;
  # the result is empty when nvcc cannot be run or the output does not match.
  cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo "${BASH_REMATCH[1]}")
  # Only compare when a version was actually parsed; an empty value would make
  # the numeric test itself fail with "unary operator expected". As before, an
  # unknown version falls through to passing the requested flag unchanged.
  if [ -n "${cuda_main_version}" ] && [ "${cuda_main_version}" -lt 11 ]; then
    fallback_std_flag="-std=c++14"
    # this is hopefully just occurring in a downstream project during CMake feature tests
    # we really have no choice here but to accept the flag and change to an accepted C++ standard
    echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
    std_flag=$fallback_std_flag
  else
    std_flag=$1
  fi
  shared_args="$shared_args $std_flag"
  ;;
--std=c++11|-std=c++11|--std=c++14|-std=c++14)
if [ -n "$std_flag" ]; then
warn_std_flag
@ -226,6 +251,20 @@ do
shared_args="$shared_args $std_flag"
;;
#convert PGI standard flags to something nvcc can handle
--c++11|--c++14|--c++17)
if [ -n "$std_flag" ]; then
warn_std_flag
shared_args=${shared_args/ $std_flag/}
fi
std_flag="-std=${1#--}"
shared_args="$shared_args $std_flag"
;;
#ignore PGI forcing ISO C++-conforming code
-A)
;;
#strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
@ -237,13 +276,17 @@ do
;;
#strip -Xcompiler because we add it
-Xcompiler)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$2"
if [[ $2 != "-o" ]]; then
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$2"
fi
shift
fi
shift
# else we have -Xcompiler -o <filename>; in this case just drop -Xcompiler and process
# the -o flag with the filename (done above)
;;
#strip of "-x cu" because we add that
-x)
@ -329,7 +372,7 @@ do
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args=$1
first_xcompiler_arg=0
else
else
xcompiler_args="$xcompiler_args,$1"
fi
;;
@ -387,7 +430,7 @@ if [ $arch_set -ne 1 ]; then
fi
#Compose compilation command
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
nvcc_command="$nvcc_compiler $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
if [ $first_xcompiler_arg -eq 0 ]; then
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
fi

View File

@ -2,6 +2,7 @@ SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@)
SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@)
SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@)
SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@)
SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
# These are needed by KokkosKernels
FOREACH(DEV ${Kokkos_DEVICES})
@ -38,7 +39,7 @@ include(FindPackageHandleStandardArgs)
# kokkos_check(
# [DEVICES <devices>...] # Set of backends (e.g. "OpenMP" and/or "Cuda")
# [ARCH <archs>...] # Target architectures (e.g. "Power9" and/or "Volta70")
# [OPTIONS <options>...] # Optional settings (e.g. "PROFILING")
# [OPTIONS <options>...] # Optional settings (e.g. "TUNING")
# [TPLS <tpls>...] # Third party libraries
# [RETURN_VALUE <result>] # Set a variable that indicates the result of the
# # check instead of a fatal error

View File

@ -1,6 +1,7 @@
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
#error \
"Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
#else
#define KOKKOS_CORE_CONFIG_H
#endif
@ -10,7 +11,6 @@
// KOKKOS_VERSION / 10000 is the major version
#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
/* Execution Spaces */
#cmakedefine KOKKOS_ENABLE_SERIAL
#cmakedefine KOKKOS_ENABLE_OPENMP
@ -47,10 +47,9 @@
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS
#cmakedefine KOKKOS_ENABLE_PROFILING
#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT
#cmakedefine KOKKOS_ENABLE_TUNING
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE
#cmakedefine KOKKOS_ENABLE_ETI
#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
#cmakedefine KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK
#cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN
@ -60,7 +59,7 @@
#cmakedefine KOKKOS_ENABLE_HWLOC
#cmakedefine KOKKOS_USE_LIBRT
#cmakedefine KOKKOS_ENABLE_HWBSPACE
#cmakedefine KOKKOS_ENABLE_LIBDL
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
#cmakedefine KOKKOS_COMPILER_CUDA_VERSION @KOKKOS_COMPILER_CUDA_VERSION@
@ -95,4 +94,6 @@
#cmakedefine KOKKOS_ARCH_VOLTA70
#cmakedefine KOKKOS_ARCH_VOLTA72
#cmakedefine KOKKOS_ARCH_TURING75
#cmakedefine KOKKOS_ARCH_AMD_EPYC
#cmakedefine KOKKOS_ARCH_AMPERE80
#cmakedefine KOKKOS_ARCH_AMD_ZEN
#cmakedefine KOKKOS_ARCH_AMD_ZEN2

View File

@ -0,0 +1,958 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindCUDAToolkit
---------------
This script locates the NVIDIA CUDA toolkit and the associated libraries, but
does not require the ``CUDA`` language be enabled for a given project. This
module does not search for the NVIDIA CUDA Samples.
Search Behavior
^^^^^^^^^^^^^^^
Finding the CUDA Toolkit requires finding the ``nvcc`` executable, which is
searched for in the following order:
1. If the ``CUDA`` language has been enabled we will use the directory
containing the compiler as the first search location for ``nvcc``.
2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g.,
``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it
will be searched. If both an environment variable **and** a
configuration variable are specified, the *configuration* variable takes
precedence.
The directory specified here must be such that the executable ``nvcc`` can be
found underneath the directory specified by ``CUDAToolkit_ROOT``. If
``CUDAToolkit_ROOT`` is specified, but no ``nvcc`` is found underneath, this
package is marked as **not** found. No subsequent search attempts are
performed.
3. If the CUDA_PATH environment variable is defined, it will be searched.
4. The user's path is searched for ``nvcc`` using :command:`find_program`. If
this is found, no subsequent search attempts are performed. Users are
responsible for ensuring that the first ``nvcc`` to show up in the path is
the desired path in the event that multiple CUDA Toolkits are installed.
5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is
used. No subsequent search attempts are performed. No default symbolic link
location exists for the Windows platform.
6. The platform specific default install locations are searched. If exactly one
candidate is found, this is used. The default CUDA Toolkit install locations
searched are:
+-------------+-------------------------------------------------------------+
| Platform | Search Pattern |
+=============+=============================================================+
| macOS | ``/Developer/NVIDIA/CUDA-X.Y`` |
+-------------+-------------------------------------------------------------+
| Other Unix | ``/usr/local/cuda-X.Y`` |
+-------------+-------------------------------------------------------------+
| Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` |
+-------------+-------------------------------------------------------------+
Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as
``/usr/local/cuda-9.0`` or
``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0``
.. note::
When multiple CUDA Toolkits are installed in the default location of a
system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0``
exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this
package is marked as **not** found.
There are too many factors involved in making an automatic decision in
the presence of multiple CUDA Toolkits being installed. In this
situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or
(2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for
:command:`find_program` to find.
Options
^^^^^^^
``VERSION``
If specified, describes the version of the CUDA Toolkit to search for.
``REQUIRED``
If specified, configuration will error if a suitable CUDA Toolkit is not
found.
``QUIET``
If specified, the search for a suitable CUDA Toolkit will not produce any
messages.
``EXACT``
If specified, the CUDA Toolkit is considered found only if the exact
``VERSION`` specified is recovered.
Imported targets
^^^^^^^^^^^^^^^^
An :ref:`imported target <Imported targets>` named ``CUDA::toolkit`` is provided.
This module defines :prop_tgt:`IMPORTED` targets for each
of the following libraries that are part of the CUDAToolkit:
- :ref:`CUDA Runtime Library<cuda_toolkit_rt_lib>`
- :ref:`CUDA Driver Library<cuda_toolkit_driver_lib>`
- :ref:`cuBLAS<cuda_toolkit_cuBLAS>`
- :ref:`cuFFT<cuda_toolkit_cuFFT>`
- :ref:`cuRAND<cuda_toolkit_cuRAND>`
- :ref:`cuSOLVER<cuda_toolkit_cuSOLVER>`
- :ref:`cuSPARSE<cuda_toolkit_cuSPARSE>`
- :ref:`cuPTI<cuda_toolkit_cupti>`
- :ref:`NPP<cuda_toolkit_NPP>`
- :ref:`nvBLAS<cuda_toolkit_nvBLAS>`
- :ref:`nvGRAPH<cuda_toolkit_nvGRAPH>`
- :ref:`nvJPEG<cuda_toolkit_nvJPEG>`
- :ref:`nvidia-ML<cuda_toolkit_nvML>`
- :ref:`nvRTC<cuda_toolkit_nvRTC>`
- :ref:`nvToolsExt<cuda_toolkit_nvToolsExt>`
- :ref:`OpenCL<cuda_toolkit_opencl>`
- :ref:`cuLIBOS<cuda_toolkit_cuLIBOS>`
.. _`cuda_toolkit_rt_lib`:
CUDA Runtime Library
""""""""""""""""""""
The CUDA Runtime library (cudart) is what most applications will typically
need to link against to make any calls such as `cudaMalloc` and `cudaFree`.
Targets Created:
- ``CUDA::cudart``
- ``CUDA::cudart_static``
.. _`cuda_toolkit_driver_lib`:
CUDA Driver Library
""""""""""""""""""""
The CUDA Driver library (cuda) is used by applications that use calls
such as `cuMemAlloc` and `cuMemFree`. This is generally used by advanced
users.
Targets Created:
- ``CUDA::cuda_driver``
.. _`cuda_toolkit_cuBLAS`:
cuBLAS
""""""
The `cuBLAS <https://docs.nvidia.com/cuda/cublas/index.html>`_ library.
Targets Created:
- ``CUDA::cublas``
- ``CUDA::cublas_static``
.. _`cuda_toolkit_cuFFT`:
cuFFT
"""""
The `cuFFT <https://docs.nvidia.com/cuda/cufft/index.html>`_ library.
Targets Created:
- ``CUDA::cufft``
- ``CUDA::cufftw``
- ``CUDA::cufft_static``
- ``CUDA::cufftw_static``
cuRAND
""""""
The `cuRAND <https://docs.nvidia.com/cuda/curand/index.html>`_ library.
Targets Created:
- ``CUDA::curand``
- ``CUDA::curand_static``
.. _`cuda_toolkit_cuSOLVER`:
cuSOLVER
""""""""
The `cuSOLVER <https://docs.nvidia.com/cuda/cusolver/index.html>`_ library.
Targets Created:
- ``CUDA::cusolver``
- ``CUDA::cusolver_static``
.. _`cuda_toolkit_cuSPARSE`:
cuSPARSE
""""""""
The `cuSPARSE <https://docs.nvidia.com/cuda/cusparse/index.html>`_ library.
Targets Created:
- ``CUDA::cusparse``
- ``CUDA::cusparse_static``
.. _`cuda_toolkit_cupti`:
cupti
"""""
The `NVIDIA CUDA Profiling Tools Interface <https://developer.nvidia.com/CUPTI>`_.
Targets Created:
- ``CUDA::cupti``
- ``CUDA::cupti_static``
.. _`cuda_toolkit_NPP`:
NPP
"""
The `NPP <https://docs.nvidia.com/cuda/npp/index.html>`_ libraries.
Targets Created:
- `nppc`:
- ``CUDA::nppc``
- ``CUDA::nppc_static``
- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h`
- ``CUDA::nppial``
- ``CUDA::nppial_static``
- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h`
- ``CUDA::nppicc``
- ``CUDA::nppicc_static``
- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h`
- ``CUDA::nppicom``
- ``CUDA::nppicom_static``
- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h`
- ``CUDA::nppidei``
- ``CUDA::nppidei_static``
- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h`
- ``CUDA::nppif``
- ``CUDA::nppif_static``
- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h`
- ``CUDA::nppig``
- ``CUDA::nppig_static``
- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h`
- ``CUDA::nppim``
- ``CUDA::nppim_static``
- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h`
- ``CUDA::nppist``
- ``CUDA::nppist_static``
- `nppisu`: Memory support functions in `nppi_support_functions.h`
- ``CUDA::nppisu``
- ``CUDA::nppisu_static``
- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h`
- ``CUDA::nppitc``
- ``CUDA::nppitc_static``
- `npps`:
- ``CUDA::npps``
- ``CUDA::npps_static``
.. _`cuda_toolkit_nvBLAS`:
nvBLAS
""""""
The `nvBLAS <https://docs.nvidia.com/cuda/nvblas/index.html>`_ libraries.
This is a shared library only.
Targets Created:
- ``CUDA::nvblas``
.. _`cuda_toolkit_nvGRAPH`:
nvGRAPH
"""""""
The `nvGRAPH <https://docs.nvidia.com/cuda/nvgraph/index.html>`_ library.
Targets Created:
- ``CUDA::nvgraph``
- ``CUDA::nvgraph_static``
.. _`cuda_toolkit_nvJPEG`:
nvJPEG
""""""
The `nvJPEG <https://docs.nvidia.com/cuda/nvjpeg/index.html>`_ library.
Introduced in CUDA 10.
Targets Created:
- ``CUDA::nvjpeg``
- ``CUDA::nvjpeg_static``
.. _`cuda_toolkit_nvRTC`:
nvRTC
"""""
The `nvRTC <https://docs.nvidia.com/cuda/nvrtc/index.html>`_ (Runtime Compilation) library.
This is a shared library only.
Targets Created:
- ``CUDA::nvrtc``
.. _`cuda_toolkit_nvml`:
nvidia-ML
"""""""""
The `NVIDIA Management Library <https://developer.nvidia.com/nvidia-management-library-nvml>`_.
This is a shared library only.
Targets Created:
- ``CUDA::nvml``
.. _`cuda_toolkit_nvToolsExt`:
nvToolsExt
""""""""""
The `NVIDIA Tools Extension <https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm>`_.
This is a shared library only.
Targets Created:
- ``CUDA::nvToolsExt``
.. _`cuda_toolkit_opencl`:
OpenCL
""""""
The `NVIDIA OpenCL Library <https://developer.nvidia.com/opencl>`_.
This is a shared library only.
Targets Created:
- ``CUDA::OpenCL``
.. _`cuda_toolkit_cuLIBOS`:
cuLIBOS
"""""""
The cuLIBOS library is a backend thread abstraction layer library which is
static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``,
``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP
libraries all automatically have this dependency linked.
Target Created:
- ``CUDA::culibos``
**Note**: direct usage of this target by consumers should not be necessary.
.. _`cuda_toolkit_cuRAND`:
Result variables
^^^^^^^^^^^^^^^^
``CUDAToolkit_FOUND``
A boolean specifying whether or not the CUDA Toolkit was found.
``CUDAToolkit_VERSION``
The exact version of the CUDA Toolkit found (as reported by
``nvcc --version``).
``CUDAToolkit_VERSION_MAJOR``
The major version of the CUDA Toolkit.
``CUDAToolkit_VERSION_MINOR``
The minor version of the CUDA Toolkit.
``CUDAToolkit_VERSION_PATCH``
The patch version of the CUDA Toolkit.
``CUDAToolkit_BIN_DIR``
The path to the CUDA Toolkit directory that contains the CUDA
executable ``nvcc``.
``CUDAToolkit_INCLUDE_DIRS``
The path to the CUDA Toolkit ``include`` folder containing the header files
required to compile a project linking against CUDA.
``CUDAToolkit_LIBRARY_DIR``
The path to the CUDA Toolkit library directory that contains the CUDA
Runtime library ``cudart``.
``CUDAToolkit_TARGET_DIR``
The path to the CUDA Toolkit directory including the target architecture
when cross-compiling. When not cross-compiling this will be equivalent to
``CUDAToolkit_ROOT_DIR``.
``CUDAToolkit_NVCC_EXECUTABLE``
The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may
**not** be the same as
:variable:`CMAKE_CUDA_COMPILER <CMAKE_<LANG>_COMPILER>`. ``nvcc`` must be
found to determine the CUDA Toolkit version as well as determining other
features of the Toolkit. This variable is set for the convenience of
modules that depend on this one.
#]=======================================================================]
# NOTE: much of this was simply extracted from FindCUDA.cmake.
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# Copyright (c) 2007-2009
# Scientific Computing and Imaging Institute, University of Utah
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
###############################################################################
# When the CUDA language is enabled and its compiler is NVCC, the toolkit's
# binary directory is simply the directory that holds the compiler.
# Nothing is done when CUDAToolkit_BIN_DIR already has a value.
if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR)
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
    get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
    # Store the directory in the cache and keep it out of the default
    # (non-advanced) cache view.
    set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
    mark_as_advanced(CUDAToolkit_BIN_DIR)
    # Do not leave the temporary behind in the including scope.
    unset(cuda_dir)
  endif()
endif()
if(CMAKE_VERSION VERSION_LESS "3.12.0")
  # import_target_link_libraries(<target> [SYSTEM|INTERFACE|PUBLIC] <lib>...)
  #
  # Stand-in for target_link_libraries() on CMake older than 3.12: the
  # libraries are written directly into the target's
  # INTERFACE_LINK_LIBRARIES property. The SYSTEM/INTERFACE/PUBLIC keywords
  # are parsed only so that they can be dropped from the argument list.
  function(import_target_link_libraries target)
    cmake_parse_arguments(_import "SYSTEM;INTERFACE;PUBLIC" "" "" ${ARGN})
    get_target_property(link_libs ${target} INTERFACE_LINK_LIBRARIES)
    if(NOT link_libs)
      # Property not set (get_target_property yields a false value):
      # the new entries become the whole list.
      set(link_libs ${_import_UNPARSED_ARGUMENTS})
    else()
      # Keep what is already there and add the new entries at the end.
      list(APPEND link_libs ${_import_UNPARSED_ARGUMENTS})
    endif()
    set_target_properties(${target} PROPERTIES
      INTERFACE_LINK_LIBRARIES "${link_libs}")
  endfunction()
else()
  # CMake 3.12 or newer: forward every argument unchanged.
  function(import_target_link_libraries)
    target_link_libraries(${ARGN})
  endfunction()
endif()
if(CMAKE_VERSION VERSION_LESS "3.13.0")
  # import_target_link_directories(<target> [SYSTEM|INTERFACE|PUBLIC] <dir>...)
  #
  # Stand-in for target_link_directories() on CMake older than 3.13: each
  # directory is appended to the target's INTERFACE_LINK_LIBRARIES property
  # as a "-L<dir>" entry. The SYSTEM/INTERFACE/PUBLIC keywords are parsed
  # only so that they can be dropped from the argument list.
  function(import_target_link_directories target)
    cmake_parse_arguments(HACK
      "SYSTEM;INTERFACE;PUBLIC"
      ""
      ""
      ${ARGN}
    )
    # Always start from an empty list. Without this, an unset property left
    # LINK_LIBS_LIST uninitialised here and any variable of the same name
    # visible from the calling scope would leak into the result.
    set(LINK_LIBS_LIST "")
    get_target_property(LINK_LIBS ${target} INTERFACE_LINK_LIBRARIES)
    if(LINK_LIBS) # could be not-found
      set(LINK_LIBS_LIST ${LINK_LIBS})
    endif()
    foreach(LIB ${HACK_UNPARSED_ARGUMENTS})
      list(APPEND LINK_LIBS_LIST -L${LIB})
    endforeach()
    set_target_properties(${target} PROPERTIES
      INTERFACE_LINK_LIBRARIES "${LINK_LIBS_LIST}")
  endfunction()
else()
  # CMake 3.13 or newer: forward every argument unchanged.
  function(import_target_link_directories)
    target_link_directories(${ARGN})
  endfunction()
endif()
if(CMAKE_VERSION VERSION_LESS "3.12.0")
  # import_target_include_directories(<target> [SYSTEM|INTERFACE|PUBLIC] <dir>...)
  #
  # Stand-in for target_include_directories() on CMake older than 3.12: the
  # directories are written directly into the target's
  # INTERFACE_INCLUDE_DIRECTORIES property. The SYSTEM/INTERFACE/PUBLIC
  # keywords are parsed only so that they can be dropped from the argument
  # list.
  function(import_target_include_directories target)
    cmake_parse_arguments(HACK
      "SYSTEM;INTERFACE;PUBLIC"
      ""
      ""
      ${ARGN}
    )
    # Read the current property into the same variable that is tested and
    # extended below. The previous spelling (INLUDE_DIRS) meant the existing
    # directories were never seen and were overwritten on every call.
    get_target_property(INCLUDE_DIRS ${target} INTERFACE_INCLUDE_DIRECTORIES)
    if(INCLUDE_DIRS)
      list(APPEND INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS})
    else()
      # Property not set (INCLUDE_DIRS-NOTFOUND): start a fresh list.
      set(INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS})
    endif()
    set_target_properties(${target} PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_DIRS}")
  endfunction()
else()
  # CMake 3.12 or newer: forward every argument unchanged.
  function(import_target_include_directories)
    target_include_directories(${ARGN})
  endfunction()
endif()
# First choice: the bin directory provided by the enabled CUDA language or by
# the user. Only that directory is examined (NO_DEFAULT_PATH).
if(CUDAToolkit_BIN_DIR)
  find_program(CUDAToolkit_NVCC_EXECUTABLE NAMES nvcc nvcc.exe PATHS ${CUDAToolkit_BIN_DIR} NO_DEFAULT_PATH)
endif()

# General search: find_program's default locations (which, per the CMake
# documentation, include CUDAToolkit_ROOT when run from a find module on
# CMake >= 3.12) plus the CUDA_PATH environment variable, also trying a bin/
# subdirectory. Does nothing when the call above already located nvcc.
find_program(CUDAToolkit_NVCC_EXECUTABLE NAMES nvcc nvcc.exe PATHS ENV CUDA_PATH PATH_SUFFIXES bin)
# A root given explicitly through the CUDAToolkit_ROOT CMake variable or
# environment variable that does not lead to nvcc is a failure: the platform
# default locations below are not tried in that case.
if(NOT CUDAToolkit_NVCC_EXECUTABLE)
  if(DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT})
    # Prepare both messages up front; which one is printed, and at what
    # severity, depends on the find_package() arguments.
    set(fail_base "Could not find nvcc executable in path specified by")
    set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
    set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")

    if(NOT CUDAToolkit_FIND_REQUIRED)
      # Optional package: report (unless QUIET), mark as not found, clean up
      # the temporaries and leave the module.
      if(NOT CUDAToolkit_FIND_QUIETLY)
        if(DEFINED CUDAToolkit_ROOT)
          message(STATUS ${cuda_root_fail})
        elseif(DEFINED ENV{CUDAToolkit_ROOT})
          message(STATUS ${env_cuda_root_fail})
        endif()
      endif()
      set(CUDAToolkit_FOUND FALSE)
      unset(fail_base)
      unset(cuda_root_fail)
      unset(env_cuda_root_fail)
      return()
    endif()

    # REQUIRED package: abort configuration. The CMake variable takes
    # precedence over the environment variable when both are defined.
    if(DEFINED CUDAToolkit_ROOT)
      message(FATAL_ERROR ${cuda_root_fail})
    elseif(DEFINED ENV{CUDAToolkit_ROOT})
      message(FATAL_ERROR ${env_cuda_root_fail})
    endif()
  endif()
endif()
# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
#
# - Linux: /usr/local/cuda-X.Y
# - macOS: /Developer/NVIDIA/CUDA-X.Y
# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
#
# We will also search the default symlink location /usr/local/cuda first since
# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
# directory is the desired location.
if (NOT CUDAToolkit_NVCC_EXECUTABLE)
# Pick the per-platform prefix to which a version number "X.Y" is appended.
if (UNIX)
if (NOT APPLE)
set(platform_base "/usr/local/cuda-")
else()
set(platform_base "/Developer/NVIDIA/CUDA-")
endif()
else()
set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
endif()
# Build out a descending list of possible cuda installations, e.g.
# 10.0, 9.2, 9.0. Start by collecting everything that begins with the prefix.
file(GLOB possible_paths "${platform_base}*")
# Iterate the glob results and create a descending list.
set(possible_versions)
foreach (p ${possible_paths})
# Extract version number from end of string
# (one or two major digits, a dot, and a single minor digit).
string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
# Keep only real directories whose name ends in such a version.
if (IS_DIRECTORY ${p} AND p_version)
list(APPEND possible_versions ${p_version})
endif()
endforeach()
# Cannot use list(SORT) because that is alphabetical, we need numerical.
# NOTE: this is not an efficient sorting strategy. But even if a user had
# every possible version of CUDA installed, this wouldn't create any
# significant overhead.
set(versions)
foreach (v ${possible_versions})
list(LENGTH versions num_versions)
# First version, nothing to compare with so just append.
if (num_versions EQUAL 0)
list(APPEND versions ${v})
else()
# Loop through list. Insert at an index when comparison is
# VERSION_GREATER since we want a descending list. Duplicates will not
# happen since this came from a glob list of directories.
set(i 0)
set(early_terminate FALSE)
while (i LESS num_versions)
list(GET versions ${i} curr)
if (v VERSION_GREATER curr)
list(INSERT versions ${i} ${v})
set(early_terminate TRUE)
break()
endif()
math(EXPR i "${i} + 1")
endwhile()
# If it did not get inserted, place it at the end.
if (NOT early_terminate)
list(APPEND versions ${v})
endif()
endif()
endforeach()
# With a descending list of versions, populate possible paths to search.
set(search_paths)
foreach (v ${versions})
list(APPEND search_paths "${platform_base}${v}")
endforeach()
# Force the global default /usr/local/cuda to the front on Unix.
if (UNIX)
list(INSERT search_paths 0 "/usr/local/cuda")
endif()
# Now search for nvcc again using the platform default search paths.
# PATH_SUFFIXES makes find_program also look in the bin/ subdirectory of
# each candidate.
find_program(CUDAToolkit_NVCC_EXECUTABLE
NAMES nvcc nvcc.exe
PATHS ${search_paths}
PATH_SUFFIXES bin
)
# We are done with these variables now, cleanup for caller.
unset(platform_base)
unset(possible_paths)
unset(possible_versions)
unset(versions)
unset(i)
unset(early_terminate)
unset(search_paths)
# Still nothing: fail hard when the package is REQUIRED, otherwise report
# (unless QUIET), mark the package as not found and leave the module.
if (NOT CUDAToolkit_NVCC_EXECUTABLE)
if (CUDAToolkit_FIND_REQUIRED)
message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
elseif(NOT CUDAToolkit_FIND_QUIETLY)
message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
endif()
set(CUDAToolkit_FOUND FALSE)
return()
endif()
endif()
# Derive the toolkit's bin directory from the nvcc location, unless
# CUDAToolkit_BIN_DIR already has a value. The result is stored as an advanced
# cache entry of type PATH.
if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
mark_as_advanced(CUDAToolkit_BIN_DIR)
# cuda_dir was only a temporary; do not leave it behind.
unset(cuda_dir)
endif()
# Determine CUDAToolkit_VERSION and its MAJOR / MINOR / PATCH components.
# Two sources are used:
#  - if the nvcc that was found is the same path as CMAKE_CUDA_COMPILER, reuse
#    CMAKE_CUDA_COMPILER_VERSION;
#  - otherwise run "nvcc --version" and parse its output.
if(CUDAToolkit_NVCC_EXECUTABLE AND
CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
# Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
# The MATCHES test is used to populate CMAKE_MATCH_1..3. It is expected to
# succeed for a version of the form X.Y.Z; if it does not, the
# CUDAToolkit_VERSION* variables are simply left unset by this branch.
if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
endif()
else()
# Compute the version by invoking nvcc
execute_process (COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
# Look for a " V<major>.<minor>.<patch>" token in the captured output.
# Nothing is set when the output does not contain one.
if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
endif()
# The raw nvcc output is no longer needed.
unset(NVCC_OUT)
endif()
get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
# Handle cross compilation
#
# When cross compiling, the toolkit content for the target lives under
# <CUDAToolkit_ROOT_DIR>/targets/<CUDAToolkit_TARGET_NAME>; that directory is
# temporarily pushed to the front of CMAKE_FIND_ROOT_PATH. For native builds
# the toolkit root itself is temporarily appended to CMAKE_PREFIX_PATH.
# The _CUDAToolkit_Pop_* flags record which of the two lists has to be
# restored once the searches are done.
if(CMAKE_CROSSCOMPILING)
  # Map the target processor to the name of its directory under targets/.
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
    # Support for NVPACK
    set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
    # Support for arm cross compilation
    set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    # Support for aarch64 cross compilation
    if(ANDROID_ARCH_NAME STREQUAL "arm64")
      set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
    else()
      set(CUDAToolkit_TARGET_NAME "aarch64-linux")
    endif()
  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
    set(CUDAToolkit_TARGET_NAME "x86_64-linux")
  endif()

  if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
    set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
    # Add the known CUDA target root path to the front of the set of
    # directories we search for programs, libraries and headers.
    # list(INSERT ... 0) is used instead of list(PREPEND): PREPEND only exists
    # since CMake 3.15, while this file is loaded as the fallback on CMake
    # releases older than 3.17, which may predate it.
    list(INSERT CMAKE_FIND_ROOT_PATH 0 "${CUDAToolkit_TARGET_DIR}")
    # Mark that we need to pop the root search path changes after we have
    # found all cuda libraries so that searches for our cross-compilation
    # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
    # PATH
    set(_CUDAToolkit_Pop_ROOT_PATH True)
  endif()
else()
  # Not cross compiling
  set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
  # Now that we have the real ROOT_DIR, find components inside it.
  list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
  # Mark that we need to pop the prefix path changes after we have
  # found the cudart library.
  set(_CUDAToolkit_Pop_Prefix True)
endif()
# Find the include/ directory
# No explicit search locations are passed here or to the find_library() calls
# below, so they go through CMake's regular search procedure, including
# whatever CMAKE_PREFIX_PATH and CMAKE_FIND_ROOT_PATH hold at this point.
find_path(CUDAToolkit_INCLUDE_DIR
NAMES cuda_runtime.h
)
# And find the CUDA Runtime Library libcudart
find_library(CUDA_CUDART
NAMES cudart
PATH_SUFFIXES lib64 lib/x64
)
# Fall back to the stubs directories only when the regular library
# directories did not contain cudart.
if (NOT CUDA_CUDART)
find_library(CUDA_CUDART
NAMES cudart
PATH_SUFFIXES lib64/stubs lib/x64/stubs
)
endif()
# Only a status message here; CUDA_CUDART is validated as a required variable
# by find_package_handle_standard_args() further down.
if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
message(STATUS "Unable to find cudart library.")
endif()
unset(CUDAToolkit_ROOT_DIR)
# Undo the temporary list(APPEND CMAKE_PREFIX_PATH ...) made for native
# builds: the appended entry is the last element, hence index -1.
if(_CUDAToolkit_Pop_Prefix)
list(REMOVE_AT CMAKE_PREFIX_PATH -1)
unset(_CUDAToolkit_Pop_Prefix)
endif()
#-----------------------------------------------------------------------------
# Perform version comparison and validate all required variables are set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CUDAToolkit
REQUIRED_VARS
CUDAToolkit_INCLUDE_DIR
CUDA_CUDART
CUDAToolkit_NVCC_EXECUTABLE
VERSION_VAR
CUDAToolkit_VERSION
)
mark_as_advanced(CUDA_CUDART
CUDAToolkit_INCLUDE_DIR
CUDAToolkit_NVCC_EXECUTABLE
)
#-----------------------------------------------------------------------------
# Construct result variables
if(CUDAToolkit_FOUND)
set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
endif()
#-----------------------------------------------------------------------------
# Construct import targets
if(CUDAToolkit_FOUND)
# _CUDAToolkit_find_and_add_import_lib(<lib_name>
#                                      [ALT <name>...]
#                                      [DEPS <lib>...]
#                                      [EXTRA_PATH_SUFFIXES <suffix>...])
#
# Locates one toolkit library and, when it is found, exposes it as the
# imported interface target CUDA::<lib_name>.
#
#   lib_name             primary library name; also names the target and the
#                        cache entry CUDA_<lib_name>_LIBRARY
#   ALT                  additional file names to accept for the library
#   DEPS                 other libraries (without the CUDA:: prefix) to link
#                        to; a dependency is only added if its CUDA::<dep>
#                        target already exists at the time of the call
#   EXTRA_PATH_SUFFIXES  extra sub-directories for the first (non-stub) search
#
# Nothing is created when the library is not found or when the target is
# already defined.
function(_CUDAToolkit_find_and_add_import_lib lib_name)
  cmake_parse_arguments(opt "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN})

  set(candidate_names ${lib_name} ${opt_ALT})
  set(location_var CUDA_${lib_name}_LIBRARY)

  # First pass: the regular library directories.
  find_library(${location_var}
    NAMES ${candidate_names}
    HINTS ${CUDAToolkit_LIBRARY_DIR}
          ENV CUDA_PATH
    PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
                  ${opt_EXTRA_PATH_SUFFIXES}
  )

  # Second pass: don't try any stub directories until we have exhausted all
  # other search locations.
  if(NOT ${location_var})
    find_library(${location_var}
      NAMES ${candidate_names}
      HINTS ${CUDAToolkit_LIBRARY_DIR}
            ENV CUDA_PATH
      PATH_SUFFIXES lib64/stubs lib/x64/stubs lib/stubs stubs
    )
  endif()

  mark_as_advanced(${location_var})

  # Nothing more to do if the target exists already or the library is missing.
  if(TARGET CUDA::${lib_name} OR NOT ${location_var})
    return()
  endif()

  add_library(CUDA::${lib_name} IMPORTED INTERFACE)
  import_target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
  import_target_link_libraries(CUDA::${lib_name} INTERFACE "${${location_var}}")

  # Wire up the dependencies whose targets are already available.
  foreach(required_lib ${opt_DEPS})
    if(TARGET CUDA::${required_lib})
      import_target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${required_lib})
    endif()
  endforeach()
endfunction()
if(NOT TARGET CUDA::toolkit)
add_library(CUDA::toolkit IMPORTED INTERFACE)
import_target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
import_target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
endif()
_CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda)
_CUDAToolkit_find_and_add_import_lib(cudart)
_CUDAToolkit_find_and_add_import_lib(cudart_static)
# setup dependencies that are required for cudart_static when building
# on linux. These are generally only required when using the CUDA toolkit
# when CUDA language is disabled
if(NOT TARGET CUDA::cudart_static_deps
AND TARGET CUDA::cudart_static)
add_library(CUDA::cudart_static_deps IMPORTED INTERFACE)
import_target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps)
if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER))
find_package(Threads REQUIRED)
import_target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS})
endif()
if(UNIX AND NOT APPLE)
# On Linux, you must link against librt when using the static cuda runtime.
find_library(CUDAToolkit_rt_LIBRARY rt)
mark_as_advanced(CUDAToolkit_rt_LIBRARY)
if(NOT CUDAToolkit_rt_LIBRARY)
message(WARNING "Could not find librt library, needed by CUDA::cudart_static")
else()
import_target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY})
endif()
endif()
endif()
_CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library
foreach (cuda_lib cublas cufft curand cusparse nppc nvjpeg)
_CUDAToolkit_find_and_add_import_lib(${cuda_lib})
_CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos)
endforeach()
# cuFFTW depends on cuFFT
# The shared variant links to CUDA::cufft and the static variant to
# CUDA::cufft_static. The second call must name cufftw_static: repeating
# "cufftw" would hit the already existing CUDA::cufftw target and
# CUDA::cufftw_static would never be defined.
_CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft)
_CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static)
# cuSOLVER depends on cuBLAS, and cuSPARSE
_CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse)
_CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos)
# nvGRAPH depends on cuRAND, and cuSOLVER.
_CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver)
_CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static)
# Process the majority of the NPP libraries.
foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu)
_CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc)
_CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static)
endforeach()
_CUDAToolkit_find_and_add_import_lib(cupti
EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/
../extras/CUPTI/lib/)
_CUDAToolkit_find_and_add_import_lib(cupti_static
EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/
../extras/CUPTI/lib/)
_CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver)
_CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml)
if(WIN32)
# nvtools can be installed outside the CUDA toolkit directory
# so prefer the NVTOOLSEXT_PATH windows only environment variable
# In addition on windows the most common name is nvToolsExt64_1
find_library(CUDA_nvToolsExt_LIBRARY
NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt
PATHS ENV NVTOOLSEXT_PATH
ENV CUDA_PATH
PATH_SUFFIXES lib/x64 lib
)
endif()
_CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64)
_CUDAToolkit_find_and_add_import_lib(OpenCL)
endif()
if(_CUDAToolkit_Pop_ROOT_PATH)
list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0)
unset(_CUDAToolkit_Pop_ROOT_PATH)
endif()

View File

@ -1,17 +1,37 @@
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# Note: "stubs" suffix allows CMake to find the dummy
# libcuda.so provided by the NVIDIA CUDA Toolkit for
# cross-compiling CUDA on a host without a GPU.
KOKKOS_FIND_IMPORTED(CUDA INTERFACE
LIBRARIES cudart cuda
LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH /usr/local/cuda
LIBRARY_SUFFIXES lib lib64 lib/stubs lib64/stubs
ALLOW_SYSTEM_PATH_FALLBACK
)
ELSE()
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES cuda
)
IF (NOT CUDAToolkit_ROOT)
IF (NOT CUDA_ROOT)
SET(CUDA_ROOT $ENV{CUDA_ROOT})
ENDIF()
IF(CUDA_ROOT)
SET(CUDAToolkit_ROOT ${CUDA_ROOT})
ENDIF()
ENDIF()
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
find_package(CUDAToolkit)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
ENDIF()
IF (TARGET CUDA::cudart)
SET(FOUND_CUDART TRUE)
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart)
ELSE()
SET(FOUND_CUDART FALSE)
ENDIF()
# Record whether the CUDA driver library target is available and export it if
# so. FOUND_CUDA_DRIVER is checked by FIND_PACKAGE_HANDLE_STANDARD_ARGS below,
# so it must be set explicitly in both branches (a misspelled name in the ELSE
# branch would leave a stale value from an enclosing scope untouched).
IF (TARGET CUDA::cuda_driver)
  SET(FOUND_CUDA_DRIVER TRUE)
  KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver)
ELSE()
  SET(FOUND_CUDA_DRIVER FALSE)
ENDIF()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA DEFAULT_MSG FOUND_CUDART FOUND_CUDA_DRIVER)
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
)
ENDIF()

View File

@ -1,3 +1,4 @@
/*
//@HEADER
// ************************************************************************
//
@ -8,8 +9,6 @@
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Kokkos is licensed under 3-clause BSD terms of use:
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -41,18 +40,43 @@
//
// ************************************************************************
//@HEADER
*/
#define KOKKOS_IMPL_COMPILING_LIBRARY true
#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Impl {
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutRight, OpenMP,
int64_t)
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutLeft, OpenMP,
int64_t)
KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****, LayoutStride, LayoutStride, OpenMP,
int64_t)
KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****, LayoutStride, OpenMP, int64_t)
#include <iostream>
} // namespace Impl
} // namespace Kokkos
int main() {
cudaDeviceProp device_properties;
const cudaError_t error = cudaGetDeviceProperties(&device_properties,
/*device*/ 0);
if (error != cudaSuccess) {
std::cout << "CUDA error: " << cudaGetErrorString(error) << '\n';
return error;
}
unsigned int const compute_capability =
device_properties.major * 10 + device_properties.minor;
#ifdef SM_ONLY
std::cout << compute_capability;
#else
switch (compute_capability) {
// clang-format off
case 30: std::cout << "Set -DKokkos_ARCH_KEPLER30=ON ." << std::endl; break;
case 32: std::cout << "Set -DKokkos_ARCH_KEPLER32=ON ." << std::endl; break;
case 35: std::cout << "Set -DKokkos_ARCH_KEPLER35=ON ." << std::endl; break;
case 37: std::cout << "Set -DKokkos_ARCH_KEPLER37=ON ." << std::endl; break;
case 50: std::cout << "Set -DKokkos_ARCH_MAXWELL50=ON ." << std::endl; break;
case 52: std::cout << "Set -DKokkos_ARCH_MAXWELL52=ON ." << std::endl; break;
case 53: std::cout << "Set -DKokkos_ARCH_MAXWELL53=ON ." << std::endl; break;
case 60: std::cout << "Set -DKokkos_ARCH_PASCAL60=ON ." << std::endl; break;
case 61: std::cout << "Set -DKokkos_ARCH_PASCAL61=ON ." << std::endl; break;
case 70: std::cout << "Set -DKokkos_ARCH_VOLTA70=ON ." << std::endl; break;
case 72: std::cout << "Set -DKokkos_ARCH_VOLTA72=ON ." << std::endl; break;
case 75: std::cout << "Set -DKokkos_ARCH_TURING75=ON ." << std::endl; break;
case 80: std::cout << "Set -DKokkos_ARCH_AMPERE80=ON ." << std::endl; break;
default:
std::cout << "Compute capability " << compute_capability
<< " is not supported" << std::endl;
// clang-format on
}
#endif
return 0;
}

View File

@ -88,7 +88,7 @@ FUNCTION(KOKKOS_ADD_TEST)
if (KOKKOS_HAS_TRILINOS)
CMAKE_PARSE_ARGUMENTS(TEST
""
"EXE;NAME"
"EXE;NAME;TOOL"
""
${ARGN})
IF(TEST_EXE)
@ -104,10 +104,15 @@ FUNCTION(KOKKOS_ADD_TEST)
NUM_MPI_PROCS 1
${TEST_UNPARSED_ARGUMENTS}
)
if(TEST_TOOL)
add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool
set_property(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>")
endif()
else()
CMAKE_PARSE_ARGUMENTS(TEST
"WILL_FAIL"
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME"
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME;TOOL"
"CATEGORIES;CMD_ARGS"
${ARGN})
# To match Tribits, we should always be receiving
@ -135,6 +140,10 @@ FUNCTION(KOKKOS_ADD_TEST)
IF(TEST_PASS_REGULAR_EXPRESSION)
SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION})
ENDIF()
if(TEST_TOOL)
add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool
set_property(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>")
endif()
VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS})
endif()
ENDFUNCTION()

View File

@ -2,11 +2,14 @@
FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION)
#all optimizations off by default
KOKKOS_OPTION(ARCH_${SUFFIX} OFF BOOL "Optimize for ${DESCRIPTION} (${DEV_TYPE})")
IF (KOKKOS_ARCH_${SUFFIX})
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE)
SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE)
SET(KOKKOS_OPTION_TYPES ${KOKKOS_OPTION_TYPES} PARENT_SCOPE)
IF(KOKKOS_ARCH_${SUFFIX})
LIST(APPEND KOKKOS_ENABLED_ARCH_LIST ${SUFFIX})
SET(KOKKOS_ENABLED_ARCH_LIST ${KOKKOS_ENABLED_ARCH_LIST} PARENT_SCOPE)
ENDIF()
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
ENDFUNCTION()
@ -15,6 +18,10 @@ KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID)
KOKKOS_CFG_DEPENDS(ARCH DEVICES)
KOKKOS_CFG_DEPENDS(ARCH OPTIONS)
KOKKOS_CHECK_DEPRECATED_OPTIONS(
ARCH_EPYC "Please replace EPYC with ZEN or ZEN2, depending on your platform"
ARCH_RYZEN "Please replace RYZEN with ZEN or ZEN2, depending on your platform"
)
#-------------------------------------------------------------------------------
# List of possible host architectures.
@ -51,9 +58,12 @@ KOKKOS_ARCH_OPTION(PASCAL61 GPU "NVIDIA Pascal generation CC 6.1")
KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0")
KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2")
KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5")
KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture")
KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0")
KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture")
KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture")
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
KOKKOS_ARCH_OPTION(INTEL_GEN GPU "Intel GPUs Gen9+")
IF (KOKKOS_ENABLE_CUDA)
#Regardless of version, make sure we define the general architecture name
@ -75,6 +85,10 @@ IF (KOKKOS_ENABLE_CUDA)
IF (KOKKOS_ARCH_VOLTA70 OR KOKKOS_ARCH_VOLTA72)
SET(KOKKOS_ARCH_VOLTA ON)
ENDIF()
IF (KOKKOS_ARCH_AMPERE80)
SET(KOKKOS_ARCH_AMPERE ON)
ENDIF()
ENDIF()
@ -88,9 +102,10 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
${COMMON_WARNINGS})
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
GNU ${GNU_WARNINGS}
DEFAULT ${COMMON_WARNINGS}
COMPILER_ID CMAKE_CXX_COMPILER_ID
PGI NO-VALUE-SPECIFIED
GNU ${GNU_WARNINGS}
DEFAULT ${COMMON_WARNINGS}
)
ENDIF()
@ -102,6 +117,9 @@ GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
IF (KOKKOS_ENABLE_CUDA_LAMBDA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda")
IF(KOKKOS_COMPILER_CUDA_VERSION GREATER_EQUAL 110)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
ENDIF()
ENDIF()
ENDIF()
@ -113,7 +131,6 @@ ENDIF()
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
SET(CUDA_ARCH_FLAG "--cuda-gpu-arch")
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda)
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
@ -133,6 +150,15 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
ENDIF()
ENDIF()
#------------------------------- KOKKOS_HIP_OPTIONS ---------------------------
#clear anything that might be in the cache
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP)
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
ENDIF()
IF (KOKKOS_ARCH_ARMV80)
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
@ -167,12 +193,21 @@ IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
)
ENDIF()
IF (KOKKOS_ARCH_EPYC)
IF (KOKKOS_ARCH_ZEN)
COMPILER_SPECIFIC_FLAGS(
Intel -mavx2
DEFAULT -march=znver1 -mtune=znver1
)
SET(KOKKOS_ARCH_AMD_EPYC ON)
SET(KOKKOS_ARCH_AMD_ZEN ON)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
ENDIF()
IF (KOKKOS_ARCH_ZEN2)
COMPILER_SPECIFIC_FLAGS(
Intel -mavx2
DEFAULT -march=znver2 -mtune=znver2
)
SET(KOKKOS_ARCH_AMD_ZEN2 ON)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
ENDIF()
@ -216,14 +251,6 @@ IF (KOKKOS_ARCH_BDW)
)
ENDIF()
IF (KOKKOS_ARCH_EPYC)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
COMPILER_SPECIFIC_FLAGS(
Intel -mvax2
DEFAULT -march=znver1 -mtune=znver1
)
ENDIF()
IF (KOKKOS_ARCH_KNL)
#avx512-mic
SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable
@ -253,7 +280,7 @@ IF (KOKKOS_ARCH_SKX)
)
ENDIF()
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_EPYC)
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2)
SET(KOKKOS_USE_ISA_X86_64 ON)
ENDIF()
@ -296,6 +323,21 @@ IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
)
ENDIF()
# Clang needs mcx16 option enabled for Windows atomic functions
IF (CMAKE_CXX_COMPILER_ID STREQUAL Clang AND WIN32)
COMPILER_SPECIFIC_OPTIONS(
Clang -mcx16
)
ENDIF()
# MSVC ABI has many deprecation warnings, so ignore them
IF (CMAKE_CXX_COMPILER_ID STREQUAL MSVC OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
COMPILER_SPECIFIC_DEFS(
Clang _CRT_SECURE_NO_WARNINGS
)
ENDIF()
#Right now we cannot get the compiler ID when cross-compiling, so just check
#that HIP is enabled
IF (Kokkos_ENABLE_HIP)
@ -324,11 +366,15 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
ELSE()
SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF()
ENDIF()
LIST(APPEND KOKKOS_CUDA_ARCH_FLAGS ${FLAG})
SET(KOKKOS_CUDA_ARCH_FLAGS ${KOKKOS_CUDA_ARCH_FLAGS} PARENT_SCOPE)
LIST(APPEND KOKKOS_CUDA_ARCH_LIST ${ARCH})
SET(KOKKOS_CUDA_ARCH_LIST ${KOKKOS_CUDA_ARCH_LIST} PARENT_SCOPE)
ENDFUNCTION()
@ -346,6 +392,7 @@ CHECK_CUDA_ARCH(PASCAL61 sm_61)
CHECK_CUDA_ARCH(VOLTA70 sm_70)
CHECK_CUDA_ARCH(VOLTA72 sm_72)
CHECK_CUDA_ARCH(TURING75 sm_75)
CHECK_CUDA_ARCH(AMPERE80 sm_80)
SET(AMDGPU_ARCH_ALREADY_SPECIFIED "")
FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
@ -372,12 +419,19 @@ ENDFUNCTION()
CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
MESSAGE(SEND_ERROR "HIP enabled but no AMD GPU architecture currently enabled. "
"Please enable one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'.")
ENDIF()
IF (KOKKOS_ENABLE_OPENMPTARGET)
SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG})
IF (CLANG_CUDA_ARCH)
STRING(REPLACE "sm_" "cc" PGI_CUDA_ARCH ${CLANG_CUDA_ARCH})
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
PGI -gpu=${PGI_CUDA_ARCH}
)
ENDIF()
SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG})
@ -386,10 +440,39 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa
)
ENDIF()
IF (KOKKOS_ARCH_INTEL_GEN)
COMPILER_SPECIFIC_FLAGS(
IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__
)
ENDIF()
ENDIF()
IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled. Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.")
# Try to autodetect the CUDA Compute Capability by asking the device
SET(_BINARY_TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/compile_tests/CUDAComputeCapabilityWorkdir)
FILE(REMOVE_RECURSE ${_BINARY_TEST_DIR})
FILE(MAKE_DIRECTORY ${_BINARY_TEST_DIR})
TRY_RUN(
_RESULT
_COMPILE_RESULT
${_BINARY_TEST_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc
COMPILE_DEFINITIONS -DSM_ONLY
RUN_OUTPUT_VARIABLE _CUDA_COMPUTE_CAPABILITY)
LIST(FIND KOKKOS_CUDA_ARCH_FLAGS sm_${_CUDA_COMPUTE_CAPABILITY} FLAG_INDEX)
IF(_COMPILE_RESULT AND _RESULT EQUAL 0 AND NOT FLAG_INDEX EQUAL -1)
MESSAGE(STATUS "Detected CUDA Compute Capability ${_CUDA_COMPUTE_CAPABILITY}")
LIST(GET KOKKOS_CUDA_ARCH_LIST ${FLAG_INDEX} ARCHITECTURE)
KOKKOS_SET_OPTION(ARCH_${ARCHITECTURE} ON)
CHECK_CUDA_ARCH(${ARCHITECTURE} sm_${_CUDA_COMPUTE_CAPABILITY})
LIST(APPEND KOKKOS_ENABLED_ARCH_LIST ${ARCHITECTURE})
ELSE()
MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled and auto-detection failed. "
"Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.\n"
"You can yourself try to compile ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc and run the executable. "
"If you are cross-compiling, you should try to do this on a compute node.")
ENDIF()
ENDIF()
#CMake verbose is kind of pointless
@ -453,4 +536,3 @@ MESSAGE(STATUS "Architectures:")
FOREACH(Arch ${KOKKOS_ENABLED_ARCH_LIST})
MESSAGE(STATUS " ${Arch}")
ENDFOREACH()

View File

@ -4,33 +4,54 @@ SET(KOKKOS_CXX_COMPILER ${CMAKE_CXX_COMPILER})
SET(KOKKOS_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
SET(KOKKOS_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})
# Check if the compiler is nvcc (which really means nvcc_wrapper).
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
COMMAND grep nvcc
COMMAND wc -l
OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
OUTPUT_STRIP_TRAILING_WHITESPACE)
IF(Kokkos_ENABLE_CUDA)
# Check if the compiler is nvcc (which really means nvcc_wrapper).
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} )
STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC)
STRING(REGEX REPLACE "^ +" ""
INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
STRING(REGEX REPLACE "^ +" ""
INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1)
SET(INTERNAL_HAVE_COMPILER_NVCC true)
ELSE()
SET(INTERNAL_HAVE_COMPILER_NVCC false)
ENDIF()
ENDIF()
IF(INTERNAL_HAVE_COMPILER_NVCC)
# Save the host compiler id before overwriting it.
SET(KOKKOS_CXX_HOST_COMPILER_ID ${KOKKOS_CXX_COMPILER_ID})
# SET the compiler id to nvcc. We use the value used by CMake 3.8.
SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE)
# SET nvcc's compiler version.
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
COMMAND grep release
OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
STRING(REGEX MATCH "V[0-9]+\\.[0-9]+\\.[0-9]+"
TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE})
STRING(SUBSTRING ${TEMP_CXX_COMPILER_VERSION} 1 -1 TEMP_CXX_COMPILER_VERSION)
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
MESSAGE(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
ENDIF()
IF(Kokkos_ENABLE_HIP)
# get HIP version
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} )
SET(KOKKOS_CXX_COMPILER_ID HIP CACHE STRING INTERNAL FORCE)
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+"
TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE})
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
MESSAGE(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# The Cray compiler reports as Clang to most versions of CMake
@ -42,6 +63,16 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
SET(KOKKOS_CLANG_IS_CRAY TRUE)
ENDIF()
# The clang based Intel compiler reports as Clang to most versions of CMake
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
COMMAND grep icpx
COMMAND wc -l
OUTPUT_VARIABLE INTERNAL_HAVE_INTEL_COMPILER
OUTPUT_STRIP_TRAILING_WHITESPACE)
IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
SET(KOKKOS_CLANG_IS_INTEL TRUE)
SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE)
ENDIF()
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY)
@ -65,6 +96,7 @@ SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher"
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 9.0.69 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 3.5.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
@ -84,6 +116,10 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.5.0)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.1)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")

View File

@ -1,4 +1,4 @@
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY AND NOT "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
# The clang "version" doesn't actually tell you what runtimes and tools
# were built into Clang. We should therefore make sure that libomp
# was actually built into Clang. Otherwise the user will get nonsensical
@ -49,11 +49,11 @@ ENDIF()
IF (KOKKOS_CXX_STANDARD STREQUAL 17)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7)
MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need 17 support")
MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need C++17 support.")
ENDIF()
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC. Please reduce the C++ standard to 14. No versions of NVCC currently support 17.")
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11)
MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC ${KOKKOS_CXX_COMPILER_VERSION}. NVCC only supports C++17 from version 11 on. Please reduce the C++ standard to 14 or upgrade the compiler if you need C++17 support.")
ENDIF()
ENDIF()

View File

@ -36,25 +36,51 @@ IF(KOKKOS_ENABLE_OPENMP)
IF(KOKKOS_CLANG_IS_CRAY)
SET(ClangOpenMPFlag -fopenmp)
ENDIF()
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag}
AppleClang -Xpreprocessor -fopenmp
PGI -mp
NVIDIA -Xcompiler -fopenmp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_LIBS(
AppleClang -lomp
)
IF(KOKKOS_CLANG_IS_INTEL)
SET(ClangOpenMPFlag -fiopenmp)
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
#expression /openmp yields error, so add a specific Clang flag
COMPILER_SPECIFIC_OPTIONS(Clang /clang:-fopenmp)
#link omp library from LLVM lib dir
get_filename_component(LLVM_BIN_DIR ${CMAKE_CXX_COMPILER_AR} DIRECTORY)
COMPILER_SPECIFIC_LIBS(Clang "${LLVM_BIN_DIR}/../lib/libomp.lib")
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Clang -Xcompiler ${ClangOpenMPFlag}
PGI -Xcompiler -mp
Cray NO-VALUE-SPECIFIED
XL -Xcompiler -qsmp=omp
DEFAULT -Xcompiler -fopenmp
)
ELSE()
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag}
AppleClang -Xpreprocessor -fopenmp
PGI -mp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_LIBS(
AppleClang -lomp
)
ENDIF()
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend")
IF (KOKKOS_ENABLE_OPENMPTARGET)
SET(ClangOpenMPFlag -fopenmp=libomp)
IF(KOKKOS_CLANG_IS_CRAY)
SET(ClangOpenMPFlag -fopenmp)
ENDIF()
COMPILER_SPECIFIC_FLAGS(
Clang -fopenmp -fopenmp=libomp
Clang ${ClangOpenMPFlag} -Wno-openmp-mapping
IntelClang -fiopenmp -Wno-openmp-mapping
XL -qsmp=omp -qoffload -qnoeh
PGI -mp=gpu
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_DEFS(
@ -65,6 +91,9 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
# COMPILER_SPECIFIC_LIBS(
# Clang -lopenmptarget
# )
IF(KOKKOS_CXX_STANDARD LESS 17)
MESSAGE(FATAL_ERROR "OpenMPTarget backend requires C++17 or newer")
ENDIF()
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
@ -76,6 +105,9 @@ KOKKOS_DEVICE_OPTION(CUDA ${CUDA_DEFAULT} DEVICE "Whether to build CUDA backend"
IF (KOKKOS_ENABLE_CUDA)
GLOBAL_SET(KOKKOS_DONT_ALLOW_EXTENSIONS "CUDA enabled")
IF(WIN32)
GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS -x cu)
ENDIF()
ENDIF()
# We want this to default to OFF for cache reasons, but if no

View File

@ -45,10 +45,9 @@ UNSET(_UPPERCASE_CMAKE_BUILD_TYPE)
KOKKOS_ENABLE_OPTION(LARGE_MEM_TESTS OFF "Whether to perform extra large memory tests")
KOKKOS_ENABLE_OPTION(DEBUG_BOUNDS_CHECK OFF "Whether to use bounds checking - will increase runtime")
KOKKOS_ENABLE_OPTION(COMPILER_WARNINGS OFF "Whether to print all compiler warnings")
KOKKOS_ENABLE_OPTION(PROFILING ON "Whether to create bindings for profiling tools")
KOKKOS_ENABLE_OPTION(PROFILING_LOAD_PRINT OFF "Whether to print information about which profiling tools got loaded")
KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tuning tools")
KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE OFF "Whether to enable deprecated code")
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}")

View File

@ -47,6 +47,13 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
SET(CAMEL_NAME Kokkos_${CAMEL_SUFFIX})
STRING(TOUPPER ${CAMEL_NAME} UC_NAME)
LIST(APPEND KOKKOS_OPTION_KEYS ${CAMEL_SUFFIX})
SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE)
LIST(APPEND KOKKOS_OPTION_VALUES "${DOCSTRING}")
SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE)
LIST(APPEND KOKKOS_OPTION_TYPES ${TYPE})
SET(KOKKOS_OPTION_TYPES ${KOKKOS_OPTION_TYPES} PARENT_SCOPE)
# Make sure this appears in the cache with the appropriate DOCSTRING
SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING})
@ -73,7 +80,21 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
ELSE()
SET(${UC_NAME} ${DEFAULT} PARENT_SCOPE)
ENDIF()
ENDFUNCTION()
# Force a new value onto an option that is already recorded in the
# KOKKOS_OPTION_KEYS / KOKKOS_OPTION_VALUES / KOKKOS_OPTION_TYPES lists.
#
# Arguments:
#   CAMEL_SUFFIX - option suffix as stored in KOKKOS_OPTION_KEYS; the cache
#                  entry written is Kokkos_<CAMEL_SUFFIX>
#   VALUE        - value to store
#
# Effects:
#   * FATAL_ERROR if CAMEL_SUFFIX is not found in KOKKOS_OPTION_KEYS
#   * Kokkos_<CAMEL_SUFFIX> is written to the cache with FORCE, reusing the
#     type and docstring stored at the same index as the key
#   * the upper-cased name is set to VALUE in the caller's scope
FUNCTION(kokkos_set_option CAMEL_SUFFIX VALUE)
  # Derive the cache name and its upper-case twin first; neither depends on
  # the lookup below.
  SET(CACHE_ENTRY Kokkos_${CAMEL_SUFFIX})
  STRING(TOUPPER ${CACHE_ENTRY} UPPER_ENTRY)

  # The three option lists are index-aligned: one position identifies the
  # key, its docstring and its cache type.
  LIST(FIND KOKKOS_OPTION_KEYS ${CAMEL_SUFFIX} KEY_POSITION)
  IF(KEY_POSITION EQUAL -1)
    MESSAGE(FATAL_ERROR "Couldn't set value for Kokkos_${CAMEL_SUFFIX}")
  ENDIF()
  LIST(GET KOKKOS_OPTION_TYPES ${KEY_POSITION} ENTRY_TYPE)
  LIST(GET KOKKOS_OPTION_VALUES ${KEY_POSITION} ENTRY_DOC)

  SET(${CACHE_ENTRY} ${VALUE} CACHE ${ENTRY_TYPE} ${ENTRY_DOC} FORCE)
  MESSAGE(STATUS "Setting ${CACHE_ENTRY}=${VALUE}")
  SET(${UPPER_ENTRY} ${VALUE} PARENT_SCOPE)
ENDFUNCTION()
FUNCTION(kokkos_append_config_line LINE)
@ -109,8 +130,8 @@ ENDMACRO()
MACRO(kokkos_export_imported_tpl NAME)
IF (NOT KOKKOS_HAS_TRILINOS)
GET_TARGET_PROPERTY(LIB_TYPE ${NAME} TYPE)
IF (${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY")
GET_TARGET_PROPERTY(LIB_IMPORTED ${NAME} IMPORTED)
IF (NOT LIB_IMPORTED)
# This is not an imported target
# This is an interface library that we created
INSTALL(
@ -123,12 +144,18 @@ MACRO(kokkos_export_imported_tpl NAME)
ELSE()
#make sure this also gets "exported" in the config file
KOKKOS_APPEND_CONFIG_LINE("IF(NOT TARGET ${NAME})")
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)")
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION)
IF(TPL_LIBRARY)
KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}")
GET_TARGET_PROPERTY(LIB_TYPE ${NAME} TYPE)
IF (${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY")
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} INTERFACE IMPORTED)")
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
ELSE()
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)")
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION)
IF(TPL_LIBRARY)
KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}")
ENDIF()
ENDIF()
GET_TARGET_PROPERTY(TPL_INCLUDES ${NAME} INTERFACE_INCLUDE_DIRECTORIES)
@ -737,18 +764,22 @@ FUNCTION(kokkos_link_tpl TARGET)
ENDFUNCTION()
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIP)
CMAKE_PARSE_ARGUMENTS(
PARSE
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
""
"COMPILER_ID"
"${COMPILERS}"
${ARGN})
IF(PARSE_UNPARSED_ARGUMENTS)
MESSAGE(SEND_ERROR "'${PARSE_UNPARSED_ARGUMENTS}' argument(s) not recognized when providing compiler specific options")
ENDIF()
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
IF(PARSE_COMPILER_ID)
SET(COMPILER ${${PARSE_COMPILER_ID}})
ELSE()
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
ENDIF()
SET(COMPILER_SPECIFIC_FLAGS_TMP)
FOREACH(COMP ${COMPILERS})
@ -792,6 +823,14 @@ FUNCTION(COMPILER_SPECIFIC_FLAGS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_OPTIONS LINK_OPTIONS)
ENDFUNCTION(COMPILER_SPECIFIC_FLAGS)
# Compile-only variant of the compiler-specific helpers: every argument is
# forwarded to COMPILER_SPECIFIC_OPTIONS_HELPER followed by the
# COMPILE_OPTIONS keyword (and no LINK_OPTIONS keyword).
FUNCTION(COMPILER_SPECIFIC_OPTIONS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(
    ${ARGN}
    COMPILE_OPTIONS
  )
ENDFUNCTION()
# Link-only variant of the compiler-specific helpers: every argument is
# forwarded to COMPILER_SPECIFIC_OPTIONS_HELPER followed by the
# LINK_OPTIONS keyword (and no COMPILE_OPTIONS keyword).
FUNCTION(COMPILER_SPECIFIC_LINK_OPTIONS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(
    ${ARGN}
    LINK_OPTIONS
  )
ENDFUNCTION()
FUNCTION(COMPILER_SPECIFIC_DEFS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_DEFINITIONS)
ENDFUNCTION(COMPILER_SPECIFIC_DEFS)
@ -799,3 +838,36 @@ ENDFUNCTION(COMPILER_SPECIFIC_DEFS)
FUNCTION(COMPILER_SPECIFIC_LIBS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} LINK_LIBRARIES)
ENDFUNCTION(COMPILER_SPECIFIC_LIBS)
# Turn a flat list of alternating keys and values into a list of keys plus
# one variable per key.
#
# Given a list of the form
#   key1;value1;key2;value2;...
# create a list of all keys in a variable named ${KEY_LIST_NAME}
# and set the value for each key in a variable ${VAR_PREFIX}key1,...
# For example,
#   kokkos_key_value_map(ARCH ALL_ARCHES key1;value1;key2;value2)
# would produce a list variable ALL_ARCHES=key1;key2
# and individual variables ARCHkey1=value1 and ARCHkey2=value2
#
# Notes:
#  - This is a MACRO, so ${KEY_LIST_NAME}, every ${VAR_PREFIX}<key> and the
#    helper variables PARSE_KEY and CURRENT_KEY are set in the caller's scope.
#  - If an odd number of entries is passed, the last entry is appended to the
#    key list but no ${VAR_PREFIX}<key> variable is set for it.
MACRO(KOKKOS_KEY_VALUE_MAP VAR_PREFIX KEY_LIST_NAME)
# PARSE_KEY toggles on every entry: ON -> the entry is a key, OFF -> a value
SET(PARSE_KEY ON)
# start from an empty key list
SET(${KEY_LIST_NAME})
FOREACH(ENTRY ${ARGN})
IF(PARSE_KEY)
# remember the key so the next entry can be stored under it
SET(CURRENT_KEY ${ENTRY})
SET(PARSE_KEY OFF)
LIST(APPEND ${KEY_LIST_NAME} ${CURRENT_KEY})
ELSE()
# this entry is the value belonging to the previously seen key
SET(${VAR_PREFIX}${CURRENT_KEY} ${ENTRY})
SET(PARSE_KEY ON)
ENDIF()
ENDFOREACH()
ENDMACRO()
# Emit an error for every removed option that is still defined.
#
# Arguments (ARGN): a flat list of pairs
#   <option suffix> <message> [<option suffix> <message> ...]
#
# For each pair, if the variable Kokkos_<option suffix> is defined, a
# SEND_ERROR is raised that names the option, shows its current value and
# appends the message registered for that option. SEND_ERROR lets CMake keep
# processing, so every offending option is reported in a single run.
FUNCTION(KOKKOS_CHECK_DEPRECATED_OPTIONS)
  # Splits ARGN into DEPRECATED_LIST (the suffixes) and one
  # DEPRECATED_MSG_<suffix> variable per option.
  KOKKOS_KEY_VALUE_MAP(DEPRECATED_MSG_ DEPRECATED_LIST ${ARGN})
  FOREACH(OPTION_SUFFIX ${DEPRECATED_LIST})
    SET(OPTION_NAME Kokkos_${OPTION_SUFFIX})
    SET(OPTION_MESSAGE "${DEPRECATED_MSG_${OPTION_SUFFIX}}")
    IF(DEFINED ${OPTION_NAME}) # This variable has been given by the user as on or off
      # Expand OPTION_MESSAGE here: the previous code expanded the never-set
      # OPT_MESSAGE, which dropped the explanatory text from the error.
      MESSAGE(SEND_ERROR "Removed option ${OPTION_NAME} has been given with value ${${OPTION_NAME}}. ${OPTION_MESSAGE}")
    ENDIF()
  ENDFOREACH()
ENDFUNCTION()

View File

@ -1,5 +1,5 @@
INCLUDE(CMakePackageConfigHelpers)
IF (NOT KOKKOS_HAS_TRILINOS)
IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
INCLUDE(GNUInstallDirs)
#Set all the variables needed for KokkosConfig.cmake

View File

@ -28,19 +28,30 @@ FUNCTION(kokkos_set_cxx_standard_feature standard)
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME})
MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature")
IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL GNU OR KOKKOS_CXX_HOST_COMPILER_ID STREQUAL Clang))
SET(SUPPORTED_NVCC_FLAGS "-std=c++11;-std=c++14;-std=c++17")
IF (NOT ${${STANDARD_NAME}} IN_LIST SUPPORTED_NVCC_FLAGS)
MESSAGE(FATAL_ERROR "CMake wants to use ${${STANDARD_NAME}} which is not supported by NVCC. Using a more recent host compiler or a more recent CMake version might help.")
ENDIF()
ENDIF()
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
#MSVC doesn't need a command line flag, that doesn't mean it has no support
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSEIF((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32)
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "")
ELSE()
#nope, we can't do anything here
MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "")
ENDIF()
IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES)
MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported")
IF(NOT WIN32)
IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES)
MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported")
ENDIF()
ENDIF()
ENDFUNCTION()
@ -123,7 +134,7 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
ELSEIF((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32))
INCLUDE(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSE()

View File

@ -13,10 +13,10 @@ KOKKOS_TPL_OPTION(LIBNUMA Off)
KOKKOS_TPL_OPTION(MEMKIND Off)
KOKKOS_TPL_OPTION(CUDA Off)
KOKKOS_TPL_OPTION(LIBRT Off)
KOKKOS_TPL_OPTION(LIBDL On)
IF(KOKKOS_ENABLE_PROFILING AND NOT KOKKOS_ENABLE_LIBDL)
MESSAGE(SEND_ERROR "Kokkos_ENABLE_PROFILING requires Kokkos_ENABLE_LIBDL=ON")
IF (WIN32)
KOKKOS_TPL_OPTION(LIBDL Off)
ELSE()
KOKKOS_TPL_OPTION(LIBDL On)
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)

View File

@ -21,10 +21,6 @@ IF (KOKKOS_HAS_TRILINOS)
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
ENDIF()
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
ENDIF()
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
ENDIF()
@ -134,7 +130,7 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME)
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS})
#All executables must link to all the kokkos targets
#This is just private linkage because exe is final
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE kokkos)
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE Kokkos::kokkos)
endif()
ENDFUNCTION()
@ -174,16 +170,42 @@ FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
ENDFUNCTION()
MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_test_cxx_std.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake)
IF (NOT KOKKOS_HAS_TRILINOS)
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake)
ENDIF()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
# This is needed for both regular build and install tests
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
#set an internal option, if not already set
SET(Kokkos_INSTALL_TESTING OFF CACHE INTERNAL "Whether to build tests and examples against installation")
IF (Kokkos_INSTALL_TESTING)
SET(KOKKOS_ENABLE_TESTS ON)
SET(KOKKOS_ENABLE_EXAMPLES ON)
# This looks a little weird, but what we are doing
# is to NOT build Kokkos but instead look for an
# installed Kokkos - then build examples and tests
# against that installed Kokkos
FIND_PACKAGE(Kokkos REQUIRED)
# Just grab the configuration from the installation
FOREACH(DEV ${Kokkos_DEVICES})
SET(KOKKOS_ENABLE_${DEV} ON)
ENDFOREACH()
FOREACH(OPT ${Kokkos_OPTIONS})
SET(KOKKOS_ENABLE_${OPT} ON)
ENDFOREACH()
FOREACH(TPL ${Kokkos_TPLS})
SET(KOKKOS_ENABLE_${TPL} ON)
ENDFOREACH()
FOREACH(ARCH ${Kokkos_ARCH})
SET(KOKKOS_ARCH_${ARCH} ON)
ENDFOREACH()
ELSE()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_test_cxx_std.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake)
IF (NOT KOKKOS_HAS_TRILINOS)
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake)
ENDIF()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
ENDIF()
ENDMACRO()
MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME)
@ -310,28 +332,40 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME)
LIST(REMOVE_DUPLICATES PARSE_SOURCES)
ENDIF()
IF(PARSE_STATIC)
SET(LINK_TYPE STATIC)
ENDIF()
IF(PARSE_SHARED)
SET(LINK_TYPE SHARED)
ENDIF()
# MSVC and other platforms want to have
# the headers included as source files
# for better dependency detection
ADD_LIBRARY(
${LIBRARY_NAME}
${LINK_TYPE}
${PARSE_HEADERS}
${PARSE_SOURCES}
)
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME})
INSTALL(
FILES ${PARSE_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT ${PACKAGE_NAME}
)
#In case we are building in-tree, add an alias name
#that matches the install Kokkos:: name
ADD_LIBRARY(Kokkos::${LIBRARY_NAME} ALIAS ${LIBRARY_NAME})
ENDFUNCTION()
FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE
"ADD_BUILD_OPTIONS"
""
""
${ARGN}
)
IF (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN})
TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS})
#Stolen from Tribits - it can add prefixes
SET(TRIBITS_LIBRARY_NAME_PREFIX "${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}")
SET(TRIBITS_LIBRARY_NAME ${TRIBITS_LIBRARY_NAME_PREFIX}${LIBRARY_NAME})
@ -346,8 +380,10 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME)
#KOKKOS_SET_LIBRARY_PROPERTIES(${TRIBITS_LIBRARY_NAME} PLAIN_STYLE)
ELSE()
KOKKOS_INTERNAL_ADD_LIBRARY(
${LIBRARY_NAME} ${ARGN})
KOKKOS_SET_LIBRARY_PROPERTIES(${LIBRARY_NAME})
${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS})
IF (PARSE_ADD_BUILD_OPTIONS)
KOKKOS_SET_LIBRARY_PROPERTIES(${LIBRARY_NAME})
ENDIF()
ENDIF()
ENDFUNCTION()
@ -364,17 +400,6 @@ ELSE()
ADD_LIBRARY(${NAME} INTERFACE)
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME})
INSTALL(
FILES ${PARSE_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
INSTALL(
FILES ${PARSE_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT ${PACKAGE_NAME}
)
ENDIF()
ENDFUNCTION()

View File

@ -0,0 +1,4 @@
packages:
kokkos:
variants: +cuda +openmp +volta70 +cuda_lambda +wrapper ^cuda@10.1
compiler: [gcc@7.2.0]

View File

@ -2,7 +2,9 @@
KOKKOS_SUBPACKAGE(Containers)
ADD_SUBDIRECTORY(src)
IF (NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src)
ENDIF()
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
KOKKOS_ADD_TEST_DIRECTORIES(performance_tests)

View File

@ -31,10 +31,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
OBJ_ROCM = TestROCm.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_ROCm
TEST_TARGETS += test-rocm
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
OBJ_HIP = TestHIP.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_HIP
TEST_TARGETS += test-hip
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)

View File

@ -58,7 +58,7 @@ namespace Performance {
// View functor
template <typename DeviceType>
struct InitViewFunctor {
typedef Kokkos::View<double ***, DeviceType> inviewtype;
using inviewtype = Kokkos::View<double ***, DeviceType>;
inviewtype _inview;
InitViewFunctor(inviewtype &inview_) : _inview(inview_) {}
@ -73,10 +73,10 @@ struct InitViewFunctor {
}
struct SumComputationTest {
typedef Kokkos::View<double ***, DeviceType> inviewtype;
using inviewtype = Kokkos::View<double ***, DeviceType>;
inviewtype _inview;
typedef Kokkos::View<double *, DeviceType> outviewtype;
using outviewtype = Kokkos::View<double *, DeviceType>;
outviewtype _outview;
KOKKOS_INLINE_FUNCTION
@ -96,7 +96,7 @@ struct InitViewFunctor {
template <typename DeviceType>
struct InitStrideViewFunctor {
typedef Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType> inviewtype;
using inviewtype = Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType>;
inviewtype _inview;
InitStrideViewFunctor(inviewtype &inview_) : _inview(inview_) {}
@ -113,7 +113,7 @@ struct InitStrideViewFunctor {
template <typename DeviceType>
struct InitViewRank7Functor {
typedef Kokkos::View<double *******, DeviceType> inviewtype;
using inviewtype = Kokkos::View<double *******, DeviceType>;
inviewtype _inview;
InitViewRank7Functor(inviewtype &inview_) : _inview(inview_) {}
@ -131,7 +131,7 @@ struct InitViewRank7Functor {
// DynRankView functor
template <typename DeviceType>
struct InitDynRankViewFunctor {
typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
using inviewtype = Kokkos::DynRankView<double, DeviceType>;
inviewtype _inview;
InitDynRankViewFunctor(inviewtype &inview_) : _inview(inview_) {}
@ -146,10 +146,10 @@ struct InitDynRankViewFunctor {
}
struct SumComputationTest {
typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
using inviewtype = Kokkos::DynRankView<double, DeviceType>;
inviewtype _inview;
typedef Kokkos::DynRankView<double, DeviceType> outviewtype;
using outviewtype = Kokkos::DynRankView<double, DeviceType>;
outviewtype _outview;
KOKKOS_INLINE_FUNCTION
@ -169,8 +169,8 @@ struct InitDynRankViewFunctor {
template <typename DeviceType>
void test_dynrankview_op_perf(const int par_size) {
typedef DeviceType execution_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using size_type = typename execution_space::size_type;
const size_type dim_2 = 90;
const size_type dim_3 = 30;
@ -184,7 +184,7 @@ void test_dynrankview_op_perf(const int par_size) {
{
Kokkos::View<double ***, DeviceType> testview("testview", par_size, dim_2,
dim_3);
typedef InitViewFunctor<DeviceType> FunctorType;
using FunctorType = InitViewFunctor<DeviceType>;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0, par_size);
@ -204,7 +204,7 @@ void test_dynrankview_op_perf(const int par_size) {
Kokkos::View<double ***, Kokkos::LayoutStride, DeviceType> teststrideview =
Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL);
typedef InitStrideViewFunctor<DeviceType> FunctorStrideType;
using FunctorStrideType = InitStrideViewFunctor<DeviceType>;
timer.reset();
Kokkos::parallel_for(policy, FunctorStrideType(teststrideview));
@ -216,7 +216,7 @@ void test_dynrankview_op_perf(const int par_size) {
{
Kokkos::View<double *******, DeviceType> testview("testview", par_size,
dim_2, dim_3, 1, 1, 1, 1);
typedef InitViewRank7Functor<DeviceType> FunctorType;
using FunctorType = InitViewRank7Functor<DeviceType>;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0, par_size);
@ -229,7 +229,7 @@ void test_dynrankview_op_perf(const int par_size) {
{
Kokkos::DynRankView<double, DeviceType> testdrview("testdrview", par_size,
dim_2, dim_3);
typedef InitDynRankViewFunctor<DeviceType> FunctorType;
using FunctorType = InitDynRankViewFunctor<DeviceType>;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0, par_size);

View File

@ -65,9 +65,9 @@ union helper {
template <typename Device>
struct generate_ids {
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<uint32_t*, execution_space> local_id_view;
using execution_space = Device;
using size_type = typename execution_space::size_type;
using local_id_view = Kokkos::View<uint32_t*, execution_space>;
local_id_view local_2_global;
@ -96,13 +96,12 @@ struct generate_ids {
template <typename Device>
struct fill_map {
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*, execution_space,
Kokkos::MemoryRandomAccess>
local_id_view;
typedef Kokkos::UnorderedMap<uint32_t, size_type, execution_space>
global_id_view;
using execution_space = Device;
using size_type = typename execution_space::size_type;
using local_id_view = Kokkos::View<const uint32_t*, execution_space,
Kokkos::MemoryRandomAccess>;
using global_id_view =
Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
global_id_view global_2_local;
local_id_view local_2_global;
@ -120,18 +119,17 @@ struct fill_map {
template <typename Device>
struct find_test {
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*, execution_space,
Kokkos::MemoryRandomAccess>
local_id_view;
typedef Kokkos::UnorderedMap<const uint32_t, const size_type, execution_space>
global_id_view;
using execution_space = Device;
using size_type = typename execution_space::size_type;
using local_id_view = Kokkos::View<const uint32_t*, execution_space,
Kokkos::MemoryRandomAccess>;
using global_id_view =
Kokkos::UnorderedMap<const uint32_t, const size_type, execution_space>;
global_id_view global_2_local;
local_id_view local_2_global;
typedef size_t value_type;
using value_type = size_t;
find_test(global_id_view gIds, local_id_view lIds, value_type& num_errors)
: global_2_local(gIds), local_2_global(lIds) {
@ -156,12 +154,12 @@ struct find_test {
template <typename Device>
void test_global_to_local_ids(unsigned num_ids) {
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
using execution_space = Device;
using size_type = typename execution_space::size_type;
typedef Kokkos::View<uint32_t*, execution_space> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t, size_type, execution_space>
global_id_view;
using local_id_view = Kokkos::View<uint32_t*, execution_space>;
using global_id_view =
Kokkos::UnorderedMap<uint32_t, size_type, execution_space>;
// size
std::cout << num_ids << ", ";

View File

@ -50,14 +50,14 @@
namespace Perf {
template <typename ExecSpace, typename Layout, int duplication,
int contribution>
template <typename ExecSpace, typename Layout, typename Duplication,
typename Contribution>
void test_scatter_view(int m, int n) {
Kokkos::View<double * [3], Layout, ExecSpace> original_view("original_view",
n);
{
auto scatter_view = Kokkos::Experimental::create_scatter_view<
Kokkos::Experimental::ScatterSum, duplication, contribution>(
Kokkos::Experimental::ScatterSum, Duplication, Contribution>(
original_view);
Kokkos::Experimental::UniqueToken<
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global>

View File

@ -55,9 +55,9 @@ namespace Perf {
template <typename Device, bool Near>
struct UnorderedMapTest {
typedef Device execution_space;
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
typedef typename map_type::histogram_type histogram_type;
using execution_space = Device;
using map_type = Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space>;
using histogram_type = typename map_type::histogram_type;
struct value_type {
uint32_t failed_count;

View File

@ -9,6 +9,10 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
SET(KOKKOS_CONTAINERS_SRCS)
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp)
SET(KOKKOS_CONTAINER_HEADERS)
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
@ -19,6 +23,7 @@ INSTALL (
KOKKOS_ADD_LIBRARY(
kokkoscontainers
SOURCES ${KOKKOS_CONTAINERS_SRCS}
HEADERS ${KOKKOS_CONTAINER_HEADERS}
)
SET_TARGET_PROPERTIES(kokkoscontainers PROPERTIES VERSION ${Kokkos_VERSION})

View File

@ -73,8 +73,8 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
template <typename Device>
class Bitset {
public:
typedef Device execution_space;
typedef unsigned size_type;
using execution_space = Device;
using size_type = unsigned;
enum { BIT_SCAN_REVERSE = 1u };
enum { MOVE_HINT_BACKWARD = 2u };
@ -137,9 +137,9 @@ class Bitset {
if (m_last_block_mask) {
// clear the unused bits in the last block
typedef Kokkos::Impl::DeepCopy<typename execution_space::memory_space,
Kokkos::HostSpace>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename execution_space::memory_space,
Kokkos::HostSpace>;
raw_deep_copy(m_blocks.data() + (m_blocks.extent(0) - 1u),
&m_last_block_mask, sizeof(unsigned));
}
@ -234,6 +234,10 @@ class Bitset {
return find_any_helper(block_idx, offset, block, scan_direction);
}
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return m_blocks.is_allocated();
}
private:
KOKKOS_FORCEINLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx,
@ -304,8 +308,8 @@ class Bitset {
template <typename Device>
class ConstBitset {
public:
typedef Device execution_space;
typedef unsigned size_type;
using execution_space = Device;
using size_type = unsigned;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) };
@ -380,9 +384,9 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
"Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>;
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
}
@ -394,9 +398,9 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
"Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>;
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
}
@ -408,9 +412,9 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
"Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>;
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
}

View File

@ -100,99 +100,91 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> traits;
using traits = ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type>;
//! The Kokkos Host Device type;
typedef typename traits::host_mirror_space host_mirror_space;
using host_mirror_space = typename traits::host_mirror_space;
//! The type of a Kokkos::View on the device.
typedef View<typename traits::data_type, Arg1Type, Arg2Type, Arg3Type> t_dev;
using t_dev = View<typename traits::data_type, Arg1Type, Arg2Type, Arg3Type>;
/// \typedef t_host
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
typedef typename t_dev::HostMirror t_host;
using t_host = typename t_dev::HostMirror;
//! The type of a const View on the device.
//! The type of a Kokkos::View on the device.
typedef View<typename traits::const_data_type, Arg1Type, Arg2Type, Arg3Type>
t_dev_const;
using t_dev_const =
View<typename traits::const_data_type, Arg1Type, Arg2Type, Arg3Type>;
/// \typedef t_host_const
/// \brief The type of a const View host mirror of \c t_dev_const.
typedef typename t_dev_const::HostMirror t_host_const;
using t_host_const = typename t_dev_const::HostMirror;
//! The type of a const, random-access View on the device.
typedef View<typename traits::const_data_type, typename traits::array_layout,
typename traits::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >
t_dev_const_randomread;
using t_dev_const_randomread =
View<typename traits::const_data_type, typename traits::array_layout,
typename traits::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >;
/// \typedef t_host_const_randomread
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
using t_host_const_randomread = typename t_dev_const_randomread::HostMirror;
//! The type of an unmanaged View on the device.
typedef View<typename traits::data_type, typename traits::array_layout,
typename traits::device_type, MemoryUnmanaged>
t_dev_um;
using t_dev_um =
View<typename traits::data_type, typename traits::array_layout,
typename traits::device_type, MemoryUnmanaged>;
//! The type of an unmanaged View host mirror of \c t_dev_um.
typedef View<typename t_host::data_type, typename t_host::array_layout,
typename t_host::device_type, MemoryUnmanaged>
t_host_um;
using t_host_um =
View<typename t_host::data_type, typename t_host::array_layout,
typename t_host::device_type, MemoryUnmanaged>;
//! The type of a const unmanaged View on the device.
typedef View<typename traits::const_data_type, typename traits::array_layout,
typename traits::device_type, MemoryUnmanaged>
t_dev_const_um;
using t_dev_const_um =
View<typename traits::const_data_type, typename traits::array_layout,
typename traits::device_type, MemoryUnmanaged>;
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
typedef View<typename t_host::const_data_type, typename t_host::array_layout,
typename t_host::device_type, MemoryUnmanaged>
t_host_const_um;
using t_host_const_um =
View<typename t_host::const_data_type, typename t_host::array_layout,
typename t_host::device_type, MemoryUnmanaged>;
//! The type of a const, random-access View on the device.
typedef View<typename t_host::const_data_type, typename t_host::array_layout,
typename t_host::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess> >
t_dev_const_randomread_um;
using t_dev_const_randomread_um =
View<typename t_host::const_data_type, typename t_host::array_layout,
typename t_host::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess> >;
/// \typedef t_host_const_randomread_um
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef
typename t_dev_const_randomread::HostMirror t_host_const_randomread_um;
//@}
//! \name The two View instances.
//@{
t_dev d_view;
t_host h_view;
using t_host_const_randomread_um =
typename t_dev_const_randomread::HostMirror;
//@}
//! \name Counters to keep track of changes ("modified" flags)
//@{
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
protected:
// modified_flags[0] -> host
// modified_flags[1] -> device
typedef View<unsigned int[2], LayoutLeft, Kokkos::HostSpace> t_modified_flags;
using t_modified_flags = View<unsigned int[2], LayoutLeft, Kokkos::HostSpace>;
t_modified_flags modified_flags;
public:
#else
typedef View<unsigned int[2], LayoutLeft, typename t_host::execution_space>
t_modified_flags;
typedef View<unsigned int, LayoutLeft, typename t_host::execution_space>
t_modified_flag;
t_modified_flags modified_flags;
t_modified_flag modified_host, modified_device;
#endif
//@}
// Moved this specifically after modified_flags to resolve an alignment issue
// on MSVC/NVCC
//! \name The two View instances.
//@{
t_dev d_view;
t_host h_view;
//@}
//! \name Constructors
//@{
@ -201,14 +193,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// Both device and host View objects are constructed using their
/// default constructors. The "modified" flags are both initialized
/// to "unmodified."
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
DualView() = default;
#else
DualView() : modified_flags(t_modified_flags("DualView::modified_flags")) {
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
}
#endif
/// \brief Constructor that allocates View objects on both host and device.
///
@ -228,15 +213,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
: d_view(label, n0, n1, n2, n3, n4, n5, n6, n7),
: modified_flags(t_modified_flags("DualView::modified_flags")),
d_view(label, n0, n1, n2, n3, n4, n5, n6, n7),
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
,
modified_flags(t_modified_flags("DualView::modified_flags")) {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
#endif
}
{}
/// \brief Constructor that allocates View objects on both host and device.
///
@ -260,15 +240,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
: d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
: modified_flags(t_modified_flags("DualView::modified_flags")),
d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
,
modified_flags(t_modified_flags("DualView::modified_flags")) {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
#endif
}
{}
explicit inline DualView(const ViewAllocateWithoutInitializing& arg_prop,
const size_t arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
@ -288,30 +263,16 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
//! Copy constructor (shallow copy).
//!
//! Accepts a DualView with different template arguments and copies its
//! modified_flags, d_view and h_view members. The initializers are listed in
//! the same order the other constructors of this class use
//! (modified_flags, then d_view, then h_view).
template <class SS, class LS, class DS, class MS>
DualView(const DualView<SS, LS, DS, MS>& src)
    : modified_flags(src.modified_flags),
      d_view(src.d_view),
      h_view(src.h_view) {}
//! Subview constructor.
//!
//! Shares the modified_flags of \c src and builds both data Views by applying
//! Kokkos::subview with the same slice arguments (arg0, args...) to
//! src.d_view and src.h_view. The initializers are listed in the same order
//! the other constructors of this class use (modified_flags, d_view, h_view).
template <class SD, class S1, class S2, class S3, class Arg0, class... Args>
DualView(const DualView<SD, S1, S2, S3>& src, const Arg0& arg0, Args... args)
    : modified_flags(src.modified_flags),
      d_view(Kokkos::subview(src.d_view, arg0, args...)),
      h_view(Kokkos::subview(src.h_view, arg0, args...)) {}
/// \brief Create DualView from existing device and host View objects.
///
@ -324,9 +285,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// \param d_view_ Device View
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
DualView(const t_dev& d_view_, const t_host& h_view_)
: d_view(d_view_),
h_view(h_view_),
modified_flags(t_modified_flags("DualView::modified_flags")) {
: modified_flags(t_modified_flags("DualView::modified_flags")),
d_view(d_view_),
h_view(h_view_) {
if (int(d_view.rank) != int(h_view.rank) ||
d_view.extent(0) != h_view.extent(0) ||
d_view.extent(1) != h_view.extent(1) ||
@ -348,10 +309,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
Kokkos::Impl::throw_runtime_exception(
"DualView constructed with incompatible views");
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
#endif
}
//@}
@ -367,20 +324,25 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
///
/// For example, suppose you create a DualView on Cuda, like this:
/// \code
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda>
/// dual_view_type; dual_view_type DV ("my dual view", 100); \endcode If you
/// want to get the CUDA device View, do this: \code typename
/// dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> (); \endcode and if
/// you want to get the host mirror of that View, do this: \code typedef
/// typename Kokkos::HostSpace::execution_space host_device_type; typename
/// dual_view_type::t_host hostView = DV.view<host_device_type> (); \endcode
/// using dual_view_type =
/// Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda>;
/// dual_view_type DV ("my dual view", 100);
/// \endcode
/// If you want to get the CUDA device View, do this:
/// \code
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
/// \endcode
/// and if you want to get the host mirror of that View, do this:
/// \code
/// using host_device_type = typename Kokkos::HostSpace::execution_space;
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
/// \endcode
template <class Device>
KOKKOS_INLINE_FUNCTION const typename Impl::if_c<
std::is_same<typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev, t_host>::type&
view() const {
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
constexpr bool device_is_memspace =
std::is_same<Device, typename Device::memory_space>::value;
constexpr bool device_is_execspace =
@ -415,7 +377,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
(device_exec_is_t_dev_exec || device_exec_is_t_host_exec))),
"Template parameter to .view() must exactly match one of the "
"DualView's device types or one of the execution or memory spaces");
#endif
return Impl::if_c<std::is_same<typename t_dev::memory_space,
typename Device::memory_space>::value,
@ -428,6 +389,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
//! Return a copy of the d_view member (typed t_dev).
KOKKOS_INLINE_FUNCTION
t_dev view_device() const {
  return d_view;
}
//! True only when both d_view and h_view report is_allocated().
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
  return d_view.is_allocated() && h_view.is_allocated();
}
template <class Device>
static int get_device_side() {
constexpr bool device_is_memspace =
@ -453,7 +418,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
std::is_same<typename Device::memory_space,
typename t_host::device_type>::value;
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
static_assert(
device_is_t_dev_device || device_is_t_host_device ||
(device_is_memspace &&
@ -465,13 +429,8 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
(device_exec_is_t_dev_exec || device_exec_is_t_host_exec))),
"Template parameter to .sync() must exactly match one of the "
"DualView's device types or one of the execution or memory spaces");
#endif
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
int dev = -1;
#else
int dev = 0;
#endif
if (device_is_t_dev_device)
dev = 1;
else if (device_is_t_host_device)
@ -822,11 +781,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
//! \name Methods for getting capacity, stride, or dimension(s).
//@{
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
//! The allocation size (same as Kokkos::View::capacity).
size_t capacity() const { return d_view.span(); }
#endif
//! The allocation size (same as Kokkos::View::span).
//! Forwards to d_view.span().
KOKKOS_INLINE_FUNCTION constexpr size_t span() const {
  return d_view.span();
}
@ -854,29 +808,6 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
return static_cast<int>(d_view.extent(r));
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
/* Deprecate all 'dimension' functions in favor of
* ISO/C++ vocabulary 'extent'.
*/
/* \brief return size of dimension 0 */
size_t dimension_0() const { return d_view.extent(0); }
/* \brief return size of dimension 1 */
size_t dimension_1() const { return d_view.extent(1); }
/* \brief return size of dimension 2 */
size_t dimension_2() const { return d_view.extent(2); }
/* \brief return size of dimension 3 */
size_t dimension_3() const { return d_view.extent(3); }
/* \brief return size of dimension 4 */
size_t dimension_4() const { return d_view.extent(4); }
/* \brief return size of dimension 5 */
size_t dimension_5() const { return d_view.extent(5); }
/* \brief return size of dimension 6 */
size_t dimension_6() const { return d_view.extent(6); }
/* \brief return size of dimension 7 */
size_t dimension_7() const { return d_view.extent(7); }
#endif
//@}
};
@ -893,13 +824,12 @@ namespace Impl {
/// \brief Metafunction naming the DualView type built from a ViewMapping's
///   result traits.
///
/// \c dst_traits is the \c traits_type reported by
/// Kokkos::Impl::ViewMapping<void, ViewTraits<D, A1, A2, A3>, Args...>;
/// \c type is the DualView assembled from that traits class' data_type,
/// array_layout, device_type and memory_traits.
template <class D, class A1, class A2, class A3, class... Args>
struct DualViewSubview {
  using dst_traits = typename Kokkos::Impl::ViewMapping<
      void, Kokkos::ViewTraits<D, A1, A2, A3>, Args...>::traits_type;

  using type = Kokkos::DualView<
      typename dst_traits::data_type, typename dst_traits::array_layout,
      typename dst_traits::device_type, typename dst_traits::memory_traits>;
};
} /* namespace Impl */

View File

@ -349,8 +349,8 @@ class ViewMapping<
public:
enum { is_assignable = is_assignable_value_type && is_assignable_layout };
typedef ViewMapping<DstTraits, typename DstTraits::specialize> DstType;
typedef ViewMapping<SrcTraits, typename SrcTraits::specialize> SrcType;
using DstType = ViewMapping<DstTraits, typename DstTraits::specialize>;
using SrcType = ViewMapping<SrcTraits, typename SrcTraits::specialize>;
template <typename DT, typename... DP, typename ST, typename... SP>
KOKKOS_INLINE_FUNCTION static void assign(
@ -365,13 +365,13 @@ class ViewMapping<
// Removed dimension checks...
typedef typename DstType::offset_type dst_offset_type;
using dst_offset_type = typename DstType::offset_type;
dst.m_map.m_impl_offset = dst_offset_type(
std::integral_constant<unsigned, 0>(),
src.layout()); // Check this for integer input1 for padding, etc
dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle<DstTraits>::assign(
src.m_map.m_impl_handle, src.m_track);
dst.m_track.assign(src.m_track, DstTraits::is_managed);
src.m_map.m_impl_handle, src.m_track.m_tracker);
dst.m_track.assign(src.m_track.m_tracker, DstTraits::is_managed);
dst.m_rank = src.Rank;
}
};
@ -415,16 +415,16 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
friend class Kokkos::Impl::ViewMapping;
public:
typedef ViewTraits<DataType, Properties...> drvtraits;
using drvtraits = ViewTraits<DataType, Properties...>;
typedef View<DataType*******, Properties...> view_type;
using view_type = View<DataType*******, Properties...>;
typedef ViewTraits<DataType*******, Properties...> traits;
using traits = ViewTraits<DataType*******, Properties...>;
private:
typedef Kokkos::Impl::ViewMapping<traits, typename traits::specialize>
map_type;
typedef Kokkos::Impl::SharedAllocationTracker track_type;
using map_type =
Kokkos::Impl::ViewMapping<traits, typename traits::specialize>;
using track_type = Kokkos::Impl::SharedAllocationTracker;
track_type m_track;
map_type m_map;
@ -440,28 +440,24 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
// 7 data_type of the traits
/** \brief Compatible view of array of scalar types */
typedef DynRankView<
using array_type = DynRankView<
typename drvtraits::scalar_array_type, typename drvtraits::array_layout,
typename drvtraits::device_type, typename drvtraits::memory_traits>
array_type;
typename drvtraits::device_type, typename drvtraits::memory_traits>;
/** \brief Compatible view of const data type */
typedef DynRankView<
using const_type = DynRankView<
typename drvtraits::const_data_type, typename drvtraits::array_layout,
typename drvtraits::device_type, typename drvtraits::memory_traits>
const_type;
typename drvtraits::device_type, typename drvtraits::memory_traits>;
/** \brief Compatible view of non-const data type */
typedef DynRankView<
using non_const_type = DynRankView<
typename drvtraits::non_const_data_type, typename drvtraits::array_layout,
typename drvtraits::device_type, typename drvtraits::memory_traits>
non_const_type;
typename drvtraits::device_type, typename drvtraits::memory_traits>;
/** \brief Compatible HostMirror view */
typedef DynRankView<typename drvtraits::non_const_data_type,
typename drvtraits::array_layout,
typename drvtraits::host_mirror_space>
HostMirror;
using HostMirror = DynRankView<typename drvtraits::non_const_data_type,
typename drvtraits::array_layout,
typename drvtraits::host_mirror_space>;
//----------------------------------------
// Domain rank and extents
@ -493,42 +489,6 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
* ISO/C++ vocabulary 'extent'.
*/
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template <typename iType>
KOKKOS_INLINE_FUNCTION constexpr
typename std::enable_if<std::is_integral<iType>::value, size_t>::type
dimension(const iType& r) const {
return extent(r);
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const {
return m_map.dimension_0();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const {
return m_map.dimension_1();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const {
return m_map.dimension_2();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const {
return m_map.dimension_3();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const {
return m_map.dimension_4();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const {
return m_map.dimension_5();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const {
return m_map.dimension_6();
}
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const {
return m_map.dimension_7();
}
#endif
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_t size() const {
return m_map.extent(0) * m_map.extent(1) * m_map.extent(2) *
m_map.extent(3) * m_map.extent(4) * m_map.extent(5) *
@ -568,8 +528,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
//----------------------------------------
// Range span is the span which contains all members.
typedef typename map_type::reference_type reference_type;
typedef typename map_type::pointer_type pointer_type;
using reference_type = typename map_type::reference_type;
using pointer_type = typename map_type::pointer_type;
enum {
reference_type_is_lvalue_reference =
@ -577,39 +537,18 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
};
// Range span as reported by the mapping (m_map.span()).
KOKKOS_INLINE_FUNCTION constexpr size_t span() const {
  return m_map.span();
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// Deprecated, use 'span()' instead
KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const {
return m_map.span();
}
#endif
// Forwards the mapping's answer (m_map.span_is_contiguous()).
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
  return m_map.span_is_contiguous();
}
// Pointer returned by the mapping (m_map.data()).
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const {
  return m_map.data();
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// Deprecated, use 'span_is_contigous()' instead
KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const {
return m_map.span_is_contiguous();
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return (m_map.data() != nullptr);
}
// Deprecated, use 'data()' instead
KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const {
return m_map.data();
}
#endif
//----------------------------------------
// Allow specializations to query their specialized map
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
KOKKOS_INLINE_FUNCTION
const Kokkos::Impl::ViewMapping<traits, typename traits::specialize>&
implementation_map() const {
return m_map;
}
#endif
KOKKOS_INLINE_FUNCTION
const Kokkos::Impl::ViewMapping<traits, typename traits::specialize>&
impl_map() const {
@ -709,12 +648,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
const size_t dim_scalar = m_map.dimension_scalar();
const size_t bytes = this->span() / dim_scalar;
typedef Kokkos::View<
using tmp_view_type = Kokkos::View<
DataType*, typename traits::array_layout, typename traits::device_type,
Kokkos::MemoryTraits<traits::memory_traits::is_unmanaged |
traits::memory_traits::is_random_access |
traits::memory_traits::is_atomic> >
tmp_view_type;
traits::memory_traits::is_atomic> >;
tmp_view_type rankone_view(this->data(), bytes, dim_scalar);
return rankone_view(i0);
}
@ -1102,10 +1040,9 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
template <class RT, class... RP>
KOKKOS_INLINE_FUNCTION DynRankView(const DynRankView<RT, RP...>& rhs)
: m_track(rhs.m_track, traits::is_managed), m_map(), m_rank(rhs.m_rank) {
typedef typename DynRankView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
typename traits::specialize>
Mapping;
using SrcTraits = typename DynRankView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits,
typename traits::specialize>;
static_assert(Mapping::is_assignable,
"Incompatible DynRankView copy construction");
Mapping::assign(m_map, rhs.m_map, rhs.m_track);
@ -1114,10 +1051,9 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
template <class RT, class... RP>
KOKKOS_INLINE_FUNCTION DynRankView& operator=(
const DynRankView<RT, RP...>& rhs) {
typedef typename DynRankView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
typename traits::specialize>
Mapping;
using SrcTraits = typename DynRankView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits,
typename traits::specialize>;
static_assert(Mapping::is_assignable,
"Incompatible DynRankView copy construction");
Mapping::assign(m_map, rhs.m_map, rhs.m_track);
@ -1130,10 +1066,10 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
template <class RT, class... RP>
KOKKOS_INLINE_FUNCTION DynRankView(const View<RT, RP...>& rhs)
: m_track(), m_map(), m_rank(rhs.Rank) {
typedef typename View<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
Kokkos::Impl::ViewToDynRankViewTag>
Mapping;
using SrcTraits = typename View<RT, RP...>::traits;
using Mapping =
Kokkos::Impl::ViewMapping<traits, SrcTraits,
Kokkos::Impl::ViewToDynRankViewTag>;
static_assert(Mapping::is_assignable,
"Incompatible View to DynRankView copy construction");
Mapping::assign(*this, rhs);
@ -1141,10 +1077,10 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
template <class RT, class... RP>
KOKKOS_INLINE_FUNCTION DynRankView& operator=(const View<RT, RP...>& rhs) {
typedef typename View<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits,
Kokkos::Impl::ViewToDynRankViewTag>
Mapping;
using SrcTraits = typename View<RT, RP...>::traits;
using Mapping =
Kokkos::Impl::ViewMapping<traits, SrcTraits,
Kokkos::Impl::ViewToDynRankViewTag>;
static_assert(Mapping::is_assignable,
"Incompatible View to DynRankView copy assignment");
Mapping::assign(*this, rhs);
@ -1177,11 +1113,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
template computeRank<typename traits::array_layout, P...>(
arg_prop, arg_layout)) {
// Append layout and spaces if not input
typedef Kokkos::Impl::ViewCtorProp<P...> alloc_prop_input;
using alloc_prop_input = Kokkos::Impl::ViewCtorProp<P...>;
// use 'std::integral_constant<unsigned,I>' for non-types
// to avoid duplicate class error.
typedef Kokkos::Impl::ViewCtorProp<
using alloc_prop = Kokkos::Impl::ViewCtorProp<
P...,
typename std::conditional<alloc_prop_input::has_label,
std::integral_constant<unsigned, 0>,
@ -1193,19 +1129,13 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
typename std::conditional<
alloc_prop_input::has_execution_space,
std::integral_constant<unsigned, 2>,
typename traits::device_type::execution_space>::type>
alloc_prop;
typename traits::device_type::execution_space>::type>;
static_assert(traits::is_managed,
"View allocation constructor requires managed memory");
if (alloc_prop::initialize &&
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
!alloc_prop::execution_space::is_initialized()
#else
!alloc_prop::execution_space::impl_is_initialized()
#endif
) {
!alloc_prop::execution_space::impl_is_initialized()) {
// If initializing view data then
// the execution space must be initialized.
Kokkos::Impl::throw_runtime_exception(
@ -1499,36 +1429,34 @@ struct ViewMapping<
unsigned(R4) + unsigned(R5) + unsigned(R6)
};
typedef Kokkos::LayoutStride array_layout;
using array_layout = Kokkos::LayoutStride;
typedef typename SrcTraits::value_type value_type;
using value_type = typename SrcTraits::value_type;
typedef value_type******* data_type;
using data_type = value_type*******;
public:
typedef Kokkos::ViewTraits<data_type, array_layout,
typename SrcTraits::device_type,
typename SrcTraits::memory_traits>
traits_type;
using traits_type = Kokkos::ViewTraits<data_type, array_layout,
typename SrcTraits::device_type,
typename SrcTraits::memory_traits>;
typedef Kokkos::View<data_type, array_layout, typename SrcTraits::device_type,
typename SrcTraits::memory_traits>
type;
using type =
Kokkos::View<data_type, array_layout, typename SrcTraits::device_type,
typename SrcTraits::memory_traits>;
template <class MemoryTraits>
struct apply {
static_assert(Kokkos::Impl::is_memory_traits<MemoryTraits>::value, "");
typedef Kokkos::ViewTraits<data_type, array_layout,
typename SrcTraits::device_type, MemoryTraits>
traits_type;
using traits_type =
Kokkos::ViewTraits<data_type, array_layout,
typename SrcTraits::device_type, MemoryTraits>;
typedef Kokkos::View<data_type, array_layout,
typename SrcTraits::device_type, MemoryTraits>
type;
using type = Kokkos::View<data_type, array_layout,
typename SrcTraits::device_type, MemoryTraits>;
};
typedef typename SrcTraits::dimension dimension;
using dimension = typename SrcTraits::dimension;
template <class Arg0 = int, class Arg1 = int, class Arg2 = int,
class Arg3 = int, class Arg4 = int, class Arg5 = int,
@ -1544,18 +1472,17 @@ struct ViewMapping<
}
};
typedef Kokkos::DynRankView<value_type, array_layout,
typename SrcTraits::device_type,
typename SrcTraits::memory_traits>
ret_type;
using ret_type = Kokkos::DynRankView<value_type, array_layout,
typename SrcTraits::device_type,
typename SrcTraits::memory_traits>;
template <typename T, class... P>
KOKKOS_INLINE_FUNCTION static ret_type subview(
const unsigned src_rank, Kokkos::DynRankView<T, P...> const& src,
Args... args) {
typedef ViewMapping<traits_type, typename traits_type::specialize> DstType;
using DstType = ViewMapping<traits_type, typename traits_type::specialize>;
typedef typename std::conditional<
using DstDimType = typename std::conditional<
(rank == 0), ViewDimension<>,
typename std::conditional<
(rank == 1), ViewDimension<0>,
@ -1570,10 +1497,10 @@ struct ViewMapping<
typename std::conditional<
(rank == 6), ViewDimension<0, 0, 0, 0, 0, 0>,
ViewDimension<0, 0, 0, 0, 0, 0, 0> >::type>::
type>::type>::type>::type>::type>::type DstDimType;
type>::type>::type>::type>::type>::type;
typedef ViewOffset<DstDimType, Kokkos::LayoutStride> dst_offset_type;
typedef typename DstType::handle_type dst_handle_type;
using dst_offset_type = ViewOffset<DstDimType, Kokkos::LayoutStride>;
using dst_handle_type = typename DstType::handle_type;
ret_type dst;
@ -1636,9 +1563,9 @@ subdynrankview(const Kokkos::DynRankView<D, P...>& src, Args... args) {
"DynRankView");
}
typedef Kokkos::Impl::ViewMapping<Kokkos::Impl::DynRankSubviewTag,
Kokkos::ViewTraits<D*******, P...>, Args...>
metafcn;
using metafcn =
Kokkos::Impl::ViewMapping<Kokkos::Impl::DynRankSubviewTag,
Kokkos::ViewTraits<D*******, P...>, Args...>;
return metafcn::subview(src.rank(), src, args...);
}
@ -1659,8 +1586,8 @@ template <class LT, class... LP, class RT, class... RP>
KOKKOS_INLINE_FUNCTION bool operator==(const DynRankView<LT, LP...>& lhs,
const DynRankView<RT, RP...>& rhs) {
// Same data, layout, dimensions
typedef ViewTraits<LT, LP...> lhs_traits;
typedef ViewTraits<RT, RP...> rhs_traits;
using lhs_traits = ViewTraits<LT, LP...>;
using rhs_traits = ViewTraits<RT, RP...>;
return std::is_same<typename lhs_traits::const_value_type,
typename rhs_traits::const_value_type>::value &&
@ -1691,7 +1618,7 @@ namespace Impl {
template <class OutputView, typename Enable = void>
struct DynRankViewFill {
typedef typename OutputView::traits::const_value_type const_value_type;
using const_value_type = typename OutputView::traits::const_value_type;
const OutputView output;
const_value_type input;
@ -1722,15 +1649,11 @@ struct DynRankViewFill {
// Store the destination view and the fill value, then launch this functor
// once with Kokkos::parallel_for over the range [0, output.extent(0)).
//
// The kernel is dispatched exactly once under the label
// "Kokkos::DynRankViewFill". No fence is issued here, so any required
// synchronization is left to the caller.
DynRankViewFill(const OutputView& arg_out, const_value_type& arg_in)
    : output(arg_out), input(arg_in) {
  using execution_space = typename OutputView::execution_space;
  using Policy          = Kokkos::RangePolicy<execution_space>;

  Kokkos::parallel_for("Kokkos::DynRankViewFill", Policy(0, output.extent(0)),
                       *this);
}
};
@ -1770,11 +1693,9 @@ struct DynRankViewRemap {
n5(std::min((size_t)arg_out.extent(5), (size_t)arg_in.extent(5))),
n6(std::min((size_t)arg_out.extent(6), (size_t)arg_in.extent(6))),
n7(std::min((size_t)arg_out.extent(7), (size_t)arg_in.extent(7))) {
typedef Kokkos::RangePolicy<ExecSpace> Policy;
const Kokkos::Impl::ParallelFor<DynRankViewRemap, Policy> closure(
*this, Policy(0, n0));
closure.execute();
// ExecSpace().fence(); // ??
using Policy = Kokkos::RangePolicy<ExecSpace>;
Kokkos::parallel_for("Kokkos::DynRankViewRemap", Policy(0, n0), *this);
}
KOKKOS_INLINE_FUNCTION
@ -1814,7 +1735,9 @@ inline void deep_copy(
typename ViewTraits<DT, DP...>::value_type>::value,
"deep_copy requires non-const type");
Kokkos::fence();
Kokkos::Impl::DynRankViewFill<DynRankView<DT, DP...> >(dst, value);
Kokkos::fence();
}
/** \brief Deep copy into a value in Host memory from a view. */
@ -1828,10 +1751,12 @@ inline void deep_copy(
Kokkos::abort("");
}
typedef ViewTraits<ST, SP...> src_traits;
typedef typename src_traits::memory_space src_memory_space;
using src_traits = ViewTraits<ST, SP...>;
using src_memory_space = typename src_traits::memory_space;
Kokkos::fence();
Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
sizeof(ST));
Kokkos::fence();
}
//----------------------------------------------------------------------------
@ -1851,13 +1776,13 @@ inline void deep_copy(
typename DstType::traits::non_const_value_type>::value,
"deep_copy requires non-const destination type");
typedef DstType dst_type;
typedef SrcType src_type;
using dst_type = DstType;
using src_type = SrcType;
typedef typename dst_type::execution_space dst_execution_space;
typedef typename src_type::execution_space src_execution_space;
typedef typename dst_type::memory_space dst_memory_space;
typedef typename src_type::memory_space src_memory_space;
using dst_execution_space = typename dst_type::execution_space;
using src_execution_space = typename src_type::execution_space;
using dst_memory_space = typename dst_type::memory_space;
using src_memory_space = typename src_type::memory_space;
enum {
DstExecCanAccessSrc =
@ -1878,9 +1803,11 @@ inline void deep_copy(
// If same type, equal layout, equal dimensions, equal span, and contiguous
// memory then can byte-wise copy
if (rank(src) == 0 && rank(dst) == 0) {
typedef typename dst_type::value_type value_type;
using value_type = typename dst_type::value_type;
Kokkos::fence();
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), sizeof(value_type));
Kokkos::fence();
} else if (std::is_same<
typename DstType::traits::value_type,
typename SrcType::traits::non_const_value_type>::value &&
@ -1902,9 +1829,10 @@ inline void deep_copy(
dst.extent(6) == src.extent(6) &&
dst.extent(7) == src.extent(7)) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
Kokkos::fence();
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), nbytes);
Kokkos::fence();
} else if (std::is_same<
typename DstType::traits::value_type,
typename SrcType::traits::non_const_value_type>::value &&
@ -1931,22 +1859,29 @@ inline void deep_copy(
dst.stride_6() == src.stride_6() &&
dst.stride_7() == src.stride_7()) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
Kokkos::fence();
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), nbytes);
Kokkos::fence();
} else if (DstExecCanAccessSrc) {
// Copying data between views in accessible memory spaces and either
// non-contiguous or incompatible shape.
Kokkos::fence();
Kokkos::Impl::DynRankViewRemap<dst_type, src_type>(dst, src);
Kokkos::fence();
} else if (SrcExecCanAccessDst) {
// Copying data between views in accessible memory spaces and either
// non-contiguous or incompatible shape.
Kokkos::fence();
Kokkos::Impl::DynRankViewRemap<dst_type, src_type, src_execution_space>(
dst, src);
Kokkos::fence();
} else {
Kokkos::Impl::throw_runtime_exception(
"deep_copy given views that would require a temporary allocation");
}
} else {
Kokkos::fence();
}
}
@ -1962,45 +1897,45 @@ namespace Impl {
// Type selector for a DynRankView mirror in the memory space of Space.
// view_type is the source view type itself when Space::memory_space equals
// the source view's memory space, and a DynRankView in Space otherwise.
template <class Space, class T, class... P>
struct MirrorDRViewType {
  // The incoming view_type
  using src_view_type = typename Kokkos::DynRankView<T, P...>;
  // The memory space for the mirror view
  using memory_space = typename Space::memory_space;
  // Check whether it is the same memory space
  enum {
    is_same_memspace =
        std::is_same<memory_space, typename src_view_type::memory_space>::value
  };
  // The array_layout
  using array_layout = typename src_view_type::array_layout;
  // The data type (we probably want it non-const since otherwise we can't even
  // deep_copy to it).
  using data_type = typename src_view_type::non_const_data_type;
  // The destination view type if it is not the same memory space
  using dest_view_type = Kokkos::DynRankView<data_type, array_layout, Space>;
  // If it is the same memory_space return the existing view_type
  // This will also keep the unmanaged trait if necessary
  using view_type = typename std::conditional<is_same_memspace, src_view_type,
                                              dest_view_type>::type;
};
template <class Space, class T, class... P>
struct MirrorDRVType {
// The incoming view_type
typedef typename Kokkos::DynRankView<T, P...> src_view_type;
using src_view_type = typename Kokkos::DynRankView<T, P...>;
// The memory space for the mirror view
typedef typename Space::memory_space memory_space;
using memory_space = typename Space::memory_space;
// Check whether it is the same memory space
enum {
is_same_memspace =
std::is_same<memory_space, typename src_view_type::memory_space>::value
};
// The array_layout
typedef typename src_view_type::array_layout array_layout;
using array_layout = typename src_view_type::array_layout;
// The data type (we probably want it non-const since otherwise we can't even
// deep_copy to it).
typedef typename src_view_type::non_const_data_type data_type;
using data_type = typename src_view_type::non_const_data_type;
// The destination view type if it is not the same memory space
typedef Kokkos::DynRankView<data_type, array_layout, Space> view_type;
using view_type = Kokkos::DynRankView<data_type, array_layout, Space>;
};
} // namespace Impl
@ -2012,8 +1947,8 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = nullptr) {
typedef DynRankView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type;
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
return dst_type(std::string(src.label()).append("_mirror"),
Impl::reconstructLayout(src.layout(), src.rank()));
@ -2026,8 +1961,8 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = 0) {
typedef DynRankView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type;
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
return dst_type(std::string(src.label()).append("_mirror"),
Impl::reconstructLayout(src.layout(), src.rank()));
@ -2066,7 +2001,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror_view(
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::
value)>::type* = 0) {
value)>::type* = nullptr) {
return Kokkos::create_mirror(src);
}
@ -2085,7 +2020,8 @@ template <class Space, class T, class... P>
typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view(
const Space&, const Kokkos::DynRankView<T, P...>& src,
typename std::enable_if<
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
return typename Impl::MirrorDRViewType<Space, T, P...>::view_type(
src.label(), Impl::reconstructLayout(src.layout(), src.rank()));
}
@ -2112,7 +2048,8 @@ create_mirror_view_and_copy(
const Space&, const Kokkos::DynRankView<T, P...>& src,
std::string const& name = "",
typename std::enable_if<
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
std::string label = name.empty() ? src.label() : name;
auto mirror = Mirror(Kokkos::ViewAllocateWithoutInitializing(label),
@ -2139,7 +2076,7 @@ inline void resize(DynRankView<T, P...>& v,
const size_t n5 = KOKKOS_INVALID_INDEX,
const size_t n6 = KOKKOS_INVALID_INDEX,
const size_t n7 = KOKKOS_INVALID_INDEX) {
typedef DynRankView<T, P...> drview_type;
using drview_type = DynRankView<T, P...>;
static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
"Can only resize managed views");
@ -2163,7 +2100,7 @@ inline void realloc(DynRankView<T, P...>& v,
const size_t n5 = KOKKOS_INVALID_INDEX,
const size_t n6 = KOKKOS_INVALID_INDEX,
const size_t n7 = KOKKOS_INVALID_INDEX) {
typedef DynRankView<T, P...> drview_type;
using drview_type = DynRankView<T, P...>;
static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
"Can only realloc managed views");

View File

@ -85,13 +85,13 @@ struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> {
template <typename DataType, typename... P>
class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
public:
typedef Kokkos::ViewTraits<DataType, P...> traits;
using traits = Kokkos::ViewTraits<DataType, P...>;
private:
template <class, class...>
friend class DynamicView;
typedef Kokkos::Impl::SharedAllocationTracker track_type;
using track_type = Kokkos::Impl::SharedAllocationTracker;
static_assert(traits::rank == 1 && traits::rank_dynamic == 1,
"DynamicView must be rank-one");
@ -118,8 +118,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
private:
track_type m_track;
typename traits::value_type**
m_chunks; // array of pointers to 'chunks' of memory
typename traits::value_type** m_chunks =
nullptr; // array of pointers to 'chunks' of memory
unsigned m_chunk_shift; // ceil(log2(m_chunk_size))
unsigned m_chunk_mask; // m_chunk_size - 1
unsigned m_chunk_max; // number of entries in the chunk array - each pointing
@ -130,38 +130,36 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
//----------------------------------------------------------------------
/** \brief Compatible view of array of scalar types */
typedef DynamicView<typename traits::data_type, typename traits::device_type>
array_type;
using array_type =
DynamicView<typename traits::data_type, typename traits::device_type>;
/** \brief Compatible view of const data type */
typedef DynamicView<typename traits::const_data_type,
typename traits::device_type>
const_type;
using const_type = DynamicView<typename traits::const_data_type,
typename traits::device_type>;
/** \brief Compatible view of non-const data type */
typedef DynamicView<typename traits::non_const_data_type,
typename traits::device_type>
non_const_type;
using non_const_type = DynamicView<typename traits::non_const_data_type,
typename traits::device_type>;
/** \brief Must be accessible everywhere */
typedef DynamicView HostMirror;
using HostMirror = DynamicView;
/** \brief Unified types */
typedef Kokkos::Device<typename traits::device_type::execution_space,
Kokkos::AnonymousSpace>
uniform_device;
typedef array_type uniform_type;
typedef const_type uniform_const_type;
typedef array_type uniform_runtime_type;
typedef const_type uniform_runtime_const_type;
typedef DynamicView<typename traits::data_type, uniform_device>
uniform_nomemspace_type;
typedef DynamicView<typename traits::const_data_type, uniform_device>
uniform_const_nomemspace_type;
typedef DynamicView<typename traits::data_type, uniform_device>
uniform_runtime_nomemspace_type;
typedef DynamicView<typename traits::const_data_type, uniform_device>
uniform_runtime_const_nomemspace_type;
using uniform_device =
Kokkos::Device<typename traits::device_type::execution_space,
Kokkos::AnonymousSpace>;
using uniform_type = array_type;
using uniform_const_type = const_type;
using uniform_runtime_type = array_type;
using uniform_runtime_const_type = const_type;
using uniform_nomemspace_type =
DynamicView<typename traits::data_type, uniform_device>;
using uniform_const_nomemspace_type =
DynamicView<typename traits::const_data_type, uniform_device>;
using uniform_runtime_nomemspace_type =
DynamicView<typename traits::data_type, uniform_device>;
using uniform_runtime_const_nomemspace_type =
DynamicView<typename traits::const_data_type, uniform_device>;
//----------------------------------------------------------------------
@ -193,17 +191,6 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
return r == 0 ? size() : 1;
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
KOKKOS_INLINE_FUNCTION size_t dimension_0() const { return size(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return 1; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return 1; }
#endif
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return 0; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return 0; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return 0; }
@ -231,8 +218,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
//----------------------------------------------------------------------
// Range span is the span which contains all members.
typedef typename traits::value_type& reference_type;
typedef typename traits::value_type* pointer_type;
using reference_type = typename traits::value_type&;
using pointer_type = typename traits::value_type*;
enum {
reference_type_is_lvalue_reference =
@ -299,8 +286,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
typename Impl::ChunkArraySpace<
typename traits::memory_space>::memory_space>::accessible>::type
resize_serial(IntType const& n) {
typedef typename traits::value_type local_value_type;
typedef local_value_type* value_pointer_type;
using local_value_type = typename traits::value_type;
using value_pointer_type = local_value_type*;
const uintptr_t NC =
(n + m_chunk_mask) >>
@ -332,6 +319,17 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
*(pc + 1) = n;
}
KOKKOS_INLINE_FUNCTION bool is_allocated() const {
if (m_chunks == nullptr) {
return false;
} else {
// *m_chunks[m_chunk_max] stores the current number of chunks being used
uintptr_t* const pc =
reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
return (*(pc + 1) > 0);
}
}
//----------------------------------------------------------------------
~DynamicView() = default;
@ -349,8 +347,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
m_chunk_mask(rhs.m_chunk_mask),
m_chunk_max(rhs.m_chunk_max),
m_chunk_size(rhs.m_chunk_size) {
typedef typename DynamicView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
using SrcTraits = typename DynamicView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
static_assert(Mapping::is_assignable,
"Incompatible DynamicView copy construction");
}
@ -373,9 +371,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
}
void execute(bool arg_destroy) {
typedef Kokkos::RangePolicy<typename HostSpace::execution_space> Range;
// typedef Kokkos::RangePolicy< typename Impl::ChunkArraySpace< typename
// traits::memory_space >::memory_space::execution_space > Range ;
using Range = Kokkos::RangePolicy<typename HostSpace::execution_space>;
m_destroy = arg_destroy;
@ -431,12 +427,11 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
m_chunk_shift) // max num pointers-to-chunks in array
,
m_chunk_size(2 << (m_chunk_shift - 1)) {
typedef typename Impl::ChunkArraySpace<
typename traits::memory_space>::memory_space chunk_array_memory_space;
using chunk_array_memory_space = typename Impl::ChunkArraySpace<
typename traits::memory_space>::memory_space;
// A functor to deallocate all of the chunks upon final destruction
typedef Kokkos::Impl::SharedAllocationRecord<chunk_array_memory_space,
Destroy>
record_type;
using record_type =
Kokkos::Impl::SharedAllocationRecord<chunk_array_memory_space, Destroy>;
// Allocate chunk pointers and allocation counter
record_type* const record =
@ -471,11 +466,11 @@ create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src) {
template <class T, class... DP, class... SP>
inline void deep_copy(const View<T, DP...>& dst,
const Kokkos::Experimental::DynamicView<T, SP...>& src) {
typedef View<T, DP...> dst_type;
typedef Kokkos::Experimental::DynamicView<T, SP...> src_type;
using dst_type = View<T, DP...>;
using src_type = Kokkos::Experimental::DynamicView<T, SP...>;
typedef typename ViewTraits<T, DP...>::execution_space dst_execution_space;
typedef typename ViewTraits<T, SP...>::memory_space src_memory_space;
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;
enum {
DstExecCanAccessSrc =
@ -496,11 +491,11 @@ inline void deep_copy(const View<T, DP...>& dst,
template <class T, class... DP, class... SP>
inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst,
const View<T, SP...>& src) {
typedef Kokkos::Experimental::DynamicView<T, SP...> dst_type;
typedef View<T, DP...> src_type;
using dst_type = Kokkos::Experimental::DynamicView<T, SP...>;
using src_type = View<T, DP...>;
typedef typename ViewTraits<T, DP...>::execution_space dst_execution_space;
typedef typename ViewTraits<T, SP...>::memory_space src_memory_space;
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;
enum {
DstExecCanAccessSrc =
@ -522,10 +517,10 @@ namespace Impl {
template <class Arg0, class... DP, class... SP>
struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
Kokkos::Experimental::DynamicView<SP...>, 1, Arg0> {
typedef Kokkos::Experimental::DynamicView<DP...> DstType;
typedef Kokkos::Experimental::DynamicView<SP...> SrcType;
typedef DstType dst_subview_type;
typedef SrcType src_subview_type;
using DstType = Kokkos::Experimental::DynamicView<DP...>;
using SrcType = Kokkos::Experimental::DynamicView<SP...>;
using dst_subview_type = DstType;
using src_subview_type = SrcType;
dst_subview_type dst_sub;
src_subview_type src_sub;
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& /*arg0*/)
@ -535,9 +530,9 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
template <class... DP, class SrcType, class Arg0>
struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>, SrcType, 1,
Arg0> {
typedef Kokkos::Experimental::DynamicView<DP...> DstType;
typedef DstType dst_subview_type;
typedef typename Kokkos::Subview<SrcType, Arg0> src_subview_type;
using DstType = Kokkos::Experimental::DynamicView<DP...>;
using dst_subview_type = DstType;
using src_subview_type = typename Kokkos::Subview<SrcType, Arg0>;
dst_subview_type dst_sub;
src_subview_type src_sub;
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0)
@ -547,9 +542,9 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>, SrcType, 1,
template <class DstType, class... SP, class Arg0>
struct CommonSubview<DstType, Kokkos::Experimental::DynamicView<SP...>, 1,
Arg0> {
typedef Kokkos::Experimental::DynamicView<SP...> SrcType;
typedef typename Kokkos::Subview<DstType, Arg0> dst_subview_type;
typedef SrcType src_subview_type;
using SrcType = Kokkos::Experimental::DynamicView<SP...>;
using dst_subview_type = typename Kokkos::Subview<DstType, Arg0>;
using src_subview_type = SrcType;
dst_subview_type dst_sub;
src_subview_type src_sub;
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0)
@ -559,11 +554,11 @@ struct CommonSubview<DstType, Kokkos::Experimental::DynamicView<SP...>, 1,
template <class... DP, class ViewTypeB, class Layout, class ExecSpace,
typename iType>
struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>, ViewTypeB, Layout,
ExecSpace, 1, iType, false> {
ExecSpace, 1, iType> {
Kokkos::Experimental::DynamicView<DP...> a;
ViewTypeB b;
typedef Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>> policy_type;
using policy_type = Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>>;
ViewCopy(const Kokkos::Experimental::DynamicView<DP...>& a_,
const ViewTypeB& b_)
@ -580,11 +575,11 @@ template <class... DP, class... SP, class Layout, class ExecSpace,
typename iType>
struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>,
Kokkos::Experimental::DynamicView<SP...>, Layout, ExecSpace, 1,
iType, false> {
iType> {
Kokkos::Experimental::DynamicView<DP...> a;
Kokkos::Experimental::DynamicView<SP...> b;
typedef Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>> policy_type;
using policy_type = Kokkos::RangePolicy<ExecSpace, Kokkos::IndexType<iType>>;
ViewCopy(const Kokkos::Experimental::DynamicView<DP...>& a_,
const Kokkos::Experimental::DynamicView<SP...>& b_)

View File

@ -56,9 +56,9 @@ namespace Experimental {
template <typename ReportType, typename DeviceType>
class ErrorReporter {
public:
typedef ReportType report_type;
typedef DeviceType device_type;
typedef typename device_type::execution_space execution_space;
using report_type = ReportType;
using device_type = DeviceType;
using execution_space = typename device_type::execution_space;
ErrorReporter(int max_results)
: m_numReportsAttempted(""),
@ -103,10 +103,10 @@ class ErrorReporter {
}
private:
typedef Kokkos::View<report_type *, execution_space> reports_view_t;
typedef Kokkos::DualView<report_type *, execution_space> reports_dualview_t;
using reports_view_t = Kokkos::View<report_type *, execution_space>;
using reports_dualview_t = Kokkos::DualView<report_type *, execution_space>;
typedef typename reports_dualview_t::host_mirror_space host_mirror_space;
using host_mirror_space = typename reports_dualview_t::host_mirror_space;
Kokkos::View<int, execution_space> m_numReportsAttempted;
reports_dualview_t m_reports;
Kokkos::DualView<int *, execution_space> m_reporters;

View File

@ -52,10 +52,10 @@ namespace Kokkos {
template <typename T>
struct pod_hash {
typedef T argument_type;
typedef T first_argument_type;
typedef uint32_t second_argument_type;
typedef uint32_t result_type;
using argument_type = T;
using first_argument_type = T;
using second_argument_type = uint32_t;
using result_type = uint32_t;
KOKKOS_FORCEINLINE_FUNCTION
uint32_t operator()(T const& t) const {
@ -70,9 +70,9 @@ struct pod_hash {
template <typename T>
struct pod_equal_to {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const {
@ -82,9 +82,9 @@ struct pod_equal_to {
template <typename T>
struct pod_not_equal_to {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const {
@ -94,9 +94,9 @@ struct pod_not_equal_to {
template <typename T>
struct equal_to {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a == b; }
@ -104,9 +104,9 @@ struct equal_to {
template <typename T>
struct not_equal_to {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a != b; }
@ -114,9 +114,9 @@ struct not_equal_to {
template <typename T>
struct greater {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a > b; }
@ -124,9 +124,9 @@ struct greater {
template <typename T>
struct less {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a < b; }
@ -134,9 +134,9 @@ struct less {
template <typename T>
struct greater_equal {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a >= b; }
@ -144,9 +144,9 @@ struct greater_equal {
template <typename T>
struct less_equal {
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
using first_argument_type = T;
using second_argument_type = T;
using result_type = bool;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const& a, T const& b) const { return a <= b; }

View File

@ -51,10 +51,10 @@ namespace Impl {
template <class ViewType>
struct GetOffsetViewTypeFromViewType {
typedef OffsetView<
typename ViewType::data_type, typename ViewType::array_layout,
typename ViewType::device_type, typename ViewType::memory_traits>
type;
using type =
OffsetView<typename ViewType::data_type, typename ViewType::array_layout,
typename ViewType::device_type,
typename ViewType::memory_traits>;
};
template <unsigned, class MapType, class BeginsType>
@ -180,7 +180,7 @@ void runtime_check_rank_device(const size_t rank_dynamic, const size_t rank,
template <class DataType, class... Properties>
class OffsetView : public ViewTraits<DataType, Properties...> {
public:
typedef ViewTraits<DataType, Properties...> traits;
using traits = ViewTraits<DataType, Properties...>;
private:
template <class, class...>
@ -190,12 +190,12 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
template <class, class...>
friend class Kokkos::Impl::ViewMapping;
typedef Kokkos::Impl::ViewMapping<traits, void> map_type;
typedef Kokkos::Impl::SharedAllocationTracker track_type;
using map_type = Kokkos::Impl::ViewMapping<traits, void>;
using track_type = Kokkos::Impl::SharedAllocationTracker;
public:
enum { Rank = map_type::Rank };
typedef Kokkos::Array<int64_t, Rank> begins_type;
using begins_type = Kokkos::Array<int64_t, Rank>;
template <
typename iType,
@ -223,28 +223,27 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
public:
//----------------------------------------
/** \brief Compatible view of array of scalar types */
typedef OffsetView<
typename traits::scalar_array_type, typename traits::array_layout,
typename traits::device_type, typename traits::memory_traits>
array_type;
using array_type =
OffsetView<typename traits::scalar_array_type,
typename traits::array_layout, typename traits::device_type,
typename traits::memory_traits>;
/** \brief Compatible view of const data type */
typedef OffsetView<
typename traits::const_data_type, typename traits::array_layout,
typename traits::device_type, typename traits::memory_traits>
const_type;
using const_type =
OffsetView<typename traits::const_data_type,
typename traits::array_layout, typename traits::device_type,
typename traits::memory_traits>;
/** \brief Compatible view of non-const data type */
typedef OffsetView<
typename traits::non_const_data_type, typename traits::array_layout,
typename traits::device_type, typename traits::memory_traits>
non_const_type;
using non_const_type =
OffsetView<typename traits::non_const_data_type,
typename traits::array_layout, typename traits::device_type,
typename traits::memory_traits>;
/** \brief Compatible HostMirror view */
typedef OffsetView<typename traits::non_const_data_type,
typename traits::array_layout,
typename traits::host_mirror_space>
HostMirror;
using HostMirror = OffsetView<typename traits::non_const_data_type,
typename traits::array_layout,
typename traits::host_mirror_space>;
//----------------------------------------
// Domain rank and extents
@ -335,8 +334,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
//----------------------------------------
// Range span is the span which contains all members.
typedef typename map_type::reference_type reference_type;
typedef typename map_type::pointer_type pointer_type;
using reference_type = typename map_type::reference_type;
using pointer_type = typename map_type::pointer_type;
enum {
reference_type_is_lvalue_reference =
@ -347,6 +346,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const {
return m_map.span_is_contiguous();
}
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return m_map.data() != nullptr;
}
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const {
return m_map.data();
}
@ -841,10 +843,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
// interoperability with View
private:
typedef View<typename traits::scalar_array_type,
typename traits::array_layout, typename traits::device_type,
typename traits::memory_traits>
view_type;
using view_type =
View<typename traits::scalar_array_type, typename traits::array_layout,
typename traits::device_type, typename traits::memory_traits>;
public:
KOKKOS_INLINE_FUNCTION
@ -856,8 +857,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
template <class RT, class... RP>
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview)
: m_track(aview.impl_track()), m_map() {
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
using SrcTraits = typename OffsetView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
static_assert(Mapping::is_assignable,
"Incompatible OffsetView copy construction");
Mapping::assign(m_map, aview.impl_map(), m_track);
@ -871,8 +872,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview,
const index_list_type& minIndices)
: m_track(aview.impl_track()), m_map() {
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
using SrcTraits = typename OffsetView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
static_assert(Mapping::is_assignable,
"Incompatible OffsetView copy construction");
Mapping::assign(m_map, aview.impl_map(), m_track);
@ -894,8 +895,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
KOKKOS_INLINE_FUNCTION OffsetView(const View<RT, RP...>& aview,
const begins_type& beg)
: m_track(aview.impl_track()), m_map(), m_begins(beg) {
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
using SrcTraits = typename OffsetView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
static_assert(Mapping::is_assignable,
"Incompatible OffsetView copy construction");
Mapping::assign(m_map, aview.impl_map(), m_track);
@ -917,8 +918,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
: m_track(rhs.m_track, traits::is_managed),
m_map(),
m_begins(rhs.m_begins) {
typedef typename OffsetView<RT, RP...>::traits SrcTraits;
typedef Kokkos::Impl::ViewMapping<traits, SrcTraits, void> Mapping;
using SrcTraits = typename OffsetView<RT, RP...>::traits;
using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, void>;
static_assert(Mapping::is_assignable,
"Incompatible OffsetView copy construction");
Mapping::assign(m_map, rhs.m_map, rhs.m_track); // swb what about assign?
@ -1215,11 +1216,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
for (size_t i = 0; i < Rank; ++i) m_begins[i] = minIndices.begin()[i];
// Append layout and spaces if not input
typedef Kokkos::Impl::ViewCtorProp<P...> alloc_prop_input;
using alloc_prop_input = Kokkos::Impl::ViewCtorProp<P...>;
// use 'std::integral_constant<unsigned,I>' for non-types
// to avoid duplicate class error.
typedef Kokkos::Impl::ViewCtorProp<
using alloc_prop = Kokkos::Impl::ViewCtorProp<
P...,
typename std::conditional<alloc_prop_input::has_label,
std::integral_constant<unsigned, 0>,
@ -1231,19 +1232,13 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
typename std::conditional<
alloc_prop_input::has_execution_space,
std::integral_constant<unsigned, 2>,
typename traits::device_type::execution_space>::type>
alloc_prop;
typename traits::device_type::execution_space>::type>;
static_assert(traits::is_managed,
"OffsetView allocation constructor requires managed memory");
if (alloc_prop::initialize &&
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
!alloc_prop::execution_space::is_initialized()
#else
!alloc_prop::execution_space::impl_is_initialized()
#endif
) {
!alloc_prop::execution_space::impl_is_initialized()) {
// If initializing view data then
// the execution space must be initialized.
Kokkos::Impl::throw_runtime_exception(
@ -1764,8 +1759,8 @@ template <class LT, class... LP, class RT, class... RP>
KOKKOS_INLINE_FUNCTION bool operator==(const OffsetView<LT, LP...>& lhs,
const OffsetView<RT, RP...>& rhs) {
// Same data, layout, dimensions
typedef ViewTraits<LT, LP...> lhs_traits;
typedef ViewTraits<RT, RP...> rhs_traits;
using lhs_traits = ViewTraits<LT, LP...>;
using rhs_traits = ViewTraits<RT, RP...>;
return std::is_same<typename lhs_traits::const_value_type,
typename rhs_traits::const_value_type>::value &&
@ -1795,8 +1790,8 @@ template <class LT, class... LP, class RT, class... RP>
KOKKOS_INLINE_FUNCTION bool operator==(const View<LT, LP...>& lhs,
const OffsetView<RT, RP...>& rhs) {
// Same data, layout, dimensions
typedef ViewTraits<LT, LP...> lhs_traits;
typedef ViewTraits<RT, RP...> rhs_traits;
using lhs_traits = ViewTraits<LT, LP...>;
using rhs_traits = ViewTraits<RT, RP...>;
return std::is_same<typename lhs_traits::const_value_type,
typename rhs_traits::const_value_type>::value &&
@ -1825,10 +1820,10 @@ KOKKOS_INLINE_FUNCTION bool operator==(const OffsetView<LT, LP...>& lhs,
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
template <class DT, class... DP>
inline void deep_copy(
const OffsetView<DT, DP...>& dst,
const Experimental::OffsetView<DT, DP...>& dst,
typename ViewTraits<DT, DP...>::const_value_type& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
@ -1844,7 +1839,8 @@ inline void deep_copy(
template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const OffsetView<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
const Experimental::OffsetView<DT, DP...>& dst,
const Experimental::OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
@ -1858,7 +1854,8 @@ inline void deep_copy(
}
template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const OffsetView<DT, DP...>& dst, const View<ST, SP...>& value,
const Experimental::OffsetView<DT, DP...>& dst,
const View<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
@ -1873,7 +1870,8 @@ inline void deep_copy(
template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const View<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
const View<DT, DP...>& dst,
const Experimental::OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
@ -1884,53 +1882,54 @@ inline void deep_copy(
Kokkos::deep_copy(dst, value.view());
}
namespace Impl {
// Deduce Mirror Types
template <class Space, class T, class... P>
struct MirrorOffsetViewType {
// The incoming view_type
typedef typename Kokkos::Experimental::OffsetView<T, P...> src_view_type;
using src_view_type = typename Kokkos::Experimental::OffsetView<T, P...>;
// The memory space for the mirror view
typedef typename Space::memory_space memory_space;
using memory_space = typename Space::memory_space;
// Check whether it is the same memory space
enum {
is_same_memspace =
std::is_same<memory_space, typename src_view_type::memory_space>::value
};
// The array_layout
typedef typename src_view_type::array_layout array_layout;
using array_layout = typename src_view_type::array_layout;
// The data type (we probably want it non-const since otherwise we can't even
// deep_copy to it).
typedef typename src_view_type::non_const_data_type data_type;
using data_type = typename src_view_type::non_const_data_type;
// The destination view type if it is not the same memory space
typedef Kokkos::Experimental::OffsetView<data_type, array_layout, Space>
dest_view_type;
using dest_view_type =
Kokkos::Experimental::OffsetView<data_type, array_layout, Space>;
// If it is the same memory_space return the existing view_type
// This will also keep the unmanaged trait if necessary
typedef typename std::conditional<is_same_memspace, src_view_type,
dest_view_type>::type view_type;
using view_type = typename std::conditional<is_same_memspace, src_view_type,
dest_view_type>::type;
};
template <class Space, class T, class... P>
struct MirrorOffsetType {
// The incoming view_type
typedef typename Kokkos::Experimental::OffsetView<T, P...> src_view_type;
using src_view_type = typename Kokkos::Experimental::OffsetView<T, P...>;
// The memory space for the mirror view
typedef typename Space::memory_space memory_space;
using memory_space = typename Space::memory_space;
// Check whether it is the same memory space
enum {
is_same_memspace =
std::is_same<memory_space, typename src_view_type::memory_space>::value
};
// The array_layout
typedef typename src_view_type::array_layout array_layout;
using array_layout = typename src_view_type::array_layout;
// The data type (we probably want it non-const since otherwise we can't even
// deep_copy to it).
typedef typename src_view_type::non_const_data_type data_type;
using data_type = typename src_view_type::non_const_data_type;
// The destination view type if it is not the same memory space
typedef Kokkos::Experimental::OffsetView<data_type, array_layout, Space>
view_type;
using view_type =
Kokkos::Experimental::OffsetView<data_type, array_layout, Space>;
};
} // namespace Impl
@ -1942,8 +1941,8 @@ create_mirror(
typename std::enable_if<
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = 0) {
typedef OffsetView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type;
using src_type = Experimental::OffsetView<T, P...>;
using dst_type = typename src_type::HostMirror;
return dst_type(
Kokkos::Impl::ViewCtorProp<std::string>(
@ -1962,8 +1961,8 @@ create_mirror(
typename std::enable_if<
std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = 0) {
typedef OffsetView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type;
using src_type = Experimental::OffsetView<T, P...>;
using dst_type = typename src_type::HostMirror;
Kokkos::LayoutStride layout;
@ -1992,14 +1991,13 @@ create_mirror(
// Create a mirror in a new space (specialization for different space)
template <class Space, class T, class... P>
typename Kokkos::Experimental::Impl::MirrorOffsetType<Space, T, P...>::view_type
typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type
create_mirror(const Space&,
const Kokkos::Experimental::OffsetView<T, P...>& src) {
return typename Kokkos::Experimental::Impl::MirrorOffsetType<
Space, T, P...>::view_type(src.label(), src.layout(),
{src.begin(0), src.begin(1), src.begin(2),
src.begin(3), src.begin(4), src.begin(5),
src.begin(6), src.begin(7)});
return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type(
src.label(), src.layout(),
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
src.begin(5), src.begin(6), src.begin(7)});
}
template <class T, class... P>
@ -2031,13 +2029,12 @@ create_mirror_view(
typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value)>::type* = 0) {
return Kokkos::Experimental::create_mirror(src);
return Kokkos::create_mirror(src);
}
// Create a mirror view in a new space (specialization for same space)
template <class Space, class T, class... P>
typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space, T,
P...>::view_type
typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type
create_mirror_view(const Space&,
const Kokkos::Experimental::OffsetView<T, P...>& src,
typename std::enable_if<Impl::MirrorOffsetViewType<
@ -2047,17 +2044,15 @@ create_mirror_view(const Space&,
// Create a mirror view in a new space (specialization for different space)
template <class Space, class T, class... P>
typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space, T,
P...>::view_type
typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type
create_mirror_view(const Space&,
const Kokkos::Experimental::OffsetView<T, P...>& src,
typename std::enable_if<!Impl::MirrorOffsetViewType<
Space, T, P...>::is_same_memspace>::type* = 0) {
return typename Kokkos::Experimental::Impl::MirrorOffsetViewType<
Space, T, P...>::view_type(src.label(), src.layout(),
{src.begin(0), src.begin(1), src.begin(2),
src.begin(3), src.begin(4), src.begin(5),
src.begin(6), src.begin(7)});
return typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type(
src.label(), src.layout(),
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
src.begin(5), src.begin(6), src.begin(7)});
}
//
// // Create a mirror view and deep_copy in a new space (specialization for
@ -2093,7 +2088,6 @@ create_mirror_view(const Space&,
// return mirror;
// }
} // namespace Experimental
} /* namespace Kokkos */
//----------------------------------------------------------------------------

File diff suppressed because it is too large Load Diff

View File

@ -57,7 +57,7 @@ namespace Kokkos {
namespace Impl {
template <class RowOffsetsType, class RowBlockOffsetsType>
struct StaticCrsGraphBalancerFunctor {
typedef typename RowOffsetsType::non_const_value_type int_type;
using int_type = typename RowOffsetsType::non_const_value_type;
RowOffsetsType row_offsets;
RowBlockOffsetsType row_block_offsets;
@ -148,7 +148,7 @@ struct StaticCrsGraphBalancerFunctor {
///
/// Here is an example loop over the entries in the row:
/// \code
/// typedef typename GraphRowViewConst<MatrixType>::ordinal_type ordinal_type;
/// using ordinal_type = typename GraphRowViewConst<MatrixType>::ordinal_type;
///
/// GraphRowView<GraphType> G_i = ...;
/// const ordinal_type numEntries = G_i.length;
@ -159,7 +159,7 @@ struct StaticCrsGraphBalancerFunctor {
/// \endcode
///
/// GraphType must provide the \c data_type
/// typedefs. In addition, it must make sense to use GraphRowViewConst to
/// aliases. In addition, it must make sense to use GraphRowViewConst to
/// view a row of GraphType. In particular, column
/// indices of a row must be accessible using the <tt>entries</tt>
/// resp. <tt>colidx</tt> arrays given to the constructor of this
@ -170,7 +170,7 @@ struct StaticCrsGraphBalancerFunctor {
template <class GraphType>
struct GraphRowViewConst {
//! The type of the column indices in the row.
typedef const typename GraphType::data_type ordinal_type;
using ordinal_type = const typename GraphType::data_type;
private:
//! Array of (local) column indices in the row.
@ -279,49 +279,33 @@ struct GraphRowViewConst {
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
/// </ul>
template <class DataType, class Arg1Type, class Arg2Type = void,
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
typename SizeType =
typename ViewTraits<DataType*, Arg1Type, Arg2Type>::size_type,
class Arg3Type = void>
#else
class Arg3Type = void,
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type,
Arg3Type>::size_type>
#endif
class StaticCrsGraph {
private:
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, Arg3Type> traits;
using traits = ViewTraits<DataType*, Arg1Type, Arg2Type, Arg3Type>;
public:
typedef DataType data_type;
typedef typename traits::array_layout array_layout;
typedef typename traits::execution_space execution_space;
typedef typename traits::device_type device_type;
typedef typename traits::memory_traits memory_traits;
typedef SizeType size_type;
using data_type = DataType;
using array_layout = typename traits::array_layout;
using execution_space = typename traits::execution_space;
using device_type = typename traits::device_type;
using memory_traits = typename traits::memory_traits;
using size_type = SizeType;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
staticcrsgraph_type;
typedef StaticCrsGraph<data_type, array_layout,
typename traits::host_mirror_space, size_type,
memory_traits>
HostMirror;
#else
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
staticcrsgraph_type;
typedef StaticCrsGraph<data_type, array_layout,
typename traits::host_mirror_space, memory_traits,
size_type>
HostMirror;
#endif
using staticcrsgraph_type =
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
using HostMirror = StaticCrsGraph<data_type, array_layout,
typename traits::host_mirror_space,
memory_traits, size_type>;
typedef View<const size_type*, array_layout, device_type, memory_traits>
row_map_type;
typedef View<data_type*, array_layout, device_type, memory_traits>
entries_type;
typedef View<const size_type*, array_layout, device_type, memory_traits>
row_block_type;
using row_map_type =
View<const size_type*, array_layout, device_type, memory_traits>;
using entries_type =
View<data_type*, array_layout, device_type, memory_traits>;
using row_block_type =
View<const size_type*, array_layout, device_type, memory_traits>;
entries_type entries;
row_map_type row_map;
@ -370,6 +354,10 @@ class StaticCrsGraph {
: static_cast<size_type>(0);
}
/// \brief Report whether the graph's storage is allocated.
///
/// Returns true only when both the \c row_map and the \c entries views
/// report that they are allocated.
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return (row_map.is_allocated() && entries.is_allocated());
}
/// \brief Return a const view of row i of the graph.
///
/// If row i does not belong to the graph, return an empty view.
@ -436,35 +424,19 @@ typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
//----------------------------------------------------------------------------
template <class DataType, class Arg1Type, class Arg2Type,
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
typename SizeType, class Arg3Type>
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>::HostMirror
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>& input);
#else
class Arg3Type, typename SizeType>
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
SizeType>::HostMirror
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
SizeType>& input);
#endif
template <class DataType, class Arg1Type, class Arg2Type,
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
typename SizeType, class Arg3Type>
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>::HostMirror
create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>& input);
#else
class Arg3Type, typename SizeType>
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
SizeType>::HostMirror
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
SizeType>& input);
#endif
} // namespace Kokkos
@ -481,8 +453,8 @@ namespace Impl {
template <class GraphType>
struct StaticCrsGraphMaximumEntry {
typedef typename GraphType::execution_space execution_space;
typedef typename GraphType::data_type value_type;
using execution_space = typename GraphType::execution_space;
using value_type = typename GraphType::data_type;
const typename GraphType::entries_type entries;
@ -505,22 +477,13 @@ struct StaticCrsGraphMaximumEntry {
} // namespace Impl
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
class Arg3Type>
DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
SizeType, Arg3Type>& graph) {
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
GraphType;
#else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
Arg3Type, SizeType>& graph) {
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
GraphType;
#endif
typedef Impl::StaticCrsGraphMaximumEntry<GraphType> FunctorType;
using GraphType =
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
using FunctorType = Impl::StaticCrsGraphMaximumEntry<GraphType>;
DataType result = 0;
Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0),

View File

@ -66,7 +66,7 @@
namespace Kokkos {
enum { UnorderedMapInvalidIndex = ~0u };
enum : unsigned { UnorderedMapInvalidIndex = ~0u };
/// \brief First element of the return value of UnorderedMap::insert().
///
@ -84,7 +84,7 @@ enum { UnorderedMapInvalidIndex = ~0u };
class UnorderedMapInsertResult {
private:
enum Status {
enum Status : uint32_t {
SUCCESS = 1u << 31,
EXISTING = 1u << 30,
FREED_EXISTING = 1u << 29,
@ -206,42 +206,40 @@ template <typename Key, typename Value,
pod_equal_to<typename std::remove_const<Key>::type> >
class UnorderedMap {
private:
typedef typename ViewTraits<Key, Device, void, void>::host_mirror_space
host_mirror_space;
using host_mirror_space =
typename ViewTraits<Key, Device, void, void>::host_mirror_space;
public:
//! \name Public types and constants
//@{
// key_types
typedef Key declared_key_type;
typedef typename std::remove_const<declared_key_type>::type key_type;
typedef typename std::add_const<key_type>::type const_key_type;
using declared_key_type = Key;
using key_type = typename std::remove_const<declared_key_type>::type;
using const_key_type = typename std::add_const<key_type>::type;
// value_types
typedef Value declared_value_type;
typedef typename std::remove_const<declared_value_type>::type value_type;
typedef typename std::add_const<value_type>::type const_value_type;
using declared_value_type = Value;
using value_type = typename std::remove_const<declared_value_type>::type;
using const_value_type = typename std::add_const<value_type>::type;
typedef Device device_type;
typedef typename Device::execution_space execution_space;
typedef Hasher hasher_type;
typedef EqualTo equal_to_type;
typedef uint32_t size_type;
using device_type = Device;
using execution_space = typename Device::execution_space;
using hasher_type = Hasher;
using equal_to_type = EqualTo;
using size_type = uint32_t;
// map_types
typedef UnorderedMap<declared_key_type, declared_value_type, device_type,
hasher_type, equal_to_type>
declared_map_type;
typedef UnorderedMap<key_type, value_type, device_type, hasher_type,
equal_to_type>
insertable_map_type;
typedef UnorderedMap<const_key_type, value_type, device_type, hasher_type,
equal_to_type>
modifiable_map_type;
typedef UnorderedMap<const_key_type, const_value_type, device_type,
hasher_type, equal_to_type>
const_map_type;
using declared_map_type =
UnorderedMap<declared_key_type, declared_value_type, device_type,
hasher_type, equal_to_type>;
using insertable_map_type = UnorderedMap<key_type, value_type, device_type,
hasher_type, equal_to_type>;
using modifiable_map_type =
UnorderedMap<const_key_type, value_type, device_type, hasher_type,
equal_to_type>;
using const_map_type = UnorderedMap<const_key_type, const_value_type,
device_type, hasher_type, equal_to_type>;
static const bool is_set = std::is_same<void, value_type>::value;
static const bool has_const_key =
@ -254,43 +252,42 @@ class UnorderedMap {
static const bool is_modifiable_map = has_const_key && !has_const_value;
static const bool is_const_map = has_const_key && has_const_value;
typedef UnorderedMapInsertResult insert_result;
using insert_result = UnorderedMapInsertResult;
typedef UnorderedMap<Key, Value, host_mirror_space, Hasher, EqualTo>
HostMirror;
using HostMirror =
UnorderedMap<Key, Value, host_mirror_space, Hasher, EqualTo>;
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
using histogram_type = Impl::UnorderedMapHistogram<const_map_type>;
//@}
private:
enum { invalid_index = ~static_cast<size_type>(0) };
enum : size_type { invalid_index = ~static_cast<size_type>(0) };
typedef typename Impl::if_c<is_set, int, declared_value_type>::type
impl_value_type;
using impl_value_type =
typename Impl::if_c<is_set, int, declared_value_type>::type;
typedef typename Impl::if_c<
using key_type_view = typename Impl::if_c<
is_insertable_map, View<key_type *, device_type>,
View<const key_type *, device_type, MemoryTraits<RandomAccess> > >::type
key_type_view;
View<const key_type *, device_type, MemoryTraits<RandomAccess> > >::type;
typedef typename Impl::if_c<is_insertable_map || is_modifiable_map,
View<impl_value_type *, device_type>,
View<const impl_value_type *, device_type,
MemoryTraits<RandomAccess> > >::type
value_type_view;
using value_type_view =
typename Impl::if_c<is_insertable_map || is_modifiable_map,
View<impl_value_type *, device_type>,
View<const impl_value_type *, device_type,
MemoryTraits<RandomAccess> > >::type;
typedef typename Impl::if_c<
using size_type_view = typename Impl::if_c<
is_insertable_map, View<size_type *, device_type>,
View<const size_type *, device_type, MemoryTraits<RandomAccess> > >::type
size_type_view;
View<const size_type *, device_type, MemoryTraits<RandomAccess> > >::type;
typedef typename Impl::if_c<is_insertable_map, Bitset<execution_space>,
ConstBitset<execution_space> >::type bitset_type;
using bitset_type =
typename Impl::if_c<is_insertable_map, Bitset<execution_space>,
ConstBitset<execution_space> >::type;
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
enum { num_scalars = 3 };
typedef View<int[num_scalars], LayoutLeft, device_type> scalars_view;
using scalars_view = View<int[num_scalars], LayoutLeft, device_type>;
public:
//! \name Public member functions
@ -353,6 +350,11 @@ class UnorderedMap {
{ Kokkos::deep_copy(m_scalars, 0); }
}
/// \brief Report whether the map's storage is allocated.
///
/// Returns true only when the \c m_keys, \c m_values and \c m_scalars views
/// all report that they are allocated.
// NOTE(review): m_hash_lists and m_available_indexes are not consulted
// here -- confirm that checking these three views is sufficient.
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return (m_keys.is_allocated() && m_values.is_allocated() &&
m_scalars.is_allocated());
}
/// \brief Change the capacity of the map
///
/// If there are no failed inserts the current size of the map will
@ -742,9 +744,9 @@ class UnorderedMap {
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
typename SDevice::memory_space>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
typename SDevice::memory_space>;
raw_deep_copy(tmp.m_hash_lists.data(), src.m_hash_lists.data(),
sizeof(size_type) * src.m_hash_lists.extent(0));
@ -768,25 +770,25 @@ class UnorderedMap {
bool modified() const { return get_flag(modified_idx); }
void set_flag(int flag) const {
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
Kokkos::HostSpace>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
Kokkos::HostSpace>;
const int true_ = true;
raw_deep_copy(m_scalars.data() + flag, &true_, sizeof(int));
}
void reset_flag(int flag) const {
typedef Kokkos::Impl::DeepCopy<typename device_type::memory_space,
Kokkos::HostSpace>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
Kokkos::HostSpace>;
const int false_ = false;
raw_deep_copy(m_scalars.data() + flag, &false_, sizeof(int));
}
bool get_flag(int flag) const {
typedef Kokkos::Impl::DeepCopy<Kokkos::HostSpace,
typename device_type::memory_space>
raw_deep_copy;
using raw_deep_copy =
Kokkos::Impl::DeepCopy<Kokkos::HostSpace,
typename device_type::memory_space>;
int result = false;
raw_deep_copy(&result, m_scalars.data() + flag, sizeof(int));
return result;

View File

@ -58,19 +58,19 @@ namespace Kokkos {
template <class Scalar, class Arg1Type = void>
class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
public:
typedef Scalar value_type;
typedef Scalar* pointer;
typedef const Scalar* const_pointer;
typedef Scalar& reference;
typedef const Scalar& const_reference;
typedef Scalar* iterator;
typedef const Scalar* const_iterator;
typedef size_t size_type;
using value_type = Scalar;
using pointer = Scalar*;
using const_pointer = const Scalar*;
using reference = Scalar&;
using const_reference = const Scalar&;
using iterator = Scalar*;
using const_iterator = const Scalar*;
using size_type = size_t;
private:
size_t _size;
float _extra_storage;
typedef DualView<Scalar*, LayoutLeft, Arg1Type> DV;
using DV = DualView<Scalar*, LayoutLeft, Arg1Type>;
public:
#ifdef KOKKOS_ENABLE_CUDA_UVM
@ -212,14 +212,17 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
return begin() + start;
}
/// \brief Report whether the underlying storage is allocated.
///
/// Forwards to \c is_allocated() of the DualView base class (\c DV).
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return DV::is_allocated();
}
size_type size() const { return _size; }
size_type max_size() const { return 2000000000; }
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
size_type capacity() const { return DV::capacity(); }
#endif
size_type span() const { return DV::span(); }
bool empty() const { return _size == 0; }
pointer data() const { return DV::h_view.data(); }
iterator begin() const { return DV::h_view.data(); }
iterator end() const {
@ -310,7 +313,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
public:
struct set_functor {
typedef typename DV::t_dev::execution_space execution_space;
using execution_space = typename DV::t_dev::execution_space;
typename DV::t_dev _data;
Scalar _val;
@ -321,7 +324,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
};
struct set_functor_host {
typedef typename DV::t_host::execution_space execution_space;
using execution_space = typename DV::t_host::execution_space;
typename DV::t_host _data;
Scalar _val;

View File

@ -65,11 +65,11 @@ unsigned rotate_right(unsigned i, int r) {
template <typename Bitset>
struct BitsetCount {
typedef Bitset bitset_type;
typedef
typename bitset_type::execution_space::execution_space execution_space;
typedef typename bitset_type::size_type size_type;
typedef size_type value_type;
using bitset_type = Bitset;
using execution_space =
typename bitset_type::execution_space::execution_space;
using size_type = typename bitset_type::size_type;
using value_type = size_type;
bitset_type m_bitset;

View File

@ -140,10 +140,10 @@ uint32_t MurmurHash3_x86_32(const void* key, int len, uint32_t seed) {
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION bool bitwise_equal(T const* const a_ptr,
T const* const b_ptr) {
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64; // NOLINT(modernize-use-using)
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32; // NOLINT(modernize-use-using)
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16; // NOLINT(modernize-use-using)
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8; // NOLINT(modernize-use-using)
enum {
NUM_8 = sizeof(T),

View File

@ -50,19 +50,6 @@
namespace Kokkos {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
class Arg3Type>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>::HostMirror
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view,
typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) {
return view;
}
#else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
@ -74,20 +61,7 @@ create_mirror_view(
Arg3Type>::is_hostspace>::type* = 0) {
return view;
}
#endif
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
class Arg3Type>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>::HostMirror
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>& view) {
// Force copy:
// typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>
staticcrsgraph_type;
#else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
@ -95,10 +69,9 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
SizeType>& view) {
// Force copy:
// typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
typedef StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>
staticcrsgraph_type;
#endif
// using alloc = Impl::ViewAssignment<Impl::ViewDefault>; // unused
using staticcrsgraph_type =
StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>;
typename staticcrsgraph_type::HostMirror tmp;
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map =
@ -120,17 +93,6 @@ create_mirror(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
return tmp;
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
template <class DataType, class Arg1Type, class Arg2Type, typename SizeType,
class Arg3Type>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
Arg3Type>::HostMirror
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view,
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
#else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
@ -139,9 +101,7 @@ create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
view,
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
#endif
{
Arg3Type>::is_hostspace>::type* = 0) {
return create_mirror(view);
}
} // namespace Kokkos
@ -154,16 +114,15 @@ namespace Kokkos {
template <class StaticCrsGraphType, class InputSizeType>
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
const std::string& label, const std::vector<InputSizeType>& input) {
typedef StaticCrsGraphType output_type;
// typedef std::vector< InputSizeType > input_type ; // unused
using output_type = StaticCrsGraphType;
// using input_type = std::vector<InputSizeType>; // unused
typedef typename output_type::entries_type entries_type;
using entries_type = typename output_type::entries_type;
typedef View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>
work_type;
using work_type = View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>;
output_type output;
@ -197,16 +156,15 @@ template <class StaticCrsGraphType, class InputSizeType>
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
const std::string& label,
const std::vector<std::vector<InputSizeType> >& input) {
typedef StaticCrsGraphType output_type;
typedef typename output_type::entries_type entries_type;
using output_type = StaticCrsGraphType;
using entries_type = typename output_type::entries_type;
static_assert(entries_type::rank == 1, "Graph entries view must be rank one");
typedef View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>
work_type;
using work_type = View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>;
output_type output;

View File

@ -60,10 +60,10 @@ uint32_t find_hash_size(uint32_t size);
template <typename Map>
struct UnorderedMapRehash {
typedef Map map_type;
typedef typename map_type::const_map_type const_map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
using map_type = Map;
using const_map_type = typename map_type::const_map_type;
using execution_space = typename map_type::execution_space;
using size_type = typename map_type::size_type;
map_type m_dst;
const_map_type m_src;
@ -84,11 +84,11 @@ struct UnorderedMapRehash {
template <typename UMap>
struct UnorderedMapErase {
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
typedef typename map_type::key_type key_type;
typedef typename map_type::impl_value_type value_type;
using map_type = UMap;
using execution_space = typename map_type::execution_space;
using size_type = typename map_type::size_type;
using key_type = typename map_type::key_type;
using value_type = typename map_type::impl_value_type;
map_type m_map;
@ -140,12 +140,12 @@ struct UnorderedMapErase {
template <typename UMap>
struct UnorderedMapHistogram {
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
using map_type = UMap;
using execution_space = typename map_type::execution_space;
using size_type = typename map_type::size_type;
typedef View<int[100], execution_space> histogram_view;
typedef typename histogram_view::HostMirror host_histogram_view;
using histogram_view = View<int[100], execution_space>;
using host_histogram_view = typename histogram_view::HostMirror;
map_type m_map;
histogram_view m_length;
@ -230,9 +230,9 @@ struct UnorderedMapHistogram {
template <typename UMap>
struct UnorderedMapPrint {
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
using map_type = UMap;
using execution_space = typename map_type::execution_space;
using size_type = typename map_type::size_type;
map_type m_map;

View File

@ -47,6 +47,7 @@
#include <iostream>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <array>
namespace Test {
@ -54,9 +55,9 @@ namespace Impl {
template <typename Bitset, bool Set>
struct TestBitset {
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
using bitset_type = Bitset;
using execution_space = typename bitset_type::execution_space;
using value_type = uint32_t;
bitset_type m_bitset;
@ -95,9 +96,9 @@ struct TestBitset {
template <typename Bitset>
struct TestBitsetTest {
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
using bitset_type = Bitset;
using execution_space = typename bitset_type::execution_space;
using value_type = uint32_t;
bitset_type m_bitset;
@ -127,9 +128,9 @@ struct TestBitsetTest {
template <typename Bitset, bool Set>
struct TestBitsetAny {
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
using bitset_type = Bitset;
using execution_space = typename bitset_type::execution_space;
using value_type = uint32_t;
bitset_type m_bitset;
@ -181,16 +182,30 @@ struct TestBitsetAny {
template <typename Device>
void test_bitset() {
typedef Kokkos::Bitset<Device> bitset_type;
typedef Kokkos::ConstBitset<Device> const_bitset_type;
using bitset_type = Kokkos::Bitset<Device>;
using const_bitset_type = Kokkos::ConstBitset<Device>;
// unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
unsigned test_sizes[] = {1000u, 1u << 14, 1u << 16, 10000001};
{
unsigned ts = 100u;
bitset_type b1;
ASSERT_TRUE(b1.is_allocated());
for (int i = 0, end = sizeof(test_sizes) / sizeof(unsigned); i < end; ++i) {
b1 = bitset_type(ts);
bitset_type b2(b1);
bitset_type b3(ts);
ASSERT_TRUE(b1.is_allocated());
ASSERT_TRUE(b2.is_allocated());
ASSERT_TRUE(b3.is_allocated());
}
std::array<unsigned, 7> test_sizes = {
{0u, 10u, 100u, 1000u, 1u << 14, 1u << 16, 10000001}};
for (const auto test_size : test_sizes) {
// std::cout << "Bitset " << test_sizes[i] << std::endl;
bitset_type bitset(test_sizes[i]);
bitset_type bitset(test_size);
// std::cout << " Check initial count " << std::endl;
// nothing should be set
@ -253,10 +268,7 @@ void test_bitset() {
}
}
// FIXME_HIP deadlock
#ifndef KOKKOS_ENABLE_HIP
// Registers the "bitset" test case: runs test_bitset for TEST_EXECSPACE.
TEST(TEST_CATEGORY, bitset) { test_bitset<TEST_EXECSPACE>(); }
#endif
} // namespace Test
#endif // KOKKOS_TEST_BITSET_HPP

View File

@ -55,13 +55,45 @@
namespace Test {
namespace Impl {
template <typename Scalar, class Device>
struct test_dualview_alloc {
using scalar_type = Scalar;
using execution_space = Device;
template <typename ViewType>
bool run_me(unsigned int n, unsigned int m) {
if (n < 10) n = 10;
if (m < 3) m = 3;
{
ViewType b1;
if (b1.is_allocated() == true) return false;
b1 = ViewType("B1", n, m);
ViewType b2(b1);
ViewType b3("B3", n, m);
if (b1.is_allocated() == false) return false;
if (b2.is_allocated() == false) return false;
if (b3.is_allocated() == false) return false;
}
return true;
}
bool result = false;
test_dualview_alloc(unsigned int size) {
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
size, 3);
}
};
template <typename Scalar, class Device>
struct test_dualview_combinations {
typedef test_dualview_combinations<Scalar, Device> self_type;
using self_type = test_dualview_combinations<Scalar, Device>;
typedef Scalar scalar_type;
typedef Device execution_space;
using scalar_type = Scalar;
using execution_space = Device;
Scalar reference;
Scalar result;
@ -110,7 +142,7 @@ struct test_dualview_combinations {
template <typename Scalar, class ViewType>
struct SumViewEntriesFunctor {
typedef Scalar value_type;
using value_type = Scalar;
ViewType fv;
@ -126,8 +158,8 @@ struct SumViewEntriesFunctor {
template <typename Scalar, class Device>
struct test_dual_view_deep_copy {
typedef Scalar scalar_type;
typedef Device execution_space;
using scalar_type = Scalar;
using execution_space = Device;
template <typename ViewType>
void run_me(int n, const int m, const bool use_templ_sync) {
@ -153,8 +185,8 @@ struct test_dual_view_deep_copy {
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, n),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
@ -220,8 +252,8 @@ struct test_dual_view_deep_copy {
template <typename Scalar, class Device>
struct test_dualview_resize {
typedef Scalar scalar_type;
typedef Device execution_space;
using scalar_type = Scalar;
using execution_space = Device;
template <typename ViewType>
void run_me() {
@ -244,8 +276,8 @@ struct test_dualview_resize {
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
@ -274,8 +306,8 @@ struct test_dualview_resize {
// Check device view is initialized as expected
a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
@ -301,8 +333,8 @@ struct test_dualview_resize {
template <typename Scalar, class Device>
struct test_dualview_realloc {
typedef Scalar scalar_type;
typedef Device execution_space;
using scalar_type = Scalar;
using execution_space = Device;
template <typename ViewType>
void run_me() {
@ -319,8 +351,8 @@ struct test_dualview_realloc {
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
@ -351,6 +383,12 @@ void test_dualview_combinations(unsigned int size, bool with_init) {
ASSERT_EQ(test.result, 0);
}
template <typename Scalar, typename Device>
void test_dualview_alloc(unsigned int size) {
Impl::test_dualview_alloc<Scalar, Device> test(size);
ASSERT_TRUE(test.result);
}
template <typename Scalar, typename Device>
void test_dualview_deep_copy() {
Impl::test_dual_view_deep_copy<Scalar, Device>();
@ -370,6 +408,10 @@ TEST(TEST_CATEGORY, dualview_combination) {
test_dualview_combinations<int, TEST_EXECSPACE>(10, true);
}
// Registers the "dualview_alloc" test case: runs test_dualview_alloc with
// Scalar = int on TEST_EXECSPACE and a requested size of 10.
TEST(TEST_CATEGORY, dualview_alloc) {
test_dualview_alloc<int, TEST_EXECSPACE>(10);
}
TEST(TEST_CATEGORY, dualview_combinations_without_init) {
test_dualview_combinations<int, TEST_EXECSPACE>(10, false);
}

View File

@ -68,12 +68,12 @@ size_t allocation_count(const Kokkos::DynRankView<T, P...>& view) {
template <typename T, class DeviceType>
struct TestViewOperator {
typedef DeviceType execution_space;
using execution_space = DeviceType;
static const unsigned N = 100;
static const unsigned D = 3;
typedef Kokkos::DynRankView<T, execution_space> view_type;
using view_type = Kokkos::DynRankView<T, execution_space>;
const view_type v1;
const view_type v2;
@ -101,11 +101,11 @@ struct TestViewOperator_LeftAndRight;
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -116,11 +116,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
left_view left;
right_view right;
@ -186,11 +186,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -201,11 +201,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
left_view left;
right_view right;
@ -268,11 +268,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -283,14 +283,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
stride_view;
using stride_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
left_view left;
right_view right;
@ -363,11 +363,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -378,11 +378,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
left_view left;
right_view right;
@ -438,11 +438,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -453,14 +453,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
stride_view;
using stride_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
left_view left;
right_view right;
@ -536,11 +536,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -551,11 +551,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
left_view left;
right_view right;
@ -616,11 +616,11 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> {
template <class DataType, class DeviceType>
struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
typedef DeviceType execution_space;
typedef typename execution_space::memory_space memory_space;
typedef typename execution_space::size_type size_type;
using execution_space = DeviceType;
using memory_space = typename execution_space::memory_space;
using size_type = typename execution_space::size_type;
typedef int value_type;
using value_type = int;
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type& update,
@ -631,14 +631,14 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
KOKKOS_INLINE_FUNCTION
static void init(value_type& update) { update = 0; }
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>
left_view;
using left_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutLeft, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>
right_view;
using right_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutRight, execution_space>;
typedef Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>
stride_view;
using stride_view =
Kokkos::DynRankView<DataType, Kokkos::LayoutStride, execution_space>;
left_view left;
right_view right;
@ -689,22 +689,22 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> {
template <typename T, class DeviceType>
class TestDynViewAPI {
public:
typedef DeviceType device;
using device = DeviceType;
enum { N0 = 1000, N1 = 3, N2 = 5, N3 = 7 };
typedef Kokkos::DynRankView<T, device> dView0;
typedef Kokkos::DynRankView<const T, device> const_dView0;
using dView0 = Kokkos::DynRankView<T, device>;
using const_dView0 = Kokkos::DynRankView<const T, device>;
typedef Kokkos::DynRankView<T, device, Kokkos::MemoryUnmanaged>
dView0_unmanaged;
typedef typename dView0::host_mirror_space host_drv_space;
using dView0_unmanaged =
Kokkos::DynRankView<T, device, Kokkos::MemoryUnmanaged>;
using host_drv_space = typename dView0::host_mirror_space;
typedef Kokkos::View<T, device> View0;
typedef Kokkos::View<T*, device> View1;
typedef Kokkos::View<T*******, device> View7;
using View0 = Kokkos::View<T, device>;
using View1 = Kokkos::View<T*, device>;
using View7 = Kokkos::View<T*******, device>;
typedef typename View0::host_mirror_space host_view_space;
using host_view_space = typename View0::host_mirror_space;
static void run_tests() {
run_test_resize_realloc();
@ -712,6 +712,7 @@ class TestDynViewAPI {
run_test_mirror_and_copy();
run_test_scalar();
run_test();
run_test_allocated();
run_test_const();
run_test_subview();
run_test_subview_strided();
@ -750,8 +751,8 @@ class TestDynViewAPI {
}
static void run_test_mirror() {
typedef Kokkos::DynRankView<int, host_drv_space> view_type;
typedef typename view_type::HostMirror mirror_type;
using view_type = Kokkos::DynRankView<int, host_drv_space>;
using mirror_type = typename view_type::HostMirror;
view_type a("a");
mirror_type am = Kokkos::create_mirror_view(a);
mirror_type ax = Kokkos::create_mirror(a);
@ -851,8 +852,8 @@ class TestDynViewAPI {
ASSERT_EQ(a_h.rank(), a_d.rank());
}
{
typedef Kokkos::DynRankView<int, Kokkos::LayoutStride, Kokkos::HostSpace>
view_stride_type;
using view_stride_type =
Kokkos::DynRankView<int, Kokkos::LayoutStride, Kokkos::HostSpace>;
unsigned order[] = {6, 5, 4, 3, 2, 1, 0},
dimen[] = {N0, N1, N2, 2, 2, 2, 2}; // LayoutRight equivalent
view_stride_type a_h(
@ -956,8 +957,8 @@ class TestDynViewAPI {
}
static void run_test_scalar() {
typedef typename dView0::HostMirror
hView0; // HostMirror of DynRankView is a DynRankView
using hView0 = typename dView0::HostMirror; // HostMirror of DynRankView is
// a DynRankView
dView0 dx, dy;
hView0 hx, hy;
@ -1050,12 +1051,12 @@ class TestDynViewAPI {
static void run_test() {
// mfh 14 Feb 2014: This test doesn't actually create instances of
// these types. In order to avoid "declared but unused typedef"
// these types. In order to avoid "unused type alias"
// warnings, we declare empty instances of these types, with the
// usual "(void)" marker to avoid compiler warnings for unused
// variables.
typedef typename dView0::HostMirror hView0;
using hView0 = typename dView0::HostMirror;
{
hView0 thing;
@ -1361,7 +1362,7 @@ class TestDynViewAPI {
}
}
typedef T DataType;
using DataType = T;
static void check_auto_conversion_to_const(
const Kokkos::DynRankView<const DataType, device>& arg_const,
@ -1369,12 +1370,28 @@ class TestDynViewAPI {
ASSERT_TRUE(arg_const == arg);
}
static void run_test_allocated() {
using device_type = Kokkos::DynRankView<DataType, device>;
const int N1 = 100;
const int N2 = 10;
device_type d1;
ASSERT_FALSE(d1.is_allocated());
d1 = device_type("d1", N1, N2);
device_type d2(d1);
device_type d3("d3", N1);
ASSERT_TRUE(d1.is_allocated());
ASSERT_TRUE(d2.is_allocated());
ASSERT_TRUE(d3.is_allocated());
}
static void run_test_const() {
typedef Kokkos::DynRankView<DataType, device> typeX;
typedef Kokkos::DynRankView<const DataType, device> const_typeX;
typedef Kokkos::DynRankView<const DataType, device,
Kokkos::MemoryRandomAccess>
const_typeR;
using typeX = Kokkos::DynRankView<DataType, device>;
using const_typeX = Kokkos::DynRankView<const DataType, device>;
using const_typeR =
Kokkos::DynRankView<const DataType, device, Kokkos::MemoryRandomAccess>;
typeX x("X", 2);
const_typeX xc = x;
const_typeR xr = x;
@ -1398,10 +1415,10 @@ class TestDynViewAPI {
}
static void run_test_subview() {
typedef Kokkos::DynRankView<const T, device> cdView;
typedef Kokkos::DynRankView<T, device> dView;
using cdView = Kokkos::DynRankView<const T, device>;
using dView = Kokkos::DynRankView<T, device>;
// LayoutStride required for all returned DynRankView subdynrankview's
typedef Kokkos::DynRankView<T, Kokkos::LayoutStride, device> sdView;
using sdView = Kokkos::DynRankView<T, Kokkos::LayoutStride, device>;
dView0 d0("d0");
cdView s0 = d0;
@ -1452,7 +1469,7 @@ class TestDynViewAPI {
ASSERT_EQ(dv6.rank(), 6);
// DynRankView with LayoutRight
typedef Kokkos::DynRankView<T, Kokkos::LayoutRight, device> drView;
using drView = Kokkos::DynRankView<T, Kokkos::LayoutRight, device>;
drView dr5("dr5", N0, N1, N2, 2, 2);
ASSERT_EQ(dr5.rank(), 5);
@ -1514,7 +1531,8 @@ class TestDynViewAPI {
ASSERT_EQ(ds5.extent(4), ds5plus.extent(4));
ASSERT_EQ(ds5.extent(5), ds5plus.extent(5));
#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_UVM)
#if (!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_UVM)) && \
!defined(KOKKOS_ENABLE_HIP)
ASSERT_EQ(&ds5(1, 1, 1, 1, 0) - &ds5plus(1, 1, 1, 1, 0), 0);
ASSERT_EQ(&ds5(1, 1, 1, 1, 0, 0) - &ds5plus(1, 1, 1, 1, 0, 0),
0); // passing argument to rank beyond the view's rank is allowed
@ -1538,12 +1556,12 @@ class TestDynViewAPI {
}
static void run_test_subview_strided() {
typedef Kokkos::DynRankView<int, Kokkos::LayoutLeft, host_drv_space>
drview_left;
typedef Kokkos::DynRankView<int, Kokkos::LayoutRight, host_drv_space>
drview_right;
typedef Kokkos::DynRankView<int, Kokkos::LayoutStride, host_drv_space>
drview_stride;
using drview_left =
Kokkos::DynRankView<int, Kokkos::LayoutLeft, host_drv_space>;
using drview_right =
Kokkos::DynRankView<int, Kokkos::LayoutRight, host_drv_space>;
using drview_stride =
Kokkos::DynRankView<int, Kokkos::LayoutStride, host_drv_space>;
drview_left xl2("xl2", 100, 200);
drview_right xr2("xr2", 100, 200);
@ -1588,31 +1606,29 @@ class TestDynViewAPI {
static void run_test_vector() {
static const unsigned Length = 1000, Count = 8;
typedef typename Kokkos::DynRankView<T, Kokkos::LayoutLeft, host_drv_space>
multivector_type;
using multivector_type =
typename Kokkos::DynRankView<T, Kokkos::LayoutLeft, host_drv_space>;
typedef typename Kokkos::DynRankView<T, Kokkos::LayoutRight, host_drv_space>
multivector_right_type;
using multivector_right_type =
typename Kokkos::DynRankView<T, Kokkos::LayoutRight, host_drv_space>;
multivector_type mv = multivector_type("mv", Length, Count);
multivector_right_type mv_right =
multivector_right_type("mv", Length, Count);
typedef
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>
svector_type;
typedef
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>
smultivector_type;
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>
const_svector_right_type;
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>
const_svector_type;
typedef typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>
const_smultivector_type;
using svector_type =
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>;
using smultivector_type =
typename Kokkos::DynRankView<T, Kokkos::LayoutStride, host_drv_space>;
using const_svector_right_type =
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>;
using const_svector_type =
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>;
using const_smultivector_type =
typename Kokkos::DynRankView<const T, Kokkos::LayoutStride,
host_drv_space>;
svector_type v1 = Kokkos::subdynrankview(mv, Kokkos::ALL(), 0);
svector_type v2 = Kokkos::subdynrankview(mv, Kokkos::ALL(), 1);

View File

@ -44,10 +44,7 @@
#include <TestDynViewAPI.hpp>
namespace Test {
// FIXME_HIP attempt to access inaccessible memory space
#ifndef KOKKOS_ENABLE_HIP
// Registers the "dyn_rank_view_api_generic" test case: invokes
// TestDynViewAPI<double, TEST_EXECSPACE>::run_tests().
TEST(TEST_CATEGORY, dyn_rank_view_api_generic) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_tests();
}
#endif
} // namespace Test

View File

@ -45,10 +45,7 @@
#include <TestDynViewAPI.hpp>
namespace Test {
// FIXME_HIP failing with wrong value
#ifndef KOKKOS_ENABLE_HIP
// Registers the "dyn_rank_view_api_operator_rank12345" test case: invokes
// TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345().
TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345();
}
#endif
} // namespace Test

Some files were not shown because too many files have changed in this diff Show More