Merge pull request #2004 from stanmoore1/kk_3.1

Update Kokkos library in LAMMPS to v3.1
This commit is contained in:
Axel Kohlmeyer
2020-04-24 18:35:53 -04:00
committed by GitHub
560 changed files with 24838 additions and 15005 deletions

View File

@ -14,16 +14,30 @@ endif()
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF) option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
option(DOWNLOAD_KOKKOS "Download the KOKKOS library instead of using the bundled one" OFF) option(DOWNLOAD_KOKKOS "Download the KOKKOS library instead of using the bundled one" OFF)
if(DOWNLOAD_KOKKOS) if(DOWNLOAD_KOKKOS)
# extract Kokkos-related variables and values so we can forward them to the Kokkos library build
get_cmake_property(_VARS VARIABLES)
list(FILTER _VARS INCLUDE REGEX ^Kokkos_)
foreach(_VAR IN LISTS _VARS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-D${_VAR}=${${_VAR}}")
endforeach()
message(STATUS "KOKKOS download requested - we will build our own") message(STATUS "KOKKOS download requested - we will build our own")
file(DOWNLOAD https://github.com/kokkos/kokkos/compare/3.0.00...stanmoore1:lammps.diff ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")
if(CMAKE_REQUEST_PIC)
list(APPEND KOKKOS_LIB_BUILD_ARGS ${CMAKE_REQUEST_PIC})
endif()
# append other CMake variables that need to be forwarded to CMAKE_ARGS
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_LIBDIR=lib")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(kokkos_build ExternalProject_Add(kokkos_build
URL https://github.com/kokkos/kokkos/archive/3.0.00.tar.gz URL https://github.com/kokkos/kokkos/archive/3.1.00.tar.gz
URL_MD5 281c7093aa3a603276e93abdf4be23b9 URL_MD5 f638a6c786f748a602b26faa0e96ebab
PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
) )
ExternalProject_get_property(kokkos_build INSTALL_DIR) ExternalProject_get_property(kokkos_build INSTALL_DIR)
@ -39,9 +53,9 @@ if(DOWNLOAD_KOKKOS)
install(CODE "MESSAGE(FATAL_ERROR \"Installing liblammps with downloaded libraries is currently not supported.\")") install(CODE "MESSAGE(FATAL_ERROR \"Installing liblammps with downloaded libraries is currently not supported.\")")
endif() endif()
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3) find_package(Kokkos 3.1)
if(NOT Kokkos_FOUND) if(NOT Kokkos_FOUND)
message(FATAL_ERROR "KOKKOS library not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it") message(FATAL_ERROR "KOKKOS library version 3.1 or later not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it")
endif() endif()
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else() else()

View File

@ -320,11 +320,12 @@ to have an executable that will run on this and newer architectures.
.. note:: .. note::
NVIDIA GPUs with CC 5.0 (Maxwell) and newer are not compatible with If you run Kokkos on a newer GPU architecture than what LAMMPS was
CC 3.x (Kepler). If you run Kokkos on a newer architecture than what compiled with, there will be a delay during device initialization
LAMMPS was compiled with, there will be a significant delay during since the just-in-time compiler has to recompile all GPU kernels
device initialization since the just-in-time compiler has to for the new hardware. This is, however, not possible when compiled
recompile the GPU kernel code for the new hardware. for NVIDIA GPUs with CC 3.x (Kepler) for GPUs with CC 5.0 (Maxwell)
and newer as they are not compatible.
The settings discussed below have been tested with LAMMPS and are The settings discussed below have been tested with LAMMPS and are
confirmed to work. Kokkos is an active project with ongoing improvements confirmed to work. Kokkos is an active project with ongoing improvements
@ -343,73 +344,109 @@ be specified in uppercase.
:widths: auto :widths: auto
* - **Arch-ID** * - **Arch-ID**
- **HOST or GPU**
- **Description** - **Description**
* - AMDAVX * - AMDAVX
- HOST
- AMD 64-bit x86 CPU (AVX 1) - AMD 64-bit x86 CPU (AVX 1)
* - EPYC * - EPYC
- HOST
- AMD EPYC Zen class CPU (AVX 2) - AMD EPYC Zen class CPU (AVX 2)
* - ARMV80 * - ARMV80
- HOST
- ARMv8.0 Compatible CPU - ARMv8.0 Compatible CPU
* - ARMV81 * - ARMV81
- HOST
- ARMv8.1 Compatible CPU - ARMv8.1 Compatible CPU
* - ARMV8_THUNDERX * - ARMV8_THUNDERX
- HOST
- ARMv8 Cavium ThunderX CPU - ARMv8 Cavium ThunderX CPU
* - ARMV8_THUNDERX2 * - ARMV8_THUNDERX2
- HOST
- ARMv8 Cavium ThunderX2 CPU - ARMv8 Cavium ThunderX2 CPU
* - WSM * - WSM
- HOST
- Intel Westmere CPU (SSE 4.2) - Intel Westmere CPU (SSE 4.2)
* - SNB * - SNB
- HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1) - Intel Sandy/Ivy Bridge CPU (AVX 1)
* - HSW * - HSW
- HOST
- Intel Haswell CPU (AVX 2) - Intel Haswell CPU (AVX 2)
* - BDW * - BDW
- HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem) - Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem)
* - SKX * - SKX
- HOST
- Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem) - Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem)
* - KNC * - KNC
- HOST
- Intel Knights Corner Xeon Phi - Intel Knights Corner Xeon Phi
* - KNL * - KNL
- HOST
- Intel Knights Landing Xeon Phi - Intel Knights Landing Xeon Phi
* - BGQ * - BGQ
- HOST
- IBM Blue Gene/Q CPU - IBM Blue Gene/Q CPU
* - POWER7 * - POWER7
- IBM POWER8 CPU - HOST
- IBM POWER7 CPU
* - POWER8 * - POWER8
- HOST
- IBM POWER8 CPU - IBM POWER8 CPU
* - POWER9 * - POWER9
- HOST
- IBM POWER9 CPU - IBM POWER9 CPU
* - KEPLER30 * - KEPLER30
- GPU
- NVIDIA Kepler generation CC 3.0 GPU - NVIDIA Kepler generation CC 3.0 GPU
* - KEPLER32 * - KEPLER32
- GPU
- NVIDIA Kepler generation CC 3.2 GPU - NVIDIA Kepler generation CC 3.2 GPU
* - KEPLER35 * - KEPLER35
- GPU
- NVIDIA Kepler generation CC 3.5 GPU - NVIDIA Kepler generation CC 3.5 GPU
* - KEPLER37 * - KEPLER37
- GPU
- NVIDIA Kepler generation CC 3.7 GPU - NVIDIA Kepler generation CC 3.7 GPU
* - MAXWELL50 * - MAXWELL50
- GPU
- NVIDIA Maxwell generation CC 5.0 GPU - NVIDIA Maxwell generation CC 5.0 GPU
* - MAXWELL52 * - MAXWELL52
- GPU
- NVIDIA Maxwell generation CC 5.2 GPU - NVIDIA Maxwell generation CC 5.2 GPU
* - MAXWELL53 * - MAXWELL53
- GPU
- NVIDIA Maxwell generation CC 5.3 GPU - NVIDIA Maxwell generation CC 5.3 GPU
* - PASCAL60 * - PASCAL60
- GPU
- NVIDIA Pascal generation CC 6.0 GPU - NVIDIA Pascal generation CC 6.0 GPU
* - PASCAL61 * - PASCAL61
- GPU
- NVIDIA Pascal generation CC 6.1 GPU - NVIDIA Pascal generation CC 6.1 GPU
* - VOLTA70 * - VOLTA70
- GPU
- NVIDIA Volta generation CC 7.0 GPU - NVIDIA Volta generation CC 7.0 GPU
* - VOLTA72 * - VOLTA72
- GPU
- NVIDIA Volta generation CC 7.2 GPU - NVIDIA Volta generation CC 7.2 GPU
* - TURING75 * - TURING75
- GPU
- NVIDIA Turing generation CC 7.5 GPU - NVIDIA Turing generation CC 7.5 GPU
* - VEGA900
- GPU
- AMD GPU MI25 GFX900
* - VEGA906
- GPU
- AMD GPU MI50/MI60 GFX906
CMake build settings: Basic CMake build settings:
^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
For multicore CPUs using OpenMP, set these 2 variables. For multicore CPUs using OpenMP, set these 2 variables.
.. code-block:: bash .. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above -D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ENABLE_OPENMP=yes -D Kokkos_ENABLE_OPENMP=yes
-D BUILD_OMP=yes -D BUILD_OMP=yes
@ -427,15 +464,19 @@ For NVIDIA GPUs using CUDA, set these variables:
.. code-block:: bash .. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above -D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above -D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_CUDA=yes -D Kokkos_ENABLE_CUDA=yes
-D Kokkos_ENABLE_OPENMP=yes -D Kokkos_ENABLE_OPENMP=yes
-D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper -D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper
The wrapper value is the Cuda nvcc compiler wrapper provided in the This will also enable executing FFTs on the GPU, either via the internal
Kokkos library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should KISSFFT library, or - by preference - with the cuFFT library bundled
include the full path name to the wrapper, e.g. with the CUDA toolkit, depending on whether CMake can identify its
location. The *wrapper* value for ``CMAKE_CXX_COMPILER`` variable is
the path to the CUDA nvcc compiler wrapper provided in the Kokkos
library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should include
the full path name to the wrapper, e.g.
.. code-block:: bash .. code-block:: bash
@ -455,8 +496,8 @@ common packages enabled, you can do the following:
cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake
cmake --build . cmake --build .
Traditional make settings: Basic traditional make settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Choose which hardware to support in ``Makefile.machine`` via Choose which hardware to support in ``Makefile.machine`` via
``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings. See the ``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings. See the
@ -467,7 +508,7 @@ For multicore CPUs using OpenMP:
.. code-block:: make .. code-block:: make
KOKKOS_DEVICES = OpenMP KOKKOS_DEVICES = OpenMP
KOKKOS_ARCH = CPUARCH # CPUARCH = CPU from list above KOKKOS_ARCH = HOSTARCH # HOSTARCH = HOST from list above
For Intel KNLs using OpenMP: For Intel KNLs using OpenMP:
@ -481,7 +522,8 @@ For NVIDIA GPUs using CUDA:
.. code-block:: make .. code-block:: make
KOKKOS_DEVICES = Cuda KOKKOS_DEVICES = Cuda
KOKKOS_ARCH = CPUARCH,GPUARCH # CPUARCH = CPU from list above that is hosting the GPU KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
KOKKOS_CUDA_OPTIONS = "enable_lambda"
# GPUARCH = GPU from list above # GPUARCH = GPU from list above
FFT_INC = -DFFT_CUFFT # enable use of cuFFT (optional) FFT_INC = -DFFT_CUFFT # enable use of cuFFT (optional)
FFT_LIB = -lcufft # link to cuFFT library FFT_LIB = -lcufft # link to cuFFT library
@ -504,6 +546,44 @@ C++ compiler for non-Kokkos, non-CUDA files.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are other allowed options when building with the KOKKOS package
that can improve performance or assist in debugging or profiling. Below
are some examples that may be useful in combination with LAMMPS. For
the full list (which keeps changing as the Kokkos package itself evolves),
please consult the Kokkos library documentation.
As alternative to using multi-threading via OpenMP
(``-DKokkos_ENABLE_OPENMP=on`` or ``KOKKOS_DEVICES=OpenMP``) it is also
possible to use Posix threads directly (``-DKokkos_ENABLE_PTHREAD=on``
or ``KOKKOS_DEVICES=Pthread``). While binding of threads to individual
or groups of CPU cores is managed in OpenMP with environment variables,
you need assistance from either the "hwloc" or "libnuma" library for the
Pthread thread parallelization option. To enable use with CMake:
``-DKokkos_ENABLE_HWLOC=on`` or ``-DKokkos_ENABLE_LIBNUMA=on``; and with
conventional make: ``KOKKOS_USE_TPLS=hwloc`` or
``KOKKOS_USE_TPLS=libnuma``.
The CMake option ``-DKokkos_ENABLE_LIBRT=on`` or the makefile setting
``KOKKOS_USE_TPLS=librt`` enables the use of a more accurate timer
mechanism on many Unix-like platforms for internal profiling.
The CMake option ``-DKokkos_ENABLE_DEBUG=on`` or the makefile setting
``KOKKOS_DEBUG=yes`` enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures. As to be expected, enabling
this option will negatively impact the performance and thus is only
recommended when developing a Kokkos-enabled style in LAMMPS.
The CMake option ``-DKokkos_ENABLE_CUDA_UVM=on`` or the makefile
setting ``KOKKOS_CUDA_OPTIONS=enable_lambda,force_uvm`` enables the
use of CUDA "Unified Virtual Memory" in Kokkos. Please note, that
the LAMMPS KOKKOS package must **always** be compiled with the
*enable_lambda* option when using GPUs.
---------- ----------
.. _latte: .. _latte:

View File

@ -9,10 +9,7 @@ different back end languages such as CUDA, OpenMP, or Pthreads. The
Kokkos library also provides data abstractions to adjust (at compile Kokkos library also provides data abstractions to adjust (at compile
time) the memory layout of data structures like 2d and 3d arrays to time) the memory layout of data structures like 2d and 3d arrays to
optimize performance on different hardware. For more information on optimize performance on different hardware. For more information on
Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_. Kokkos is Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_.
part of `Trilinos <https://www.trilinos.org/>`_. The Kokkos
library was written primarily by Carter Edwards, Christian Trott, and
Dan Sunderland (all Sandia).
The LAMMPS KOKKOS package contains versions of pair, fix, and atom The LAMMPS KOKKOS package contains versions of pair, fix, and atom
styles that use data structures and macros provided by the Kokkos styles that use data structures and macros provided by the Kokkos
@ -21,7 +18,7 @@ package was developed primarily by Christian Trott (Sandia) and Stan
Moore (Sandia) with contributions of various styles by others, Moore (Sandia) with contributions of various styles by others,
including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez
(Sandia). For more information on developing using Kokkos abstractions (Sandia). For more information on developing using Kokkos abstractions
see the Kokkos programmers' guide at /lib/kokkos/doc/Kokkos_PG.pdf. see the Kokkos `Wiki <https://github.com/kokkos/kokkos/wiki>`_.
Kokkos currently provides support for 3 modes of execution (per MPI Kokkos currently provides support for 3 modes of execution (per MPI
task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
@ -31,33 +28,30 @@ compatible with specific hardware.
.. note:: .. note::
Kokkos support within LAMMPS must be built with a C++11 compatible To build with Kokkos support for NVIDIA GPUs, the NVIDIA CUDA toolkit
compiler. This means GCC version 4.7.2 or later, Intel 14.0.4 or later, or
Clang 3.5.2 or later is required.
.. note::
To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA
software version 9.0 or later must be installed on your system. See software version 9.0 or later must be installed on your system. See
the discussion for the :doc:`GPU package <Speed_gpu>` for details of how the discussion for the :doc:`GPU package <Speed_gpu>` for details of
to check and do this. how to check and do this.
.. note:: .. note::
Kokkos with CUDA currently implicitly assumes that the MPI library Kokkos with CUDA currently implicitly assumes that the MPI library is
is CUDA-aware. This is not always the case, especially when using CUDA-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is not pre-compiled MPI libraries provided by a Linux distribution. This is
a problem when using only a single GPU with a single MPI rank. When not a problem when using only a single GPU with a single MPI
running with multiple MPI ranks, you may see segmentation faults without rank. When running with multiple MPI ranks, you may see segmentation
CUDA-aware MPI support. These can be avoided by adding the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the LAMMPS command line or by faults without CUDA-aware MPI support. These can be avoided by adding
using the command :doc:`package kokkos cuda/aware off <package>` in the the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
input file. LAMMPS command line or by using the command :doc:`package kokkos
cuda/aware off <package>` in the input file.
**Building LAMMPS with the KOKKOS package:** Building LAMMPS with the KOKKOS package
"""""""""""""""""""""""""""""""""""""""
See the :ref:`Build extras <kokkos>` doc page for instructions. See the :ref:`Build extras <kokkos>` doc page for instructions.
**Running LAMMPS with the KOKKOS package:** Running LAMMPS with the KOKKOS package
""""""""""""""""""""""""""""""""""""""
All Kokkos operations occur within the context of an individual MPI All Kokkos operations occur within the context of an individual MPI
task running on a single node of the machine. The total number of MPI task running on a single node of the machine. The total number of MPI
@ -66,7 +60,8 @@ usual manner via the mpirun or mpiexec commands, and is independent of
Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and
-npernode switches. Ditto for MPICH via -np and -ppn. -npernode switches. Ditto for MPICH via -np and -ppn.
**Running on a multi-core CPU:** Running on a multi-core CPU
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package Here is a quick overview of how to use the KOKKOS package
for CPU acceleration, assuming one or more 16-core nodes. for CPU acceleration, assuming one or more 16-core nodes.
@ -142,7 +137,8 @@ atom. When using the Kokkos Serial back end or the OpenMP back end with
a single thread, no duplication or atomic operations are used. For CUDA a single thread, no duplication or atomic operations are used. For CUDA
and half neighbor lists, the KOKKOS package always uses atomic operations. and half neighbor lists, the KOKKOS package always uses atomic operations.
**Core and Thread Affinity:** CPU Cores, Sockets and Thread Affinity
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When using multi-threading, it is important for performance to bind When using multi-threading, it is important for performance to bind
both MPI tasks to physical cores, and threads to physical cores, so both MPI tasks to physical cores, and threads to physical cores, so
@ -156,15 +152,16 @@ for your MPI installation), binding can be forced with these flags:
OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ... OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ...
Mvapich2 2.0: mpiexec -np 2 --bind-to socket --map-by socket ./lmp_mvapich ... Mvapich2 2.0: mpiexec -np 2 --bind-to socket --map-by socket ./lmp_mvapich ...
For binding threads with KOKKOS OpenMP, use thread affinity For binding threads with KOKKOS OpenMP, use thread affinity environment
environment variables to force binding. With OpenMP 3.1 (gcc 4.7 or variables to force binding. With OpenMP 3.1 (gcc 4.7 or later, intel 12
later, intel 12 or later) setting the environment variable or later) setting the environment variable ``OMP_PROC_BIND=true`` should
OMP_PROC_BIND=true should be sufficient. In general, for best be sufficient. In general, for best performance with OpenMP 4.0 or later
performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and set ``OMP_PROC_BIND=spread`` and ``OMP_PLACES=threads``. For binding
OMP_PLACES=threads. For binding threads with the KOKKOS pthreads threads with the KOKKOS pthreads option, compile LAMMPS with the hwloc
option, compile LAMMPS the KOKKOS HWLOC=yes option as described below. or libnuma support enabled as described in the :ref:`extra build options page <kokkos>`.
**Running on Knight's Landing (KNL) Intel Xeon Phi:** Running on Knight's Landing (KNL) Intel Xeon Phi
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package for the Here is a quick overview of how to use the KOKKOS package for the
Intel Knight's Landing (KNL) Xeon Phi: Intel Knight's Landing (KNL) Xeon Phi:
@ -222,7 +219,8 @@ threads/task as Nt. The product of these two values should be N, i.e.
them in "native" mode, not "offload" mode like the USER-INTEL package them in "native" mode, not "offload" mode like the USER-INTEL package
supports. supports.
**Running on GPUs:** Running on GPUs
^^^^^^^^^^^^^^^
Use the "-k" :doc:`command-line switch <Run_options>` to specify the Use the "-k" :doc:`command-line switch <Run_options>` to specify the
number of GPUs per node. Typically the -np setting of the mpirun command number of GPUs per node. Typically the -np setting of the mpirun command
@ -257,7 +255,7 @@ one or more nodes, each with two GPUs:
running on GPUs is to use "full" neighbor lists and set the Newton flag running on GPUs is to use "full" neighbor lists and set the Newton flag
to "off" for both pairwise and bonded interactions, along with threaded to "off" for both pairwise and bonded interactions, along with threaded
communication. When running on Maxwell or Kepler GPUs, this will communication. When running on Maxwell or Kepler GPUs, this will
typically be best. For Pascal GPUs, using "half" neighbor lists and typically be best. For Pascal GPUs and beyond, using "half" neighbor lists and
setting the Newton flag to "on" may be faster. For many pair styles, setting the Newton flag to "on" may be faster. For many pair styles,
setting the neighbor binsize equal to twice the CPU default value will setting the neighbor binsize equal to twice the CPU default value will
give speedup, which is the default when running on GPUs. Use the "-pk give speedup, which is the default when running on GPUs. Use the "-pk
@ -270,13 +268,6 @@ one or more nodes, each with two GPUs:
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
.. note::
For good performance of the KOKKOS package on GPUs, you must
have Kepler generation GPUs (or later). The Kokkos library exploits
texture cache options not supported by Telsa generation GPUs (or
older).
.. note:: .. note::
When using a GPU, you will achieve the best performance if your When using a GPU, you will achieve the best performance if your
@ -293,7 +284,8 @@ one or more nodes, each with two GPUs:
kspace, etc., you must set the environment variable CUDA_LAUNCH_BLOCKING=1. kspace, etc., you must set the environment variable CUDA_LAUNCH_BLOCKING=1.
However, this will reduce performance and is not recommended for production runs. However, this will reduce performance and is not recommended for production runs.
**Run with the KOKKOS package by editing an input script:** Run with the KOKKOS package by editing an input script
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Alternatively the effect of the "-sf" or "-pk" switches can be Alternatively the effect of the "-sf" or "-pk" switches can be
duplicated by adding the :doc:`package kokkos <package>` or :doc:`suffix kk <suffix>` commands to your input script. duplicated by adding the :doc:`package kokkos <package>` or :doc:`suffix kk <suffix>` commands to your input script.
@ -316,17 +308,24 @@ You only need to use the :doc:`package kokkos <package>` command if you
wish to change any of its option defaults, as set by the "-k on" wish to change any of its option defaults, as set by the "-k on"
:doc:`command-line switch <Run_options>`. :doc:`command-line switch <Run_options>`.
**Using OpenMP threading and CUDA together (experimental):** **Using OpenMP threading and CUDA together:**
With the KOKKOS package, both OpenMP multi-threading and GPUs can be With the KOKKOS package, both OpenMP multi-threading and GPUs can be
used together in a few special cases. In the Makefile, the compiled and used together in a few special cases. In the makefile for
KOKKOS_DEVICES variable must include both "Cuda" and "OpenMP", as is the conventional build, the KOKKOS_DEVICES variable must include both,
the case for /src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi "Cuda" and "OpenMP", as is the case for ``/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi``.
.. code-block:: bash .. code-block:: bash
KOKKOS_DEVICES=Cuda,OpenMP KOKKOS_DEVICES=Cuda,OpenMP
When building with CMake you need to enable both features as it is done
in the ``kokkos-cuda.cmake`` CMake preset file.
.. code-block:: bash
cmake ../cmake -DKokkos_ENABLE_CUDA=yes -DKokkos_ENABLE_OPENMP=yes
The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA, The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA,
using the "-sf kk" in the command line gives the default CUDA version using the "-sf kk" in the command line gives the default CUDA version
everywhere. However, if the "/kk/host" suffix is added to a specific everywhere. However, if the "/kk/host" suffix is added to a specific
@ -360,7 +359,8 @@ suffix for kspace and bonds, angles, etc. in the input file and the
sure the environment variable CUDA_LAUNCH_BLOCKING is not set to "1" sure the environment variable CUDA_LAUNCH_BLOCKING is not set to "1"
so CPU/GPU overlap can occur. so CPU/GPU overlap can occur.
**Speed-ups to expect:** Performance to expect
"""""""""""""""""""""
The performance of KOKKOS running in different modes is a function of The performance of KOKKOS running in different modes is a function of
your hardware, which KOKKOS-enable styles are used, and the problem your hardware, which KOKKOS-enable styles are used, and the problem
@ -377,52 +377,26 @@ Generally speaking, the following rules of thumb apply:
performance of a KOKKOS style is a bit slower than the USER-OMP performance of a KOKKOS style is a bit slower than the USER-OMP
package. package.
* When running large number of atoms per GPU, KOKKOS is typically faster * When running large number of atoms per GPU, KOKKOS is typically faster
than the GPU package. than the GPU package when compiled for double precision. The benefit
of using single or mixed precision with the GPU package depends
significantly on the hardware in use and the simulated system and pair
style.
* When running on Intel hardware, KOKKOS is not as fast as * When running on Intel hardware, KOKKOS is not as fast as
the USER-INTEL package, which is optimized for that hardware. the USER-INTEL package, which is optimized for x86 hardware (not just
from Intel) and compilation with the Intel compilers. The USER-INTEL
package also can increase the vector length of vector instructions
by switching to single or mixed precision mode.
See the `Benchmark page <https://lammps.sandia.gov/bench.html>`_ of the See the `Benchmark page <https://lammps.sandia.gov/bench.html>`_ of the
LAMMPS web site for performance of the KOKKOS package on different LAMMPS web site for performance of the KOKKOS package on different
hardware. hardware.
**Advanced Kokkos options:** Advanced Kokkos options
"""""""""""""""""""""""
There are other allowed options when building with the KOKKOS package. There are other allowed options when building with the KOKKOS package
As explained on the :ref:`Build extras <kokkos>` doc page, that can improve performance or assist in debugging or profiling.
they can be set either as variables on the make command line or in They are explained on the :ref:`KOKKOS section of the build extras <kokkos>` doc page,
Makefile.machine, or they can be specified as CMake variables. Each
takes a value shown below. The default value is listed, which is set
in the lib/kokkos/Makefile.kokkos file.
* KOKKOS_DEBUG, values = *yes*\ , *no*\ , default = *no*
* KOKKOS_USE_TPLS, values = *hwloc*\ , *librt*\ , *experimental_memkind*, default = *none*
* KOKKOS_CXX_STANDARD, values = *c++11*\ , *c++1z*\ , default = *c++11*
* KOKKOS_OPTIONS, values = *aggressive_vectorization*, *disable_profiling*, default = *none*
* KOKKOS_CUDA_OPTIONS, values = *force_uvm*, *use_ldg*, *rdc*\ , *enable_lambda*, default = *enable_lambda*
KOKKOS_USE_TPLS=hwloc binds threads to hardware cores, so they do not
migrate during a simulation. KOKKOS_USE_TPLS=hwloc should always be
used if running with KOKKOS_DEVICES=Pthreads for pthreads. It is not
necessary for KOKKOS_DEVICES=OpenMP for OpenMP, because OpenMP
provides alternative methods via environment variables for binding
threads to hardware cores. More info on binding threads to cores is
given on the :doc:`Speed omp <Speed_omp>` doc page.
KOKKOS_USE_TPLS=librt enables use of a more accurate timer mechanism
on most Unix platforms. This library is not available on all
platforms.
KOKKOS_DEBUG is only useful when developing a Kokkos-enabled style
within LAMMPS. KOKKOS_DEBUG=yes enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures.
KOKKOS_CXX_STANDARD and KOKKOS_OPTIONS are typically not changed when
building LAMMPS.
KOKKOS_CUDA_OPTIONS are additional options for CUDA. The LAMMPS KOKKOS
package must be compiled with the *enable_lambda* option when using
GPUs.
Restrictions Restrictions
"""""""""""" """"""""""""

View File

@ -499,6 +499,7 @@ cuda
Cuda Cuda
CUDA CUDA
CuH CuH
cuFFT
Cummins Cummins
Curk Curk
customIDs customIDs
@ -1544,6 +1545,7 @@ libmeam
libmessage libmessage
libmpi libmpi
libmpich libmpich
libnuma
libplumed libplumed
libplumedKernel libplumedKernel
libpng libpng

View File

@ -40,6 +40,13 @@ cmake ${srcdir} \
```` ````
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below. which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
## Platform-specific Problems
### Cray
* The Cray compiler wrappers do static linking by default. This seems to break the Kokkos build. You will likely need to set the environment variable `CRAYPE_LINK_TYPE=dynamic` in order to link correctly. Kokkos warns during configure if this is missing.
* The Cray compiler identifies to CMake as Clang, but it sometimes has its own flags that differ from Clang. We try to include all exceptions, but flag errors may occur in which a Clang-specific flag is passed that the Cray compiler does not recognize.
## Spack ## Spack
An alternative to manually building with the CMake is to use the Spack package manager. An alternative to manually building with the CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list: To do so, download the `kokkos-spack` git repo and add to the package list:
@ -63,6 +70,7 @@ For a complete list of Kokkos options, run:
```` ````
spack info kokkos spack info kokkos
```` ````
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
#### Spack Development #### Spack Development
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
@ -73,32 +81,8 @@ spack find -p kokkos ...
```` ````
where `...` is the unique spec identifying the particular Kokkos configuration and version. where `...` is the unique spec identifying the particular Kokkos configuration and version.
A better way to use Spack for doing Kokkos development is the DIY feature of Spack. A better way to use Spack for doing Kokkos development is the dev-build feature of Spack.
If you wish to develop Kokkos itself, go to the Kokkos source folder: For dev-build details, consult the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
````
spack diy -u cmake kokkos@diy ...
````
where `...` is a Spack spec identifying the exact Kokkos configuration.
This then creates a `spack-build` directory where you can run `make`.
If doing development on a downstream project, you can do almost exactly the same thing.
````
spack diy -u cmake ${myproject}@${myversion} ... ^kokkos...
````
where the `...` are the specs for your project and the desired Kokkos configuration.
Again, a `spack-build` directory will be created where you can run `make`.
Spack has a few idiosyncracies that make building outside of Spack annoying related to Spack forcing use of a compiler wrapper. This can be worked around by having a `-DSpack_WORKAROUND=On` given your CMake. Then add the block of code to your CMakeLists.txt:
````
if (Spack_WORKAROUND)
set(SPACK_CXX $ENV{SPACK_CXX})
if(SPACK_CXX)
set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
set(ENV{CXX} ${SPACK_CXX})
endif()
endif()
````
# Kokkos Keyword Listing # Kokkos Keyword Listing
@ -157,6 +141,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_DEPRECATED_CODE * Kokkos_ENABLE_DEPRECATED_CODE
* Whether to enable deprecated code * Whether to enable deprecated code
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ENABLE_EXAMPLES
* Whether to enable building examples
* BOOL Default: OFF
* Kokkos_ENABLE_HPX_ASYNC_DISPATCH * Kokkos_ENABLE_HPX_ASYNC_DISPATCH
* Whether HPX supports asynchronous dispatch * Whether HPX supports asynchronous dispatch
* BOOL Default: OFF * BOOL Default: OFF

View File

@ -1,5 +1,59 @@
# Change Log # Change Log
## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00)
**Features:**
- HIP Support for AMD
- OpenMPTarget Support with clang
- Windows VS19 (Serial) Support [\#1533](https://github.com/kokkos/kokkos/issues/1533)
**Implemented enhancements:**
- generate\_makefile.bash should allow tests to be disabled [\#2886](https://github.com/kokkos/kokkos/issues/2886)
- clang/7+cuda/9 build -Werror-unused parameter error in nightly test [\#2884](https://github.com/kokkos/kokkos/issues/2884)
- ScatterView memory space is not user settable [\#2826](https://github.com/kokkos/kokkos/issues/2826)
- clang/8+cuda/10.0 build error with c++17 [\#2809](https://github.com/kokkos/kokkos/issues/2809)
- warnings.... [\#2805](https://github.com/kokkos/kokkos/issues/2805)
- Kokkos version in cpp define [\#2787](https://github.com/kokkos/kokkos/issues/2787)
- Remove Defunct QThreads Backend [\#2751](https://github.com/kokkos/kokkos/issues/2751)
- Improve Kokkos::fence behavior with multiple execution spaces [\#2659](https://github.com/kokkos/kokkos/issues/2659)
- polylithic\(?\) initialization of Kokkos [\#2658](https://github.com/kokkos/kokkos/issues/2658)
- Unnecessary\(?\) check for host execution space initialization from Cuda initialization [\#2652](https://github.com/kokkos/kokkos/issues/2652)
- Kokkos error reporting failures with CUDA GPUs in exclusive mode [\#2471](https://github.com/kokkos/kokkos/issues/2471)
- atomicMax equivalent \(and other atomics\) [\#2401](https://github.com/kokkos/kokkos/issues/2401)
- Fix alignment for Kokkos::complex [\#2255](https://github.com/kokkos/kokkos/issues/2255)
- Warnings with Cuda 10.1 [\#2206](https://github.com/kokkos/kokkos/issues/2206)
- dual view with Kokkos::ViewAllocateWithoutInitializing [\#2188](https://github.com/kokkos/kokkos/issues/2188)
- Check error code from cudaOccupancyMaxActiveBlocksPerMultiprocessor [\#2172](https://github.com/kokkos/kokkos/issues/2172)
- Add non-member Kokkos::resize/realloc for DualView [\#2170](https://github.com/kokkos/kokkos/issues/2170)
- Construct DualView without initialization [\#2046](https://github.com/kokkos/kokkos/issues/2046)
- Expose is\_assignable to determine if one view can be assigned to another [\#1936](https://github.com/kokkos/kokkos/issues/1936)
- profiling label [\#1935](https://github.com/kokkos/kokkos/issues/1935)
- team\_broadcast of bool failed on CUDA backend [\#1908](https://github.com/kokkos/kokkos/issues/1908)
- View static\_extent [\#660](https://github.com/kokkos/kokkos/issues/660)
- Misleading Kokkos::Cuda::initialize ERROR message when compiled for wrong GPU architecture [\#1944](https://github.com/kokkos/kokkos/issues/1944)
- Cryptic Error When Malloc Fails [\#2164](https://github.com/kokkos/kokkos/issues/2164)
- Drop support for intermediate standards in CMake [\#2336](https://github.com/kokkos/kokkos/issues/2336)
**Fixed bugs:**
- DualView sync\_device with length zero creates cuda errors [\#2946](https://github.com/kokkos/kokkos/issues/2946)
- building with nvcc and clang \(or clang based XL\) as host compiler: "Kokkos::atomic\_fetch\_min\(volatile int \*, int\)" has already been defined [\#2903](https://github.com/kokkos/kokkos/issues/2903)
- Cuda 9.1,10.1 debug builds failing due to -Werror=unused-parameter [\#2880](https://github.com/kokkos/kokkos/issues/2880)
- clang -Werror: Kokkos\_FixedBufferMemoryPool.hpp:140:28: error: unused parameter 'alloc\_size' [\#2869](https://github.com/kokkos/kokkos/issues/2869)
- intel/16.0.1, intel/17.0.1 nightly build failures with debugging enabled [\#2867](https://github.com/kokkos/kokkos/issues/2867)
- intel/16.0.1 debug build errors [\#2863](https://github.com/kokkos/kokkos/issues/2863)
- xl/16.1.1 with cpp14, openmp build, nightly test failures [\#2856](https://github.com/kokkos/kokkos/issues/2856)
- Intel nightly test failures: team\_vector [\#2852](https://github.com/kokkos/kokkos/issues/2852)
- Kokkos Views with intmax/2\<N\<intmax can hang during construction [\#2850](https://github.com/kokkos/kokkos/issues/2850)
- workgraph\_fib test seg-faults with threads backend and hwloc [\#2797](https://github.com/kokkos/kokkos/issues/2797)
- cuda.view\_64bit test hangs on Power8+Kepler37 system - develop and 2.9.00 branches [\#2771](https://github.com/kokkos/kokkos/issues/2771)
- device\_type for Kokkos\_Random ? [\#2693](https://github.com/kokkos/kokkos/issues/2693)
- "More than one tag given" error in Experimental::require\(\) [\#2608](https://github.com/kokkos/kokkos/issues/2608)
- Segfault on Marvell from our finalization stack [\#2542](https://github.com/kokkos/kokkos/issues/2542)
## [3.0.00](https://github.com/kokkos/kokkos/tree/3.0.00) (2020-01-27) ## [3.0.00](https://github.com/kokkos/kokkos/tree/3.0.00) (2020-01-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.9.00...3.0.00) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.9.00...3.0.00)

View File

@ -25,6 +25,8 @@ SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
# Needed to simplify syntax of if statements # Needed to simplify syntax of if statements
CMAKE_POLICY(SET CMP0054 NEW) CMAKE_POLICY(SET CMP0054 NEW)
# Needed to make IN_LIST a valid operator
CMAKE_POLICY(SET CMP0057 NEW)
# Is this a build as part of Trilinos? # Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL) IF(COMMAND TRIBITS_PACKAGE_DECL)
@ -75,7 +77,15 @@ IF(NOT KOKKOS_HAS_TRILINOS)
ENDIF() ENDIF()
ENDif() ENDif()
IF(NOT DEFINED ${PROJECT_NAME}) IF(NOT DEFINED ${PROJECT_NAME})
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
ENDIF()
PROJECT(Kokkos CXX) PROJECT(Kokkos CXX)
IF(Kokkos_ENABLE_HIP)
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
@ -92,9 +102,10 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 0) set(Kokkos_VERSION_MINOR 1)
set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables")
@ -118,18 +129,14 @@ ENDIF()
# These are the variables we will append to as we go # These are the variables we will append to as we go
# I really wish these were regular variables # I really wish these were regular variables
# but scoping issues can make it difficult # but scoping issues can make it difficult
GLOBAL_RESET(KOKKOS_COMPILE_OPTIONS) GLOBAL_SET(KOKKOS_COMPILE_OPTIONS)
GLOBAL_RESET(KOKKOS_LINK_OPTIONS) GLOBAL_SET(KOKKOS_LINK_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDAFE_OPTIONS) GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_RESET(KOKKOS_XCOMPILER_OPTIONS) GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS)
# We need to append text here for making sure TPLs # We need to append text here for making sure TPLs
# we import are available for an installed Kokkos # we import are available for an installed Kokkos
GLOBAL_RESET(KOKKOS_TPL_EXPORTS) GLOBAL_SET(KOKKOS_TPL_EXPORTS)
# We need these for controlling the exact -std flag
GLOBAL_RESET(KOKKOS_DONT_ALLOW_EXTENSIONS)
GLOBAL_RESET(KOKKOS_USE_CXX_EXTENSIONS)
GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
# Include a set of Kokkos-specific wrapper functions that # Include a set of Kokkos-specific wrapper functions that
# will either call raw CMake or TriBITS # will either call raw CMake or TriBITS
@ -137,6 +144,9 @@ GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables
# to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
# The build environment setup goes in the following steps # The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES # 1) Check all the enable options. This includes checking Kokkos_DEVICES
# 2) Check the compiler ID (type and version) # 2) Check the compiler ID (type and version)
@ -187,14 +197,21 @@ IF (KOKKOS_HAS_TRILINOS)
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
# we have to match the annoying behavior # we have to match the annoying behavior
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}") STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
ENDFOREACH()
FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS}) FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS})
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH() ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}") SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}")
ENDIF()
# Both parent scope and this package # Both parent scope and this package
# In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in # In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in
# TRILINOS_TOPLEVEL_CXX_FLAGS # TRILINOS_TOPLEVEL_CXX_FLAGS
@ -203,6 +220,8 @@ IF (KOKKOS_HAS_TRILINOS)
#CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here #CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here
#These flags get set up in KOKKOS_PACKAGE_DECL, which means they #These flags get set up in KOKKOS_PACKAGE_DECL, which means they
#must be configured before KOKKOS_PACKAGE_DECL #must be configured before KOKKOS_PACKAGE_DECL
SET(KOKKOS_ALL_COMPILE_OPTIONS
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_ALL_COMPILE_OPTIONS}>)
ENDIF() ENDIF()
KOKKOS_PACKAGE_DECL() KOKKOS_PACKAGE_DECL()

View File

@ -6,15 +6,20 @@ ifndef KOKKOS_PATH
endif endif
CXXFLAGS=$(CCFLAGS) CXXFLAGS=$(CCFLAGS)
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 1
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
KOKKOS_DEVICES ?= "OpenMP" KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads" #KOKKOS_DEVICES ?= "Pthread"
# Options: # Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX # Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9 # IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega # AMD-GPUS: Vega900,Vega906
# AMD-CPUS: AMDAVX,Ryzen,EPYC # AMD-CPUS: AMDAVX,Ryzen,EPYC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
# Options: yes,no # Options: yes,no
@ -35,6 +40,9 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr
KOKKOS_CUDA_OPTIONS ?= "enable_lambda" KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
# Options: rdc
KOKKOS_HIP_OPTIONS ?= ""
# Default settings specific options. # Default settings specific options.
# Options: enable_async_dispatch # Options: enable_async_dispatch
KOKKOS_HPX_OPTIONS ?= "" KOKKOS_HPX_OPTIONS ?= ""
@ -82,29 +90,50 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti) KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
# Check for Kokkos Host Execution Spaces one of which must be on. # Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads)
KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX)
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) KOKKOS_INTERNAL_USE_SERIAL := 1
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif endif
endif endif
endif endif
# Check for other Execution Spaces. # Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda) KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm) KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP)
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget) KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
KOKKOS_DEVICELIST =
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_DEVICELIST += Serial
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_DEVICELIST += OpenMP
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_DEVICELIST += Threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_DEVICELIST += HPX
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_DEVICELIST += Cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_DEVICELIST += HIP
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_DEVICELIST += OPENMPTARGET
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
ifeq ($(origin CUDA_PATH), undefined) ifeq ($(origin CUDA_PATH), undefined)
@ -132,6 +161,7 @@ KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMP
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
# Check Host Compiler if using NVCC through nvcc_wrapper # Check Host Compiler if using NVCC through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -180,20 +210,20 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = KOKKOS_INTERNAL_COMPILER_WARNINGS =
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# TODO check if cray accepts GNU style warnings # TODO check if cray accepts GNU style warnings
KOKKOS_INTERNAL_COMPILER_WARNINGS = KOKKOS_INTERNAL_COMPILER_WARNINGS =
else else
#gcc #gcc
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
endif endif
endif endif
endif endif
@ -230,7 +260,12 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif endif
endif endif
@ -353,11 +388,8 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen) KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC) KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri) KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo) KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega)
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901)
# Any AVX? # Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
@ -431,6 +463,10 @@ tmp := $(call kokkos_append_header,'\#else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H') tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif') tmp := $(call kokkos_append_header,'\#endif')
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */") tmp := $(call kokkos_append_header,"/* Execution Spaces */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -442,9 +478,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM') tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1') tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET') tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -455,10 +497,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS") tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX") tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
endif endif
@ -966,6 +1004,14 @@ endif
# Figure out the architecture flag for Cuda. # Figure out the architecture flag for Cuda.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
@ -974,7 +1020,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
else else
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) ) $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
endif endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
@ -1042,55 +1098,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_CXXFLAGS += --expt-extended-lambda
endif endif
endif endif
# Figure out the architecture flag for ROCm. # Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
# Lets start with adding architecture defines # Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
endif endif
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX)) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXLDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm KOKKOS_CXXFLAGS+=-fgpu-rdc
KOKKOS_TPL_LIBRARY_NAMES += hc_am m KOKKOS_LDFLAGS+=-fgpu-rdc
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG) else
KOKKOS_CXXFLAGS+=-fno-gpu-rdc
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp) KOKKOS_LDFLAGS+=-fno-gpu-rdc
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/ROCm/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
endif endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
@ -1141,7 +1191,7 @@ endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
@ -1149,6 +1199,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
endif endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -1178,22 +1229,6 @@ endif
KOKKOS_TPL_LIBRARY_NAMES += pthread KOKKOS_TPL_LIBRARY_NAMES += pthread
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(QTHREADS_PATH),)
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib
KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
endif
KOKKOS_LIBS += -lqthread
KOKKOS_TPL_LIBRARY_NAMES += qthread
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)

View File

@ -55,6 +55,17 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_KernelLaunch.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
@ -79,13 +90,6 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
@ -106,10 +110,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
endif endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp

View File

@ -115,27 +115,27 @@ backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...).
We are using the following set of flags: We are using the following set of flags:
* GCC: * GCC:
```` ````
-Wall -Wshadow -pedantic -Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wignored-qualifiers -Wempty-body
-Wclobbered -Wuninitialized -Wclobbered -Wuninitialized
```` ````
* Intel: * Intel:
```` ````
-Wall -Wshadow -pedantic -Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits -Werror -Wsign-compare -Wtype-limits
-Wuninitialized -Wuninitialized
```` ````
* Clang: * Clang:
```` ````
-Wall -Wshadow -pedantic -Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits -Werror -Wsign-compare -Wtype-limits
-Wuninitialized -Wuninitialized
```` ````
* NVCC: * NVCC:
```` ````
-Wall -Wshadow -pedantic -Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits -Werror -Wsign-compare -Wtype-limits
-Wuninitialized -Wuninitialized
```` ````

View File

@ -537,6 +537,145 @@ struct rand<Generator, Kokkos::complex<double> > {
} }
}; };
template <class DeviceType>
class Random_XorShift1024_Pool;
namespace Impl {
template <bool UseCArrayState>
struct Random_XorShift1024_State {
uint64_t state_[16];
KOKKOS_DEFAULTED_FUNCTION
Random_XorShift1024_State() = default;
template <class StateViewType>
KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
int state_idx) {
for (int i = 0; i < 16; i++) state_[i] = v(state_idx, i);
}
KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i]; }
KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i]; }
};
template <>
struct Random_XorShift1024_State<false> {
uint64_t* state_;
const int stride_;
KOKKOS_FUNCTION
Random_XorShift1024_State() : state_(nullptr), stride_(1){};
template <class StateViewType>
KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
int state_idx)
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i * stride_]; }
KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i * stride_]; }
};
template <class ExecutionSpace>
struct Random_XorShift1024_UseCArrayState : std::true_type {};
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Cuda> : std::false_type {};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::HIP>
: std::false_type {};
#endif
#ifdef KOKKOS_ENABLE_OPENMPTARGET
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
: std::false_type {};
#endif
template <class ExecutionSpace>
struct Random_UniqueIndex {
using locks_view_type = View<int*, ExecutionSpace>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = ExecutionSpace::hardware_thread_id();
#else
const int i = ExecutionSpace::impl_hardware_thread_id();
#endif
return i;
#else
return 0;
#endif
}
};
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_UniqueIndex<Kokkos::Cuda> {
using locks_view_type = View<int*, Kokkos::Cuda>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
}
};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
const int i_offset =
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
hipThreadIdx_z;
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
hipBlockIdx_z) *
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
}
};
#endif
} // namespace Impl
template <class DeviceType> template <class DeviceType>
class Random_XorShift64_Pool; class Random_XorShift64_Pool;
@ -550,10 +689,10 @@ class Random_XorShift64 {
public: public:
typedef DeviceType device_type; typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU }; constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
enum { MAX_RAND = static_cast<int>(0xffffffff / 2) }; constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffLL / 2 - 1) }; constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift64(uint64_t state, int state_idx = 0) Random_XorShift64(uint64_t state, int state_idx = 0)
@ -637,10 +776,12 @@ class Random_XorShift64 {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; } float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; } float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) { float frand(const float& start, const float& end) {
@ -648,10 +789,12 @@ class Random_XorShift64 {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; } double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; } double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) { double drand(const double& start, const double& end) {
@ -662,6 +805,11 @@ class Random_XorShift64 {
// number // number
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double normal() { double normal() {
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0; double S = 2.0;
double U; double U;
while (S >= 1.0) { while (S >= 1.0) {
@ -669,7 +817,7 @@ class Random_XorShift64 {
const double V = 2.0 * drand() - 1.0; const double V = 2.0 * drand() - 1.0;
S = U * U + V * V; S = U * U + V * V;
} }
return U * std::sqrt(-2.0 * log(S) / S); return U * sqrt(-2.0 * log(S) / S);
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -681,9 +829,10 @@ class Random_XorShift64 {
template <class DeviceType = Kokkos::DefaultExecutionSpace> template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool { class Random_XorShift64_Pool {
private: private:
typedef View<int*, DeviceType> lock_type; using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<uint64_t*, DeviceType> state_data_type; typedef View<uint64_t*, DeviceType> state_data_type;
lock_type locks_; locks_type locks_;
state_data_type state_; state_data_type state_;
int num_states_; int num_states_;
@ -695,11 +844,8 @@ class Random_XorShift64_Pool {
Random_XorShift64_Pool() { num_states_ = 0; } Random_XorShift64_Pool() { num_states_ = 0; }
Random_XorShift64_Pool(uint64_t seed) { Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0; num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads()); init(seed, execution_space().concurrency());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -719,11 +865,11 @@ class Random_XorShift64_Pool {
num_states_ = num_states; num_states_ = num_states;
locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_); locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_); state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_); typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename lock_type::HostMirror h_lock = create_mirror_view(locks_); typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
// Execute on the HostMirror's default execution space. // Execute on the HostMirror's default execution space.
Random_XorShift64<typename state_data_type::HostMirror::execution_space> Random_XorShift64<typename state_data_type::HostMirror::execution_space>
@ -746,13 +892,8 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state() const { Random_XorShift64<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE const int i =
const int i = DeviceType::hardware_thread_id(); Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
;
#else
const int i = DeviceType::impl_hardware_thread_id();
;
#endif
return Random_XorShift64<DeviceType>(state_(i), i); return Random_XorShift64<DeviceType>(state_(i), i);
} }
@ -765,35 +906,35 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const { void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_; state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
} }
}; };
template <class DeviceType>
class Random_XorShift1024_Pool;
template <class DeviceType> template <class DeviceType>
class Random_XorShift1024 { class Random_XorShift1024 {
using execution_space = typename DeviceType::execution_space;
private: private:
int p_; int p_;
const int state_idx_; const int state_idx_;
uint64_t state_[16]; Impl::Random_XorShift1024_State<
Impl::Random_XorShift1024_UseCArrayState<execution_space>::value>
state_;
friend class Random_XorShift1024_Pool<DeviceType>; friend class Random_XorShift1024_Pool<DeviceType>;
public: public:
typedef Random_XorShift1024_Pool<DeviceType> pool_type; typedef Random_XorShift1024_Pool<DeviceType> pool_type;
typedef DeviceType device_type; typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU }; constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) }; constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) }; constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p, Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0) int state_idx = 0)
: p_(p), state_idx_(state_idx) { : p_(p), state_idx_(state_idx), state_(state, state_idx) {}
for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
uint32_t urand() { uint32_t urand() {
@ -876,10 +1017,12 @@ class Random_XorShift1024 {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; } float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; } float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) { float frand(const float& start, const float& end) {
@ -887,10 +1030,12 @@ class Random_XorShift1024 {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; } double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; } double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) { double drand(const double& start, const double& end) {
@ -901,6 +1046,11 @@ class Random_XorShift1024 {
// number // number
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
double normal() { double normal() {
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0; double S = 2.0;
double U; double U;
while (S >= 1.0) { while (S >= 1.0) {
@ -908,7 +1058,7 @@ class Random_XorShift1024 {
const double V = 2.0 * drand() - 1.0; const double V = 2.0 * drand() - 1.0;
S = U * U + V * V; S = U * U + V * V;
} }
return U * std::sqrt(-2.0 * log(S) / S); return U * sqrt(-2.0 * log(S) / S);
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -920,10 +1070,12 @@ class Random_XorShift1024 {
template <class DeviceType = Kokkos::DefaultExecutionSpace> template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool { class Random_XorShift1024_Pool {
private: private:
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<int*, DeviceType> int_view_type; typedef View<int*, DeviceType> int_view_type;
typedef View<uint64_t * [16], DeviceType> state_data_type; typedef View<uint64_t * [16], DeviceType> state_data_type;
int_view_type locks_; locks_type locks_;
state_data_type state_; state_data_type state_;
int_view_type p_; int_view_type p_;
int num_states_; int num_states_;
@ -939,11 +1091,8 @@ class Random_XorShift1024_Pool {
inline Random_XorShift1024_Pool(uint64_t seed) { inline Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0; num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads()); init(seed, execution_space().concurrency());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -965,12 +1114,12 @@ class Random_XorShift1024_Pool {
inline void init(uint64_t seed, int num_states) { inline void init(uint64_t seed, int num_states) {
if (seed == 0) seed = uint64_t(1318319); if (seed == 0) seed = uint64_t(1318319);
num_states_ = num_states; num_states_ = num_states;
locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_); locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_); typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
typename int_view_type::HostMirror h_p = create_mirror_view(p_); typename int_view_type::HostMirror h_p = create_mirror_view(p_);
// Execute on the HostMirror's default execution space. // Execute on the HostMirror's default execution space.
@ -997,11 +1146,8 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state() const { Random_XorShift1024<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE const int i =
const int i = DeviceType::hardware_thread_id(); Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
#else
const int i = DeviceType::impl_hardware_thread_id();
#endif
return Random_XorShift1024<DeviceType>(state_, p_(i), i); return Random_XorShift1024<DeviceType>(state_, p_(i), i);
}; };
@ -1014,482 +1160,11 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const { void free_state(const Random_XorShift1024<DeviceType>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
p_(state.state_idx_) = state.p_; p_(state.state_idx_) = state.p_;
locks_(state.state_idx_) = 0;
} }
}; };
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
template <>
class Random_XorShift1024<Kokkos::Cuda> {
private:
int p_;
const int state_idx_;
uint64_t* state_;
const int stride_;
friend class Random_XorShift1024_Pool<Kokkos::Cuda>;
public:
typedef Kokkos::Cuda device_type;
typedef Random_XorShift1024_Pool<device_type> pool_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p),
state_idx_(state_idx),
state_(&state(state_idx, 0)),
stride_(state.stride_1()) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp =
(state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
tmp = tmp >> 16;
return static_cast<uint32_t>(tmp & MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return ((state_[p_ * stride_] = state_0 ^ state_1) *
1181783497276652981LL) -
1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND / range) * range;
uint32_t tmp = urand();
while (tmp >= max_val) urand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end) {
return urand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64 / range) * range;
uint64_t tmp = urand64();
while (tmp >= max_val) urand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end) {
return urand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int rand() { return static_cast<int>(urand() / 2); }
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND / range) * range;
int tmp = rand();
while (tmp >= max_val) rand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end) {
return rand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64 / range) * range;
int64_t tmp = rand64();
while (tmp >= max_val) rand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end) {
return rand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
return frand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
return frand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev = 1.0) {
return mean + normal() * std_dev;
}
};
template <>
inline Random_XorShift64_Pool<Kokkos::Cuda>::Random_XorShift64_Pool(
uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Cuda>
Random_XorShift64_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Cuda>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Cuda>(state_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(
const Random_XorShift64<Kokkos::Cuda>& state) const {
state_(state.state_idx_) = state.state_;
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif
}
template <>
inline Random_XorShift1024_Pool<Kokkos::Cuda>::Random_XorShift1024_Pool(
uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Cuda>
Random_XorShift1024_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(
const Random_XorShift1024<Kokkos::Cuda>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif
}
#endif
#if defined(KOKKOS_ENABLE_ROCM)
template <>
class Random_XorShift1024<Kokkos::Experimental::ROCm> {
private:
int p_;
const int state_idx_;
uint64_t* state_;
const int stride_;
friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>;
public:
typedef Kokkos::Experimental::ROCm device_type;
typedef Random_XorShift1024_Pool<device_type> pool_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p),
state_idx_(state_idx),
state_(&state(state_idx, 0)),
stride_(state.stride_1()) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp =
(state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
tmp = tmp >> 16;
return static_cast<uint32_t>(tmp & MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return ((state_[p_ * stride_] = state_0 ^ state_1) *
1181783497276652981LL) -
1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND / range) * range;
uint32_t tmp = urand();
while (tmp >= max_val) urand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end) {
return urand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64 / range) * range;
uint64_t tmp = urand64();
while (tmp >= max_val) urand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end) {
return urand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int rand() { return static_cast<int>(urand() / 2); }
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND / range) * range;
int tmp = rand();
while (tmp >= max_val) rand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end) {
return rand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64 / range) * range;
int64_t tmp = rand64();
while (tmp >= max_val) rand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end) {
return rand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
return frand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
return frand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev = 1.0) {
return mean + normal() * std_dev;
}
};
template <>
inline Random_XorShift64_Pool<
Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Experimental::ROCm>
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(
const Random_XorShift64<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
return;
#endif
}
template <>
inline Random_XorShift1024_Pool<
Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Experimental::ROCm>
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(
const Random_XorShift1024<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
locks_(state.state_idx_) = 0;
return;
#endif
}
#endif
namespace Impl { namespace Impl {
template <class ViewType, class RandomPool, int loops, int rank, template <class ViewType, class RandomPool, int loops, int rank,
@ -2043,7 +1718,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) { typename ViewType::const_value_type range) {
int64_t LDA = a.extent(0); int64_t LDA = a.extent(0);
if (LDA > 0) if (LDA > 0)
parallel_for((LDA + 127) / 128, parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_range<ViewType, RandomPool, 128, Impl::fill_random_functor_range<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>( ViewType::Rank, IndexType>(
a, g, range)); a, g, range));
@ -2055,7 +1730,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type end) { typename ViewType::const_value_type end) {
int64_t LDA = a.extent(0); int64_t LDA = a.extent(0);
if (LDA > 0) if (LDA > 0)
parallel_for((LDA + 127) / 128, parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128, Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>( ViewType::Rank, IndexType>(
a, g, begin, end)); a, g, begin, end));

View File

@ -201,7 +201,7 @@ class BinSort {
bool sort_within_bins; bool sort_within_bins;
public: public:
BinSort() {} BinSort() = default;
//---------------------------------------- //----------------------------------------
// Constructor: takes the keys, the binning_operator and optionally whether to // Constructor: takes the keys, the binning_operator and optionally whether to
@ -327,7 +327,7 @@ class BinSort {
Kokkos::RangePolicy<execution_space>(0, len), functor); Kokkos::RangePolicy<execution_space>(0, len), functor);
} }
Kokkos::fence(); execution_space().fence();
} }
template <class ValuesViewType> template <class ValuesViewType>
@ -349,14 +349,14 @@ class BinSort {
public: public:
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(const bin_count_tag& tag, const int& i) const { void operator()(const bin_count_tag& /*tag*/, const int i) const {
const int j = range_begin + i; const int j = range_begin + i;
bin_count_atomic(bin_op.bin(keys, j))++; bin_count_atomic(bin_op.bin(keys, j))++;
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(const bin_offset_tag& tag, const int& i, value_type& offset, void operator()(const bin_offset_tag& /*tag*/, const int i,
const bool& final) const { value_type& offset, const bool& final) const {
if (final) { if (final) {
bin_offsets(i) = offset; bin_offsets(i) = offset;
} }
@ -364,7 +364,7 @@ class BinSort {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(const bin_binning_tag& tag, const int& i) const { void operator()(const bin_binning_tag& /*tag*/, const int i) const {
const int j = range_begin + i; const int j = range_begin + i;
const int bin = bin_op.bin(keys, j); const int bin = bin_op.bin(keys, j);
const int count = bin_count_atomic(bin)++; const int count = bin_count_atomic(bin)++;
@ -373,7 +373,7 @@ class BinSort {
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(const bin_sort_bins_tag& tag, const int& i) const { void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const {
auto bin_size = bin_count_const(i); auto bin_size = bin_count_const(i);
if (bin_size <= 1) return; if (bin_size <= 1) return;
int upper_bound = bin_offsets(i) + bin_size; int upper_bound = bin_offsets(i) + bin_size;
@ -381,7 +381,7 @@ class BinSort {
while (!sorted) { while (!sorted) {
sorted = true; sorted = true;
int old_idx = sort_order(bin_offsets(i)); int old_idx = sort_order(bin_offsets(i));
int new_idx; int new_idx = 0;
for (int k = bin_offsets(i) + 1; k < upper_bound; k++) { for (int k = bin_offsets(i) + 1; k < upper_bound; k++) {
new_idx = sort_order(k); new_idx = sort_order(k);
@ -446,7 +446,7 @@ struct BinOp3D {
typename KeyViewType::non_const_value_type range_[3]; typename KeyViewType::non_const_value_type range_[3];
typename KeyViewType::non_const_value_type min_[3]; typename KeyViewType::non_const_value_type min_[3];
BinOp3D() {} BinOp3D() = default;
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[]) { typename KeyViewType::const_value_type max[]) {

View File

@ -20,16 +20,38 @@ KOKKOS_ADD_TEST_LIBRARY(
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
) )
KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") # WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906")
ELSE()
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
ENDIF()
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
SET(SOURCES SET(SOURCES
UnitTestMain.cpp UnitTestMain.cpp
TestCuda.cpp )
)
IF(Kokkos_ENABLE_OPENMP) IF(Kokkos_ENABLE_OPENMP)
LIST( APPEND SOURCES LIST( APPEND SOURCES
TestOpenMP.cpp TestOpenMP.cpp
TestOpenMP_Sort1D.cpp
TestOpenMP_Sort3D.cpp
TestOpenMP_SortDynamicView.cpp
TestOpenMP_Random.cpp
)
ENDIF()
IF(Kokkos_ENABLE_HIP)
LIST( APPEND SOURCES
TestHIP.cpp
)
ENDIF()
IF(Kokkos_ENABLE_CUDA)
LIST( APPEND SOURCES
TestCuda.cpp
) )
ENDIF() ENDIF()

View File

@ -44,7 +44,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o OBJ_OPENMP = TestOpenMP.o TestOpenMP_Random.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp TEST_TARGETS += test-openmp
endif endif

View File

@ -59,11 +59,15 @@
namespace Test { namespace Test {
void cuda_test_random_xorshift64(int num_draws) { void cuda_test_random_xorshift64(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws); Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
} }
void cuda_test_random_xorshift1024(int num_draws) { void cuda_test_random_xorshift1024(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws); Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
} }
#define CUDA_RANDOM_XORSHIFT64(num_draws) \ #define CUDA_RANDOM_XORSHIFT64(num_draws) \

View File

@ -0,0 +1,83 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_HIP
#include <cstdint>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
void hip_test_random_xorshift64(size_t num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::HIP>>(
num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
void hip_test_random_xorshift1024(size_t num_draws) {
Impl::test_random<
Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::HIP>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
TEST(hip, Random_XorShift64) { hip_test_random_xorshift64(132141141); }
TEST(hip, Random_XorShift1024_0) { hip_test_random_xorshift1024(52428813); }
TEST(hip, SortUnsigned) {
Impl::test_sort<Kokkos::Experimental::HIP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTHIP_PREVENT_LINK_ERROR() {}
#endif /* #ifdef KOKKOS_ENABLE_HIP */

View File

@ -55,30 +55,8 @@
namespace Test { namespace Test {
#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort<Kokkos::OpenMP>(); }
TEST(openmp, Random_XorShift64) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \
TEST(openmp, Random_XorShift1024) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_SORT_UNSIGNED(size) \
TEST(openmp, SortUnsigned) { \
Impl::test_sort<Kokkos::OpenMP, unsigned>(size); \
}
OPENMP_RANDOM_XORSHIFT64(10240000)
OPENMP_RANDOM_XORSHIFT1024(10130144)
OPENMP_SORT_UNSIGNED(171)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
#undef OPENMP_SORT_UNSIGNED
} // namespace Test } // namespace Test
#else #else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}

View File

@ -0,0 +1,77 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <iomanip>
namespace Test {
#define OPENMP_RANDOM_XORSHIFT64(num_draws) \
TEST(openmp, Random_XorShift64) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \
TEST(openmp, Random_XorShift1024) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
OPENMP_RANDOM_XORSHIFT64(10240000)
OPENMP_RANDOM_XORSHIFT1024(10130144)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsigned1D) {
Impl::test_1D_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsigned3D) {
Impl::test_3D_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsignedDynamicView) {
Impl::test_dynamic_view_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -140,7 +140,7 @@ struct test_random_functor {
density_3d(d3d) {} density_3d(d3d) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(int i, RandomProperties& prop) const { void operator()(int /*i*/, RandomProperties& prop) const {
using Kokkos::atomic_fetch_add; using Kokkos::atomic_fetch_add;
rnd_type rand_gen = rand_pool.get_state(); rnd_type rand_gen = rand_pool.get_state();

View File

@ -130,7 +130,7 @@ struct sum3D {
}; };
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_1D_sort(unsigned int n, bool force_kokkos) { void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType; typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
KeyViewType keys("Keys", n); KeyViewType keys("Keys", n);
@ -165,7 +165,7 @@ void test_1D_sort(unsigned int n, bool force_kokkos) {
} }
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int n) { void test_3D_sort_impl(unsigned int n) {
typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType; typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
KeyViewType keys("Keys", n * n * n); KeyViewType keys("Keys", n * n * n);
@ -214,7 +214,7 @@ void test_3D_sort(unsigned int n) {
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort(unsigned int n) { void test_dynamic_view_sort_impl(unsigned int n) {
typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace> typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
KeyDynamicViewType; KeyDynamicViewType;
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType; typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
@ -278,7 +278,7 @@ void test_dynamic_view_sort(unsigned int n) {
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
template <class ExecutionSpace> template <class ExecutionSpace>
void test_issue_1160() { void test_issue_1160_impl() {
Kokkos::View<int*, ExecutionSpace> element_("element", 10); Kokkos::View<int*, ExecutionSpace> element_("element", 10);
Kokkos::View<double*, ExecutionSpace> x_("x", 10); Kokkos::View<double*, ExecutionSpace> x_("x", 10);
Kokkos::View<double*, ExecutionSpace> v_("y", 10); Kokkos::View<double*, ExecutionSpace> v_("y", 10);
@ -346,16 +346,33 @@ void test_issue_1160() {
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N) { void test_1D_sort(unsigned int N) {
test_1D_sort<ExecutionSpace, KeyType>(N * N * N, true); test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, true);
test_1D_sort<ExecutionSpace, KeyType>(N * N * N, false); test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, false);
#if !defined(KOKKOS_ENABLE_ROCM)
test_3D_sort<ExecutionSpace, KeyType>(N);
test_dynamic_view_sort<ExecutionSpace, KeyType>(N * N);
#endif
test_issue_1160<ExecutionSpace>();
} }
template <class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int N) {
test_3D_sort_impl<ExecutionSpace, KeyType>(N);
}
template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort(unsigned int N) {
test_dynamic_view_sort_impl<ExecutionSpace, KeyType>(N * N);
}
template <class ExecutionSpace>
void test_issue_1160_sort() {
test_issue_1160_impl<ExecutionSpace>();
}
template <class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N) {
test_1D_sort<ExecutionSpace, KeyType>(N);
test_3D_sort<ExecutionSpace, KeyType>(N);
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
test_issue_1160_sort<ExecutionSpace>();
}
} // namespace Impl } // namespace Impl
} // namespace Test } // namespace Test
#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ #endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */

10
lib/kokkos/appveyor.yml Normal file
View File

@ -0,0 +1,10 @@
image:
- Visual Studio 2019
clone_folder: c:\projects\source
build_script:
- cmd: >-
mkdir build &&
cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
cmake --build . --target install &&
ctest -C Debug -V

View File

@ -61,7 +61,7 @@ typedef int GUPSIndex;
double now() { double now() {
struct timeval now; struct timeval now;
gettimeofday(&now, NULL); gettimeofday(&now, nullptr);
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
} }

View File

@ -64,7 +64,7 @@ typedef int StreamIndex;
double now() { double now() {
struct timeval now; struct timeval now;
gettimeofday(&now, NULL); gettimeofday(&now, nullptr);
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
} }

View File

@ -1,339 +0,0 @@
#!/bin/bash
update_kokkos_devices() {
SEARCH_TEXT="*$1*"
if [[ $KOKKOS_DEVICES == $SEARCH_TEXT ]]; then
echo kokkos devices already includes $SEARCH_TEXT
else
if [ "$KOKKOS_DEVICES" = "" ]; then
KOKKOS_DEVICES="$1"
echo reseting kokkos devices to $KOKKOS_DEVICES
else
KOKKOS_DEVICES="${KOKKOS_DEVICES},$1"
echo appending to kokkos devices $KOKKOS_DEVICES
fi
fi
}
get_kokkos_device_list() {
KOKKOS_DEVICE_CMD=
PARSE_DEVICES_LST=$(echo $KOKKOS_DEVICES | tr "," "\n")
for DEVICE_ in $PARSE_DEVICES_LST
do
UC_DEVICE=$(echo $DEVICE_ | tr "[:lower:]" "[:upper:]")
KOKKOS_DEVICE_CMD="-DKokkos_ENABLE_${UC_DEVICE}=ON ${KOKKOS_DEVICE_CMD}"
done
}
get_kokkos_arch_list() {
KOKKOS_ARCH_CMD=
PARSE_ARCH_LST=$(echo $KOKKOS_ARCH | tr "," "\n")
for ARCH_ in $PARSE_ARCH_LST
do
UC_ARCH=$(echo $ARCH_ | tr "[:lower:]" "[:upper:]")
KOKKOS_ARCH_CMD="-DKokkos_ARCH_${UC_ARCH}=ON ${KOKKOS_ARCH_CMD}"
done
}
get_kokkos_cuda_option_list() {
echo parsing KOKKOS_CUDA_OPTIONS=$KOKKOS_CUDA_OPTIONS
KOKKOS_CUDA_OPTION_CMD=
PARSE_CUDA_LST=$(echo $KOKKOS_CUDA_OPTIONS | tr "," "\n")
for CUDA_ in $PARSE_CUDA_LST
do
CUDA_OPT_NAME=
if [ "${CUDA_}" == "enable_lambda" ]; then
CUDA_OPT_NAME=CUDA_LAMBDA
elif [ "${CUDA_}" == "rdc" ]; then
CUDA_OPT_NAME=CUDA_RELOCATABLE_DEVICE_CODE
elif [ "${CUDA_}" == "force_uvm" ]; then
CUDA_OPT_NAME=CUDA_UVM
elif [ "${CUDA_}" == "use_ldg" ]; then
CUDA_OPT_NAME=CUDA_LDG_INTRINSIC
else
echo "${CUDA_} is not a valid cuda options..."
fi
if [ "${CUDA_OPT_NAME}" != "" ]; then
KOKKOS_CUDA_OPTION_CMD="-DKokkos_ENABLE_${CUDA_OPT_NAME}=ON ${KOKKOS_CUDA_OPTION_CMD}"
fi
done
}
get_kokkos_option_list() {
echo parsing KOKKOS_OPTIONS=$KOKKOS_OPTIONS
KOKKOS_OPTION_CMD=
PARSE_OPTIONS_LST=$(echo $KOKKOS_OPTIONS | tr "," "\n")
for OPT_ in $PARSE_OPTIONS_LST
do
UC_OPT_=$(echo $OPT_ | tr "[:lower:]" "[:upper:]")
if [[ "$UC_OPT_" == *DISABLE* ]]; then
FLIP_OPT_=${UC_OPT_/DISABLE/ENABLE}
KOKKOS_OPTION_CMD="-DKokkos_${FLIP_OPT_}=OFF ${KOKKOS_OPTION_CMD}"
elif [[ "$UC_OPT_" == *ENABLE* ]]; then
KOKKOS_OPTION_CMD="-DKokkos_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}"
else
KOKKOS_OPTION_CMD="-DKokkos_ENABLE_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}"
fi
done
}
display_help_text() {
echo "Kokkos configure options:"
echo ""
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory."
echo "--prefix=/Install/Path: Path to install the Kokkos library."
echo ""
echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit."
echo "--with-openmp: Enable OpenMP backend."
echo "--with-pthread: Enable Pthreads backend."
echo "--with-serial: Enable Serial backend."
echo "--with-devices: Explicitly add a set of backends."
echo ""
echo "--arch=[OPT]: Set target architectures. Options are:"
echo " [AMD]"
echo " AMDAVX = AMD CPU"
echo " EPYC = AMD EPYC Zen-Core CPU"
echo " [ARM]"
echo " ARMv80 = ARMv8.0 Compatible CPU"
echo " ARMv81 = ARMv8.1 Compatible CPU"
echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU"
echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU"
echo " [IBM]"
echo " BGQ = IBM Blue Gene Q"
echo " Power7 = IBM POWER7 and POWER7+ CPUs"
echo " Power8 = IBM POWER8 CPUs"
echo " Power9 = IBM POWER9 CPUs"
echo " [Intel]"
echo " WSM = Intel Westmere CPUs"
echo " SNB = Intel Sandy/Ivy Bridge CPUs"
echo " HSW = Intel Haswell CPUs"
echo " BDW = Intel Broadwell Xeon E-class CPUs"
echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)"
echo " [Intel Xeon Phi]"
echo " KNC = Intel Knights Corner Xeon Phi"
echo " KNL = Intel Knights Landing Xeon Phi"
echo " [NVIDIA]"
echo " Kepler30 = NVIDIA Kepler generation CC 3.0"
echo " Kepler32 = NVIDIA Kepler generation CC 3.2"
echo " Kepler35 = NVIDIA Kepler generation CC 3.5"
echo " Kepler37 = NVIDIA Kepler generation CC 3.7"
echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0"
echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2"
echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3"
echo " Pascal60 = NVIDIA Pascal generation CC 6.0"
echo " Pascal61 = NVIDIA Pascal generation CC 6.1"
echo " Volta70 = NVIDIA Volta generation CC 7.0"
echo " Volta72 = NVIDIA Volta generation CC 7.2"
echo ""
echo "--compiler=/Path/To/Compiler Set the compiler."
echo "--debug,-dbg: Enable Debugging."
echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test"
echo " build. This will still set certain required"
echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp,"
echo " --std=c++11, etc.)."
echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test"
echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a"
echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test"
echo " build. This will still set certain required"
echo " flags via KOKKOS_LDFLAGS (such as -fopenmp,"
echo " -lpthread, etc.)."
echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance"
echo " tests.)"
echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library."
echo "--with-memkind=/Path/To/MemKind: Set path to memkind library."
echo "--with-options=[OPT]: Additional options to Kokkos:"
echo " compiler_warnings"
echo " aggressive_vectorization = add ivdep on loops"
echo " disable_profiling = do not compile with profiling hooks"
echo " "
echo "--with-cuda-options=[OPT]: Additional options to CUDA:"
echo " force_uvm, use_ldg, enable_lambda, rdc"
echo "--with-hpx-options=[OPT]: Additional options to HPX:"
echo " enable_async_dispatch"
echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)"
echo "--make-j=[NUM]: DEPRECATED: call make with appropriate"
echo " -j flag"
}
while [[ $# > 0 ]]
do
key="$1"
case $key in
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--hpx-path*)
HPX_PATH="${key#*=}"
;;
--prefix*)
PREFIX="${key#*=}"
;;
--with-cuda)
update_kokkos_devices Cuda
CUDA_PATH_NVCC=$(command -v nvcc)
CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc}
;;
# Catch this before '--with-cuda*'
--with-cuda-options*)
KOKKOS_CUDA_OPTIONS="${key#*=}"
;;
--with-cuda*)
update_kokkos_devices Cuda
CUDA_PATH="${key#*=}"
;;
--with-openmp)
update_kokkos_devices OpenMP
;;
--with-pthread)
update_kokkos_devices Pthread
;;
--with-serial)
update_kokkos_devices Serial
;;
--with-hpx-options*)
KOKKOS_HPX_OPT="${key#*=}"
;;
--with-hpx*)
update_kokkos_devices HPX
if [ -z "$HPX_PATH" ]; then
HPX_PATH="${key#*=}"
fi
;;
--with-devices*)
DEVICES="${key#*=}"
PARSE_DEVICES=$(echo $DEVICES | tr "," "\n")
for DEVICE_ in $PARSE_DEVICES
do
update_kokkos_devices $DEVICE_
done
;;
--with-gtest*)
GTEST_PATH="${key#*=}"
;;
--with-hwloc*)
HWLOC_PATH="${key#*=}"
;;
--with-memkind*)
MEMKIND_PATH="${key#*=}"
;;
--arch*)
KOKKOS_ARCH="${key#*=}"
;;
--cxxflags*)
KOKKOS_CXXFLAGS="${key#*=}"
KOKKOS_CXXFLAGS=${KOKKOS_CXXFLAGS//,/ }
;;
--cxxstandard*)
KOKKOS_CXX_STANDARD="${key#*=}"
;;
--ldflags*)
KOKKOS_LDFLAGS="${key#*=}"
;;
--debug|-dbg)
KOKKOS_DEBUG=yes
;;
--make-j*)
echo "Warning: ${key} is deprecated"
echo "Call make with appropriate -j flag"
;;
--compiler*)
COMPILER="${key#*=}"
CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l)
if [ ${CNUM} -gt 0 ]; then
echo "Invalid compiler by --compiler command: '${COMPILER}'"
exit
fi
if [[ ! -n ${COMPILER} ]]; then
echo "Empty compiler specified by --compiler command."
exit
fi
CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l)
if [ ${CNUM} -eq 0 ]; then
echo "Invalid compiler by --compiler command: '${COMPILER}'"
exit
fi
# ... valid compiler, ensure absolute path set
WCOMPATH=$(command -v $COMPILER)
COMPDIR=$(dirname $WCOMPATH)
COMPNAME=$(basename $WCOMPATH)
COMPILER=${COMPDIR}/${COMPNAME}
;;
--with-options*)
KOKKOS_OPTIONS="${key#*=}"
;;
--gcc-toolchain*)
KOKKOS_GCC_TOOLCHAIN="${key#*=}"
;;
--help)
display_help_text
exit 0
;;
*)
echo "warning: ignoring unknown option $key"
;;
esac
shift
done
if [ "$COMPILER" == "" ]; then
COMPILER_CMD=
else
COMPILER_CMD=-DCMAKE_CXX_COMPILER=$COMPILER
fi
if [ "$KOKKOS_DEBUG" == "" ]; then
KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=RELEASE
else
KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=DEBUG
fi
if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then
if [ "${KOKKOS_PATH}" == "" ]; then
CM_SCRIPT=$0
KOKKOS_PATH=`dirname $CM_SCRIPT`
if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then
echo "${KOKKOS_PATH} repository appears to not be complete. please verify and try again"
exit 0
fi
else
echo "KOKKOS_PATH does not appear to be set properly. please specify in location of CMakeLists.txt"
display_help_text
exit 0
fi
fi
get_kokkos_device_list
get_kokkos_option_list
get_kokkos_arch_list
get_kokkos_cuda_option_list
## if HPX is enabled, we need to enforce cxx standard = 14
if [[ ${KOKKOS_DEVICE_CMD} == *Kokkos_ENABLE_HPX* ]]; then
if [ "${KOKKOS_CXX_STANDARD}" == "" ] || [ ${#KOKKOS_CXX_STANDARD} -lt 14 ]; then
echo CXX Standard must be 14 or higher for HPX to work.
KOKKOS_CXX_STANDARD=14
fi
fi
if [ "$KOKKOS_CXX_STANDARD" == "" ]; then
STANDARD_CMD=
else
STANDARD_CMD=-DKokkos_CXX_STANDARD=${KOKKOS_CXX_STANDARD}
fi
if [[ ${COMPILER} == *clang* ]]; then
gcc_path=$(which g++ | awk --field-separator='/bin/g++' '{printf $1}' )
KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --gcc-toolchain=${gcc_path}"
if [ ! "${CUDA_PATH}" == "" ]; then
KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --cuda-path=${CUDA_PATH}"
fi
fi
echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH}
cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH}

View File

@ -6,5 +6,4 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
Core core PS REQUIRED Core core PS REQUIRED
Containers containers PS OPTIONAL Containers containers PS OPTIONAL
Algorithms algorithms PS OPTIONAL Algorithms algorithms PS OPTIONAL
Example example EX OPTIONAL
) )

View File

@ -73,6 +73,9 @@ function(kokkos_check)
# use it to check that there are variables defined for all required # use it to check that there are variables defined for all required
# arguments. Success or failure messages will be displayed but we are # arguments. Success or failure messages will be displayed but we are
# responsible for signaling failure and skip the build system generation. # responsible for signaling failure and skip the build system generation.
if (KOKKOS_CHECK_RETURN_VALUE)
set(Kokkos_${arg}_FIND_QUIETLY ON)
endif()
find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG
${KOKKOS_CHECK_${arg}}) ${KOKKOS_CHECK_${arg}})
if(NOT Kokkos_${arg}_FOUND) if(NOT Kokkos_${arg}_FOUND)

View File

@ -5,11 +5,19 @@
#define KOKKOS_CORE_CONFIG_H #define KOKKOS_CORE_CONFIG_H
#endif #endif
// KOKKOS_VERSION % 100 is the patch level
// KOKKOS_VERSION / 100 % 100 is the minor version
// KOKKOS_VERSION / 10000 is the major version
#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
/* Execution Spaces */ /* Execution Spaces */
#cmakedefine KOKKOS_ENABLE_SERIAL #cmakedefine KOKKOS_ENABLE_SERIAL
#cmakedefine KOKKOS_ENABLE_OPENMP #cmakedefine KOKKOS_ENABLE_OPENMP
#cmakedefine KOKKOS_ENABLE_OPENMPTARGET
#cmakedefine KOKKOS_ENABLE_THREADS #cmakedefine KOKKOS_ENABLE_THREADS
#cmakedefine KOKKOS_ENABLE_CUDA #cmakedefine KOKKOS_ENABLE_CUDA
#cmakedefine KOKKOS_ENABLE_HIP
#cmakedefine KOKKOS_ENABLE_HPX #cmakedefine KOKKOS_ENABLE_HPX
#cmakedefine KOKKOS_ENABLE_MEMKIND #cmakedefine KOKKOS_ENABLE_MEMKIND
#cmakedefine KOKKOS_ENABLE_LIBRT #cmakedefine KOKKOS_ENABLE_LIBRT
@ -33,6 +41,7 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC #cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
#cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK

View File

@ -0,0 +1,17 @@
IF (NOT TARGET Kokkos::kokkos)
# Compute the installation prefix relative to this file.
get_filename_component(KOKKOS_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH)
get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH)
get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH)
if(KOKKOS_IMPORT_PREFIX STREQUAL "/")
set(KOKKOS_IMPORT_PREFIX "")
endif()
add_library(Kokkos::kokkos INTERFACE IMPORTED)
set_target_properties(Kokkos::kokkos PROPERTIES
INTERFACE_LINK_LIBRARIES "@Kokkos_LIBRARIES@;@KOKKOS_LINK_OPTIONS@"
INTERFACE_COMPILE_FEATURES "@KOKKOS_CXX_STANDARD_FEATURE@"
INTERFACE_COMPILE_OPTIONS "@KOKKOS_ALL_COMPILE_OPTIONS@"
INTERFACE_INCLUDE_DIRECTORIES "${KOKKOS_IMPORT_PREFIX}/include"
)
ENDIF()

View File

@ -1,8 +1,12 @@
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# Note: "stubs" suffix allows CMake to find the dummy
# libcuda.so provided by the NVIDIA CUDA Toolkit for
# cross-compiling CUDA on a host without a GPU.
KOKKOS_FIND_IMPORTED(CUDA INTERFACE KOKKOS_FIND_IMPORTED(CUDA INTERFACE
LIBRARIES cudart cuda LIBRARIES cudart cuda
LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH /usr/local/cuda
LIBRARY_SUFFIXES lib lib64 lib/stubs lib64/stubs
ALLOW_SYSTEM_PATH_FALLBACK ALLOW_SYSTEM_PATH_FALLBACK
) )
ELSE() ELSE()

View File

@ -3,15 +3,18 @@ TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG
${KOKKOS_TOP_BUILD_DIR}/tpl_tests ${KOKKOS_TOP_BUILD_DIR}/tpl_tests
${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp
LINK_LIBRARIES -pthread LINK_LIBRARIES -pthread
COMPILE_DEFINITIONS -pthread) COMPILE_DEFINITIONS -pthread
)
# The test no longer requires C++11
# if we did needed C++ standard support, then we should add option
# ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION}
INCLUDE(FindPackageHandleStandardArgs) INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG) FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG)
#Only create the TPL if we succeed
KOKKOS_CREATE_IMPORTED_TPL(PTHREAD IF (KOKKOS_HAS_PTHREAD_ARG)
INTERFACE #this is not a real library with a real location KOKKOS_CREATE_IMPORTED_TPL(PTHREAD
COMPILE_OPTIONS -pthread INTERFACE #this is not a real library with a real location
LINK_OPTIONS -pthread) COMPILE_OPTIONS -pthread
LINK_OPTIONS -pthread)
ENDIF()

View File

@ -166,8 +166,9 @@ There are 3 possibilities that could be used:
The following is the search order that Kokkos follows. Note: This differs from the default search order used by CMake `find_library` and `find_header`. CMake prefers default system paths over user-provided paths. The following is the search order that Kokkos follows. Note: This differs from the default search order used by CMake `find_library` and `find_header`. CMake prefers default system paths over user-provided paths.
For Kokkos (and package managers in general), it is better to prefer user-provided paths since this usually indicates a specific version we want. For Kokkos (and package managers in general), it is better to prefer user-provided paths since this usually indicates a specific version we want.
1. `<NAME>_ROOT` 1. `<NAME>_ROOT` command line option
1. `Kokkos_<NAME>_DIR` 1. `<NAME>_ROOT` environment variable
1. `Kokkos_<NAME>_DIR` command line option
1. Paths added by Kokkos CMake logic 1. Paths added by Kokkos CMake logic
1. Default system paths (if allowed) 1. Default system paths (if allowed)

View File

@ -1,6 +1,6 @@
#include <omp.h> #include <omp.h>
int main(int argc, char** argv) { int main(int, char**) {
int thr = omp_get_num_threads(); int thr = omp_get_num_threads();
if (thr > 0) if (thr > 0)
return thr; return thr;

View File

@ -4,6 +4,10 @@ void* kokkos_test(void* args) { return args; }
int main(void) { int main(void) {
pthread_t thread; pthread_t thread;
/* Use NULL to avoid C++11. Some compilers
do not have C++11 by default. Forcing C++11
in the compile tests can be done, but is unnecessary
*/
pthread_create(&thread, NULL, kokkos_test, NULL); pthread_create(&thread, NULL, kokkos_test, NULL);
pthread_join(thread, NULL); pthread_join(thread, NULL);
return 0; return 0;

View File

@ -24,10 +24,6 @@ IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "")
ENDIF() ENDIF()
ENDMACRO() ENDMACRO()
MACRO(GLOBAL_RESET VARNAME)
SET(${VARNAME} "" CACHE INTERNAL "" FORCE)
ENDMACRO()
MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE) MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE)
SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE) SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE)
ENDMACRO() ENDMACRO()
@ -88,10 +84,6 @@ MACRO(ADD_INTERFACE_LIBRARY LIB_NAME)
SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE)
ENDMACRO() ENDMACRO()
IF(NOT TARGET check)
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
ENDIF()
FUNCTION(KOKKOS_ADD_TEST) FUNCTION(KOKKOS_ADD_TEST)
if (KOKKOS_HAS_TRILINOS) if (KOKKOS_HAS_TRILINOS)
CMAKE_PARSE_ARGUMENTS(TEST CMAKE_PARSE_ARGUMENTS(TEST
@ -108,7 +100,6 @@ FUNCTION(KOKKOS_ADD_TEST)
TRIBITS_ADD_TEST( TRIBITS_ADD_TEST(
${EXE_ROOT} ${EXE_ROOT}
NAME ${TEST_NAME} NAME ${TEST_NAME}
${ARGN}
COMM serial mpi COMM serial mpi
NUM_MPI_PROCS 1 NUM_MPI_PROCS 1
${TEST_UNPARSED_ARGUMENTS} ${TEST_UNPARSED_ARGUMENTS}
@ -119,11 +110,17 @@ FUNCTION(KOKKOS_ADD_TEST)
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME" "FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME"
"CATEGORIES;CMD_ARGS" "CATEGORIES;CMD_ARGS"
${ARGN}) ${ARGN})
# To match Tribits, we should always be receiving
# the root names of exes/libs
IF(TEST_EXE) IF(TEST_EXE)
SET(EXE ${TEST_EXE}) SET(EXE_ROOT ${TEST_EXE})
ELSE() ELSE()
SET(EXE ${TEST_NAME}) SET(EXE_ROOT ${TEST_NAME})
ENDIF() ENDIF()
# Prepend package name to the test name
# These should be the full target name
SET(TEST_NAME ${PACKAGE_NAME}_${TEST_NAME})
SET(EXE ${PACKAGE_NAME}_${EXE_ROOT})
IF(WIN32) IF(WIN32)
ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_CMD_ARGS}) ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_CMD_ARGS})
ELSE() ELSE()
@ -277,10 +274,6 @@ ELSE()
LIST(REMOVE_DUPLICATES PARSE_SOURCES) LIST(REMOVE_DUPLICATES PARSE_SOURCES)
ENDIF() ENDIF()
ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) ADD_LIBRARY(${NAME} ${PARSE_SOURCES})
target_link_libraries(
${NAME}
PUBLIC kokkos
)
ENDIF() ENDIF()
ENDFUNCTION() ENDFUNCTION()

View File

@ -9,52 +9,6 @@ FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION)
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE) SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
ENDFUNCTION() ENDFUNCTION()
FUNCTION(ARCH_FLAGS)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
CMAKE_PARSE_ARGUMENTS(
PARSE
"LINK_ONLY;COMPILE_ONLY"
""
"${COMPILERS}"
${ARGN})
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
SET(FLAGS)
SET(NEW_COMPILE_OPTIONS)
SET(NEW_XCOMPILER_OPTIONS)
SET(NEW_LINK_OPTIONS)
LIST(APPEND NEW_XCOMPILER_OPTIONS ${KOKKOS_XCOMPILER_OPTIONS})
LIST(APPEND NEW_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
LIST(APPEND NEW_LINK_OPTIONS ${KOKKOS_LINK_OPTIONS})
FOREACH(COMP ${COMPILERS})
IF (COMPILER STREQUAL "${COMP}")
IF (PARSE_${COMPILER})
IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED")
SET(FLAGS ${PARSE_${COMPILER}})
ENDIF()
ELSEIF(PARSE_DEFAULT)
SET(FLAGS ${PARSE_DEFAULT})
ENDIF()
ENDIF()
ENDFOREACH()
IF (NOT LINK_ONLY)
# The funky logic here is for future handling of argument deduplication
# If we naively pass multiple -Xcompiler flags to target_compile_options
# -Xcompiler will get deduplicated and break the build
IF ("-Xcompiler" IN_LIST FLAGS)
LIST(REMOVE_ITEM FLAGS "-Xcompiler")
GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${FLAGS})
ELSE()
GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${FLAGS})
ENDIF()
ENDIF()
IF (NOT COMPILE_ONLY)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${FLAGS})
ENDIF()
ENDFUNCTION()
# Make sure devices and compiler ID are done # Make sure devices and compiler ID are done
KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID) KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID)
@ -98,7 +52,8 @@ KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0")
KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2") KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2")
KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5") KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5")
KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture") KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture")
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
IF (KOKKOS_ENABLE_CUDA) IF (KOKKOS_ENABLE_CUDA)
#Regardless of version, make sure we define the general architecture name #Regardless of version, make sure we define the general architecture name
@ -126,13 +81,13 @@ ENDIF()
IF(KOKKOS_ENABLE_COMPILER_WARNINGS) IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
SET(COMMON_WARNINGS SET(COMMON_WARNINGS
"-Wall" "-Wshadow" "-pedantic" "-Wall" "-Wunused-parameter" "-Wshadow" "-pedantic"
"-Wsign-compare" "-Wtype-limits" "-Wuninitialized") "-Wsign-compare" "-Wtype-limits" "-Wuninitialized")
SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers" SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers"
${COMMON_WARNINGS}) ${COMMON_WARNINGS})
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
GNU ${GNU_WARNINGS} GNU ${GNU_WARNINGS}
DEFAULT ${COMMON_WARNINGS} DEFAULT ${COMMON_WARNINGS}
@ -141,7 +96,8 @@ ENDIF()
#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- #------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) #clear anything that might be in the cache
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
# Construct the Makefile options # Construct the Makefile options
IF (KOKKOS_ENABLE_CUDA_LAMBDA) IF (KOKKOS_ENABLE_CUDA_LAMBDA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
@ -157,6 +113,7 @@ ENDIF()
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
SET(CUDA_ARCH_FLAG "--cuda-gpu-arch") SET(CUDA_ARCH_FLAG "--cuda-gpu-arch")
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda)
IF (KOKKOS_ENABLE_CUDA) IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE) SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
@ -171,27 +128,13 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo)
ENDIF() ENDIF()
UNSET(_UPPERCASE_CMAKE_BUILD_TYPE) UNSET(_UPPERCASE_CMAKE_BUILD_TYPE)
IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0) IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0 AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0)
GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored)
ENDIF() ENDIF()
ENDIF() ENDIF()
IF(KOKKOS_ENABLE_OPENMP)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang)
MESSAGE(FATAL_ERROR "Apple Clang does not support OpenMP. Use native Clang instead")
ENDIF()
ARCH_FLAGS(
Clang -fopenmp=libomp
PGI -mp
NVIDIA -Xcompiler -fopenmp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
ENDIF()
IF (KOKKOS_ARCH_ARMV80) IF (KOKKOS_ARCH_ARMV80)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a DEFAULT -march=armv8-a
@ -199,7 +142,7 @@ IF (KOKKOS_ARCH_ARMV80)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_ARMV81) IF (KOKKOS_ARCH_ARMV81)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8.1-a DEFAULT -march=armv8.1-a
@ -208,7 +151,7 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX) IF (KOKKOS_ARCH_ARMV8_THUNDERX)
SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a -mtune=thunderx DEFAULT -march=armv8-a -mtune=thunderx
@ -217,7 +160,7 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX2) IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99 DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99
@ -225,7 +168,7 @@ IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_EPYC) IF (KOKKOS_ARCH_EPYC)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -mavx2 Intel -mavx2
DEFAULT -march=znver1 -mtune=znver1 DEFAULT -march=znver1 -mtune=znver1
) )
@ -234,7 +177,7 @@ IF (KOKKOS_ARCH_EPYC)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_WSM) IF (KOKKOS_ARCH_WSM)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -xSSE4.2 Intel -xSSE4.2
PGI -tp=nehalem PGI -tp=nehalem
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -245,7 +188,7 @@ ENDIF()
IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX) IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
SET(KOKKOS_ARCH_AVX ON) SET(KOKKOS_ARCH_AVX ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -mavx Intel -mavx
PGI -tp=sandybridge PGI -tp=sandybridge
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -255,7 +198,7 @@ ENDIF()
IF (KOKKOS_ARCH_HSW) IF (KOKKOS_ARCH_HSW)
SET(KOKKOS_ARCH_AVX2 ON) SET(KOKKOS_ARCH_AVX2 ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX2 Intel -xCORE-AVX2
PGI -tp=haswell PGI -tp=haswell
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -265,7 +208,7 @@ ENDIF()
IF (KOKKOS_ARCH_BDW) IF (KOKKOS_ARCH_BDW)
SET(KOKKOS_ARCH_AVX2 ON) SET(KOKKOS_ARCH_AVX2 ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX2 Intel -xCORE-AVX2
PGI -tp=haswell PGI -tp=haswell
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -275,7 +218,7 @@ ENDIF()
IF (KOKKOS_ARCH_EPYC) IF (KOKKOS_ARCH_EPYC)
SET(KOKKOS_ARCH_AMD_AVX2 ON) SET(KOKKOS_ARCH_AMD_AVX2 ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -mvax2 Intel -mvax2
DEFAULT -march=znver1 -mtune=znver1 DEFAULT -march=znver1 -mtune=znver1
) )
@ -284,7 +227,7 @@ ENDIF()
IF (KOKKOS_ARCH_KNL) IF (KOKKOS_ARCH_KNL)
#avx512-mic #avx512-mic
SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -xMIC-AVX512 Intel -xMIC-AVX512
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -294,7 +237,7 @@ ENDIF()
IF (KOKKOS_ARCH_KNC) IF (KOKKOS_ARCH_KNC)
SET(KOKKOS_USE_ISA_KNC ON) SET(KOKKOS_USE_ISA_KNC ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
DEFAULT -mmic DEFAULT -mmic
) )
ENDIF() ENDIF()
@ -302,7 +245,7 @@ ENDIF()
IF (KOKKOS_ARCH_SKX) IF (KOKKOS_ARCH_SKX)
#avx512-xeon #avx512-xeon
SET(KOKKOS_ARCH_AVX512XEON ON) SET(KOKKOS_ARCH_AVX512XEON ON)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX512 Intel -xCORE-AVX512
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
Cray NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED
@ -319,7 +262,7 @@ IF (KOKKOS_ARCH_BDW OR KOKKOS_ARCH_SKX)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_POWER7) IF (KOKKOS_ARCH_POWER7)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
DEFAULT -mcpu=power7 -mtune=power7 DEFAULT -mcpu=power7 -mtune=power7
) )
@ -327,7 +270,7 @@ IF (KOKKOS_ARCH_POWER7)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_POWER8) IF (KOKKOS_ARCH_POWER8)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED NVIDIA NO-VALUE-SPECIFIED
DEFAULT -mcpu=power8 -mtune=power8 DEFAULT -mcpu=power8 -mtune=power8
@ -335,7 +278,7 @@ IF (KOKKOS_ARCH_POWER8)
ENDIF() ENDIF()
IF (KOKKOS_ARCH_POWER9) IF (KOKKOS_ARCH_POWER9)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED NVIDIA NO-VALUE-SPECIFIED
DEFAULT -mcpu=power9 -mtune=power9 DEFAULT -mcpu=power9 -mtune=power9
@ -347,33 +290,50 @@ IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9)
ENDIF() ENDIF()
IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
ARCH_FLAGS( COMPILER_SPECIFIC_FLAGS(
Clang -fcuda-rdc Clang -fcuda-rdc
NVIDIA --relocatable-device-code=true NVIDIA --relocatable-device-code=true
) )
ENDIF() ENDIF()
#Right now we cannot get the compiler ID when cross-compiling, so just check
#that HIP is enabled
IF (Kokkos_ENABLE_HIP)
IF (Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fgpu-rdc
)
ELSE()
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fno-gpu-rdc
)
ENDIF()
ENDIF()
SET(CUDA_ARCH_ALREADY_SPECIFIED "") SET(CUDA_ARCH_ALREADY_SPECIFIED "")
FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
IF(KOKKOS_ARCH_${ARCH}) IF(KOKKOS_ARCH_${ARCH})
IF(CUDA_ARCH_ALREADY_SPECIFIED) IF(CUDA_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.")
ENDIF() ENDIF()
SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE)
IF (NOT KOKKOS_ENABLE_CUDA) IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET)
MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA is OFF. Option will be ignored.") MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.")
UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
ELSE() ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF()
ENDFUNCTION() ENDFUNCTION()
#These will define KOKKOS_CUDA_ARCH_FLAG
#to the corresponding flag name if ON
CHECK_CUDA_ARCH(KEPLER30 sm_30) CHECK_CUDA_ARCH(KEPLER30 sm_30)
CHECK_CUDA_ARCH(KEPLER32 sm_32) CHECK_CUDA_ARCH(KEPLER32 sm_32)
CHECK_CUDA_ARCH(KEPLER35 sm_35) CHECK_CUDA_ARCH(KEPLER35 sm_35)
@ -383,18 +343,76 @@ CHECK_CUDA_ARCH(MAXWELL52 sm_52)
CHECK_CUDA_ARCH(MAXWELL53 sm_53) CHECK_CUDA_ARCH(MAXWELL53 sm_53)
CHECK_CUDA_ARCH(PASCAL60 sm_60) CHECK_CUDA_ARCH(PASCAL60 sm_60)
CHECK_CUDA_ARCH(PASCAL61 sm_61) CHECK_CUDA_ARCH(PASCAL61 sm_61)
CHECK_CUDA_ARCH(VOLTA70 sm_70) CHECK_CUDA_ARCH(VOLTA70 sm_70)
CHECK_CUDA_ARCH(VOLTA72 sm_72) CHECK_CUDA_ARCH(VOLTA72 sm_72)
CHECK_CUDA_ARCH(TURING75 sm_75) CHECK_CUDA_ARCH(TURING75 sm_75)
SET(AMDGPU_ARCH_ALREADY_SPECIFIED "")
FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
IF(KOKKOS_ARCH_${ARCH})
IF(AMDGPU_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${AMDGPU_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.")
ENDIF()
SET(AMDGPU_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE)
IF (NOT KOKKOS_ENABLE_HIP AND NOT KOKKOS_ENABLE_OPENMPTARGET)
MESSAGE(WARNING "Given HIP arch ${ARCH}, but Kokkos_ENABLE_AMDGPU and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.")
UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
ELSE()
SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_HIP)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF()
ENDIF()
ENDFUNCTION()
#These will define KOKKOS_AMDGPU_ARCH_FLAG
#to the corresponding flag name if ON
CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
IF (KOKKOS_ENABLE_OPENMPTARGET)
SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG})
IF (CLANG_CUDA_ARCH)
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
)
ENDIF()
SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG})
IF (CLANG_AMDGPU_ARCH)
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa
)
ENDIF()
ENDIF()
IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled. Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.")
ENDIF()
#CMake verbose is kind of pointless #CMake verbose is kind of pointless
#Let's just always print things #Let's just always print things
MESSAGE(STATUS "Execution Spaces:") MESSAGE(STATUS "Execution Spaces:")
IF(KOKKOS_ENABLE_CUDA)
MESSAGE(STATUS " Device Parallel: CUDA") FOREACH (_BACKEND CUDA OPENMPTARGET HIP)
ELSE() IF(KOKKOS_ENABLE_${_BACKEND})
MESSAGE(STATUS " Device Parallel: NONE") IF(_DEVICE_PARALLEL)
MESSAGE(FATAL_ERROR "Multiple device parallel execution spaces are not allowed! "
"Trying to enable execution space ${_BACKEND}, "
"but execution space ${_DEVICE_PARALLEL} is already enabled. "
"Remove the CMakeCache.txt file and re-configure.")
ENDIF()
SET(_DEVICE_PARALLEL ${_BACKEND})
ENDIF()
ENDFOREACH()
IF(NOT _DEVICE_PARALLEL)
SET(_DEVICE_PARALLEL "NONE")
ENDIF() ENDIF()
MESSAGE(STATUS " Device Parallel: ${_DEVICE_PARALLEL}")
UNSET(_DEVICE_PARALLEL)
FOREACH (_BACKEND OPENMP PTHREAD HPX) FOREACH (_BACKEND OPENMP PTHREAD HPX)
IF(KOKKOS_ENABLE_${_BACKEND}) IF(KOKKOS_ENABLE_${_BACKEND})

View File

@ -0,0 +1,12 @@
SET(CRAYPE_VERSION $ENV{CRAYPE_VERSION})
IF (CRAYPE_VERSION)
SET(KOKKOS_IS_CRAYPE TRUE)
SET(CRAYPE_LINK_TYPE $ENV{CRAYPE_LINK_TYPE})
IF (CRAYPE_LINK_TYPE)
IF (NOT CRAYPE_LINK_TYPE STREQUAL "dynamic")
MESSAGE(WARNING "CRAYPE_LINK_TYPE is set to ${CRAYPE_LINK_TYPE}. Linking is likely to fail unless this is set to 'dynamic'")
ENDIF()
ELSE()
MESSAGE(WARNING "CRAYPE_LINK_TYPE is not set. Linking is likely to fail unless this is set to 'dynamic'")
ENDIF()
ENDIF()

View File

@ -13,7 +13,7 @@ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
STRING(REGEX REPLACE "^ +" "" STRING(REGEX REPLACE "^ +" ""
INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
IF(INTERNAL_HAVE_COMPILER_NVCC) IF(INTERNAL_HAVE_COMPILER_NVCC)
@ -31,16 +31,32 @@ IF(INTERNAL_HAVE_COMPILER_NVCC)
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
ENDIF() ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray)
# SET nvcc's compiler version. IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# The Cray compiler reports as Clang to most versions of CMake
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
COMMAND grep Cray
COMMAND wc -l
OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER
OUTPUT_STRIP_TRAILING_WHITESPACE)
IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
SET(KOKKOS_CLANG_IS_CRAY TRUE)
ENDIF()
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY)
# SET Cray's compiler version.
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE) OUTPUT_STRIP_TRAILING_WHITESPACE)
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+"
TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) IF (KOKKOS_CLANG_IS_CRAY)
SET(KOKKOS_CLANG_CRAY_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
ELSE()
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
ENDIF()
ENDIF() ENDIF()
# Enforce the minimum compilers supported by Kokkos. # Enforce the minimum compilers supported by Kokkos.

View File

@ -1,4 +1,4 @@
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP) IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY)
# The clang "version" doesn't actually tell you what runtimes and tools # The clang "version" doesn't actually tell you what runtimes and tools
# were built into Clang. We should therefore make sure that libomp # were built into Clang. We should therefore make sure that libomp
# was actually built into Clang. Otherwise the user will get nonsensical # was actually built into Clang. Otherwise the user will get nonsensical
@ -22,6 +22,30 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP)
UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this
ENDIF() ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang AND KOKKOS_ENABLE_OPENMP)
# The clang "version" doesn't actually tell you what runtimes and tools
# were built into Clang. We should therefore make sure that libomp
# was actually built into Clang. Otherwise the user will get nonsensical
# errors when they try to build.
#Try compile is the height of CMake nonsense
#I can't just give it compiler and link flags
#I have to hackily pretend that compiler flags are compiler definitions
#and that linker flags are libraries
#also - this is easier to use than CMakeCheckCXXSourceCompiles
TRY_COMPILE(APPLECLANG_HAS_OMP
${KOKKOS_TOP_BUILD_DIR}/corner_cases
${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp
COMPILE_DEFINITIONS -Xpreprocessor -fopenmp
LINK_LIBRARIES -lomp
)
IF (NOT APPLECLANG_HAS_OMP)
UNSET(APPLECLANG_HAS_OMP CACHE) #make sure CMake always re-runs this
MESSAGE(FATAL_ERROR "AppleClang failed OpenMP check. You have requested -DKokkos_ENABLE_OPENMP=ON, but the AppleClang compiler does not appear to have been built with OpenMP support")
ENDIF()
UNSET(APPLECLANG_HAS_OMP CACHE) #make sure CMake always re-runs this
ENDIF()
IF (KOKKOS_CXX_STANDARD STREQUAL 17) IF (KOKKOS_CXX_STANDARD STREQUAL 17)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7) IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7)

View File

@ -31,6 +31,41 @@ ELSE()
SET(OMP_DEFAULT OFF) SET(OMP_DEFAULT OFF)
ENDIF() ENDIF()
KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend")
IF(KOKKOS_ENABLE_OPENMP)
SET(ClangOpenMPFlag -fopenmp=libomp)
IF(KOKKOS_CLANG_IS_CRAY)
SET(ClangOpenMPFlag -fopenmp)
ENDIF()
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag}
AppleClang -Xpreprocessor -fopenmp
PGI -mp
NVIDIA -Xcompiler -fopenmp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_LIBS(
AppleClang -lomp
)
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend")
IF (KOKKOS_ENABLE_OPENMPTARGET)
COMPILER_SPECIFIC_FLAGS(
Clang -fopenmp -fopenmp=libomp
XL -qsmp=omp -qoffload -qnoeh
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_DEFS(
XL KOKKOS_IBM_XL_OMP45_WORKAROUND
Clang KOKKOS_WORKAROUND_OPENMPTARGET_CLANG
)
# Are there compilers which identify as Clang and need this library?
# COMPILER_SPECIFIC_LIBS(
# Clang -lopenmptarget
# )
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(CUDA_DEFAULT ON) SET(CUDA_DEFAULT ON)
@ -59,3 +94,5 @@ ENDIF()
KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend")
KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)")
KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend")

View File

@ -21,6 +21,7 @@ ENDFUNCTION()
# Certain defaults will depend on knowing the enabled devices # Certain defaults will depend on knowing the enabled devices
KOKKOS_CFG_DEPENDS(OPTIONS DEVICES) KOKKOS_CFG_DEPENDS(OPTIONS DEVICES)
KOKKOS_CFG_DEPENDS(OPTIONS COMPILER_ID)
# Put a check in just in case people are using this option # Put a check in just in case people are using this option
KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE) KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE)
@ -28,8 +29,10 @@ KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE)
KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA") KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA")
KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default") KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default")
KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics") KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics")
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch") KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch")
KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests") KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests")
KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build the examples")
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE) STRING(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE)
IF(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG") IF(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
KOKKOS_ENABLE_OPTION(DEBUG ON "Whether to activate extra debug features - may increase compile times") KOKKOS_ENABLE_OPTION(DEBUG ON "Whether to activate extra debug features - may increase compile times")
@ -51,12 +54,14 @@ IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}") SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}")
ENDIF() ENDIF()
IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA AND DEFINED KOKKOS_COMPILER_CUDA_VERSION AND KOKKOS_COMPILER_CUDA_VERSION GREATER 70) IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(LAMBDA_DEFAULT ON) SET(CUDA_LAMBDA_DEFAULT ON)
ELSEIF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang))
SET(CUDA_LAMBDA_DEFAULT ON)
ELSE() ELSE()
SET(LAMBDA_DEFAULT OFF) SET(CUDA_LAMBDA_DEFAULT OFF)
ENDIF() ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${LAMBDA_DEFAULT} "Whether to activate experimental lambda features") KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to activate experimental lambda features")
IF (Trilinos_ENABLE_Kokkos) IF (Trilinos_ENABLE_Kokkos)
SET(COMPLEX_ALIGN_DEFAULT OFF) SET(COMPLEX_ALIGN_DEFAULT OFF)
ELSE() ELSE()
@ -64,7 +69,13 @@ ELSE()
ENDIF() ENDIF()
KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)") KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)")
KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR OFF "Whether to activate experimental relaxed constexpr functions")
IF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang))
SET(CUDA_CONSTEXPR_DEFAULT ON)
ELSE()
SET(CUDA_CONSTEXPR_DEFAULT OFF)
ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR ${CUDA_CONSTEXPR_DEFAULT} "Whether to activate experimental relaxed constexpr functions")
FUNCTION(check_device_specific_options) FUNCTION(check_device_specific_options)
CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN}) CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN})
@ -84,9 +95,18 @@ FUNCTION(check_device_specific_options)
ENDFUNCTION() ENDFUNCTION()
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS HPX_ASYNC_DISPATCH) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS HPX_ASYNC_DISPATCH)
# Needed due to change from deprecated name to new header define name # Needed due to change from deprecated name to new header define name
IF (KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) IF (KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
SET(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ON) SET(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ON)
ENDIF() ENDIF()
# This is known to occur with Clang 9. We would need to use nvcc as the linker
# http://lists.llvm.org/pipermail/cfe-dev/2018-June/058296.html
# TODO: Through great effort we can use a different linker by hacking
# CMAKE_CXX_LINK_EXECUTABLE in a future release
IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE AND KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
MESSAGE(FATAL_ERROR "Relocatable device code is currently not supported with Clang - must use nvcc_wrapper or turn off RDC")
ENDIF()

View File

@ -62,7 +62,7 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
UNSET(${opt} CACHE) UNSET(${opt} CACHE)
ELSE() ELSE()
MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. This is now enforced to avoid hard-to-debug CMake cache inconsistencies.") MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. This is now enforced to avoid hard-to-debug CMake cache inconsistencies.")
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDFOREACH() ENDFOREACH()
@ -341,11 +341,12 @@ ENDMACRO()
# default, custom paths are prioritized over system paths. The searched # default, custom paths are prioritized over system paths. The searched
# order is: # order is:
# 1. <NAME>_ROOT variable # 1. <NAME>_ROOT variable
# 2. Kokkos_<NAME>_DIR variable # 2. <NAME>_ROOT environment variable
# 3. Locations in the PATHS option # 3. Kokkos_<NAME>_DIR variable
# 4. Default system paths, if allowed. # 4. Locations in the PATHS option
# 5. Default system paths, if allowed.
# #
# Default system paths are allowed if none of options (1)-(3) are specified # Default system paths are allowed if none of options (1)-(4) are specified
# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK # or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
# #
# Usage:: # Usage::
@ -387,33 +388,29 @@ MACRO(kokkos_find_header VAR_NAME HEADER TPL_NAME)
"PATHS" "PATHS"
${ARGN}) ${ARGN})
SET(${HEADER}_FOUND FALSE) SET(${VAR_NAME} "${VARNAME}-NOTFOUND")
SET(HAVE_CUSTOM_PATHS FALSE) SET(HAVE_CUSTOM_PATHS FALSE)
IF(NOT ${HEADER}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
#ONLY look in the root directory IF(DEFINED ${TPL_NAME}_ROOT OR
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${${TPL_NAME}_ROOT}/include NO_DEFAULT_PATH) DEFINED ENV{${TPL_NAME}_ROOT} OR
DEFINED KOKKOS_${TPL_NAME}_DIR OR
TPL_PATHS)
FIND_PATH(${VAR_NAME} ${HEADER}
PATHS
${${TPL_NAME}_ROOT}
$ENV{${TPL_NAME}_ROOT}
${KOKKOS_${TPL_NAME}_DIR}
${TPL_PATHS}
PATH_SUFFIXES include
NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE) SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF() ENDIF()
IF(NOT ${HEADER}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR) IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#ONLY look in the root directory #No-op if ${VAR_NAME} set by previous call
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${KOKKOS_${TPL_NAME}_DIR}/include NO_DEFAULT_PATH) FIND_PATH(${VAR_NAME} ${HEADER})
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF() ENDIF()
IF (NOT ${HEADER}_FOUND AND TPL_PATHS)
#we got custom paths
#ONLY look in these paths and nowhere else
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#Now go ahead and look in system paths
IF (NOT ${HEADER}_FOUND)
FIND_PATH(${VAR_NAME} ${HEADER})
ENDIF()
ENDIF()
ENDMACRO() ENDMACRO()
# #
@ -424,9 +421,10 @@ ENDMACRO()
# default, custom paths are prioritized over system paths. The search # default, custom paths are prioritized over system paths. The search
# order is: # order is:
# 1. <NAME>_ROOT variable # 1. <NAME>_ROOT variable
# 2. Kokkos_<NAME>_DIR variable # 2. <NAME>_ROOT environment variable
# 3. Locations in the PATHS option # 3. Kokkos_<NAME>_DIR variable
# 4. Default system paths, if allowed. # 4. Locations in the PATHS option
# 5. Default system paths, if allowed.
# #
# Default system paths are allowed if none of options (1)-(3) are specified # Default system paths are allowed if none of options (1)-(3) are specified
# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK # or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
@ -439,6 +437,7 @@ ENDMACRO()
# <TPL_NAME> # <TPL_NAME>
# [ALLOW_SYSTEM_PATH_FALLBACK] # [ALLOW_SYSTEM_PATH_FALLBACK]
# [PATHS path1 [path2 ...]] # [PATHS path1 [path2 ...]]
# [SUFFIXES suffix1 [suffix2 ...]]
# ) # )
# #
# ``<VAR_NAME>`` # ``<VAR_NAME>``
@ -463,39 +462,46 @@ ENDMACRO()
# #
# Custom paths to search for the library # Custom paths to search for the library
# #
# ``SUFFIXES``
#
# Suffixes appended to PATHS when attempting to locate
# the library. Defaults to {lib, lib64}.
#
MACRO(kokkos_find_library VAR_NAME LIB TPL_NAME) MACRO(kokkos_find_library VAR_NAME LIB TPL_NAME)
CMAKE_PARSE_ARGUMENTS(TPL CMAKE_PARSE_ARGUMENTS(TPL
"ALLOW_SYSTEM_PATH_FALLBACK" "ALLOW_SYSTEM_PATH_FALLBACK"
"" ""
"PATHS" "PATHS;SUFFIXES"
${ARGN}) ${ARGN})
SET(${LIB}_FOUND FALSE) IF(NOT TPL_SUFFIXES)
SET(TPL_SUFFIXES lib lib64)
ENDIF()
SET(${VAR_NAME} "${VARNAME}-NOTFOUND")
SET(HAVE_CUSTOM_PATHS FALSE) SET(HAVE_CUSTOM_PATHS FALSE)
IF(NOT ${LIB}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${${TPL_NAME}_ROOT}/lib ${${TPL_NAME}_ROOT}/lib64 NO_DEFAULT_PATH) IF(DEFINED ${TPL_NAME}_ROOT OR
DEFINED ENV{${TPL_NAME}_ROOT} OR
DEFINED KOKKOS_${TPL_NAME}_DIR OR
TPL_PATHS)
FIND_LIBRARY(${VAR_NAME} ${LIB}
PATHS
${${TPL_NAME}_ROOT}
$ENV{${TPL_NAME}_ROOT}
${KOKKOS_${TPL_NAME}_DIR}
${TPL_PATHS}
PATH_SUFFIXES
${TPL_SUFFIXES}
NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE) SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF() ENDIF()
IF(NOT ${LIB}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR) IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#we got root paths, only look in these paths and nowhere else #No-op if ${VAR_NAME} set by previous call
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${KOKKOS_${TPL_NAME}_DIR}/lib ${KOKKOS_${TPL_NAME}_DIR}/lib64 NO_DEFAULT_PATH) FIND_LIBRARY(${VAR_NAME} ${LIB} PATH_SUFFIXES ${TPL_SUFFIXES})
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF() ENDIF()
IF (NOT ${LIB}_FOUND AND TPL_PATHS)
#we got custom paths, only look in these paths and nowhere else
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
IF (NOT ${LIB}_FOUND)
#Now go ahead and look in system paths
FIND_LIBRARY(${VAR_NAME} ${LIB})
ENDIF()
ENDIF()
ENDMACRO() ENDMACRO()
# #
@ -510,26 +516,28 @@ ENDMACRO()
# <NAME> # <NAME>
# INTERFACE # INTERFACE
# ALLOW_SYSTEM_PATH_FALLBACK # ALLOW_SYSTEM_PATH_FALLBACK
# LIBRARY <path_to_librarY> # MODULE_NAME <name>
# LINK_LIBRARIES <lib1> <lib2> ... # IMPORTED_NAME <name>
# COMPILE_OPTIONS <opt1> <opt2> ... # LIBRARY <name>
# LINK_OPTIONS <opt1> <opt2> ... # LIBRARIES <name1> <name2> ...
# LIBRARY_PATHS <path1> <path2> ...
# LIBRARY_SUFFIXES <suffix1> <suffix2> ...
# HEADER <name>
# HEADERS <name1> <name2> ...
# HEADER_PATHS <path1> <path2> ...
# )
# #
# ``INTERFACE`` # ``INTERFACE``
# #
# If specified, this TPL will build an INTERFACE library rather than an # If specified, this TPL will build an INTERFACE library rather than an
# IMPORTED target # IMPORTED target
# #
# ``ALLOW_SYSTEM_PATH_FALLBACK" # ``ALLOW_SYSTEM_PATH_FALLBACK``
# #
# If custom paths are given and the library is not found # If custom paths are given and the library is not found
# should we be allowed to search default system paths # should we be allowed to search default system paths
# or error out if not found in given paths. # or error out if not found in given paths.
# #
# ``LIBRARY <name>``
#
# If specified, this gives the name of the library to look for
#
# ``MODULE_NAME <name>`` # ``MODULE_NAME <name>``
# #
# If specified, the name of the enclosing module passed to # If specified, the name of the enclosing module passed to
@ -541,29 +549,42 @@ ENDMACRO()
# If specified, this gives the name of the target to build. # If specified, this gives the name of the target to build.
# Defaults to Kokkos::<NAME> # Defaults to Kokkos::<NAME>
# #
# ``LIBRARY <name>``
#
# If specified, this gives the name of the library to look for
#
# ``LIBRARIES <name1> <name2> ...``
#
# If specified, this gives a list of libraries to find for the package
#
# ``LIBRARY_PATHS <path1> <path2> ...`` # ``LIBRARY_PATHS <path1> <path2> ...``
# #
# If specified, this gives a list of paths to search for the library # If specified, this gives a list of paths to search for the library.
# If not given, <NAME>_ROOT/lib and <NAME>_ROOT/lib64 will be searched. # If not given, <NAME>_ROOT will be searched.
#
# ``LIBRARY_SUFFIXES <suffix1> <suffix2> ...``
#
# Suffixes appended to LIBRARY_PATHS when attempting to locate
# libraries. If not given, defaults to {lib, lib64}.
#
# ``HEADER <name>``
#
# If specified, this gives the name of a header to to look for
#
# ``HEADERS <name1> <name2> ...``
#
# If specified, this gives a list of headers to find for the package
# #
# ``HEADER_PATHS <path1> <path2> ...`` # ``HEADER_PATHS <path1> <path2> ...``
# #
# If specified, this gives a list of paths to search for the headers # If specified, this gives a list of paths to search for the headers
# If not given, <NAME>_ROOT/include and <NAME>_ROOT/include will be searched. # If not given, <NAME>_ROOT/include and <NAME>_ROOT/include will be searched.
# #
# ``HEADERS <name1> <name2> ...``
#
# If specified, this gives a list of headers to find for the package
#
# ``LIBRARIES <name1> <name2> ...``
#
# If specified, this gives a list of libraries to find for the package
#
MACRO(kokkos_find_imported NAME) MACRO(kokkos_find_imported NAME)
CMAKE_PARSE_ARGUMENTS(TPL CMAKE_PARSE_ARGUMENTS(TPL
"INTERFACE;ALLOW_SYSTEM_PATH_FALLBACK" "INTERFACE;ALLOW_SYSTEM_PATH_FALLBACK"
"HEADER;LIBRARY;IMPORTED_NAME;MODULE_NAME" "IMPORTED_NAME;MODULE_NAME;LIBRARY;HEADER"
"HEADER_PATHS;LIBRARY_PATHS;HEADERS;LIBRARIES" "LIBRARIES;LIBRARY_PATHS;LIBRARY_SUFFIXES;HEADERS;HEADER_PATHS"
${ARGN}) ${ARGN})
IF(NOT TPL_MODULE_NAME) IF(NOT TPL_MODULE_NAME)
@ -584,6 +605,10 @@ MACRO(kokkos_find_imported NAME)
ENDIF() ENDIF()
ENDIF() ENDIF()
IF (NOT TPL_LIBRARY_SUFFIXES)
SET(TPL_LIBRARY_SUFFIXES lib lib64)
ENDIF()
SET(${NAME}_INCLUDE_DIRS) SET(${NAME}_INCLUDE_DIRS)
IF (TPL_HEADER) IF (TPL_HEADER)
KOKKOS_FIND_HEADER(${NAME}_INCLUDE_DIRS ${TPL_HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS}) KOKKOS_FIND_HEADER(${NAME}_INCLUDE_DIRS ${TPL_HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS})
@ -598,12 +623,18 @@ MACRO(kokkos_find_imported NAME)
SET(${NAME}_LIBRARY) SET(${NAME}_LIBRARY)
IF(TPL_LIBRARY) IF(TPL_LIBRARY)
KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS}) KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME}
${ALLOW_PATH_FALLBACK_OPT}
PATHS ${TPL_LIBRARY_PATHS}
SUFFIXES ${TPL_LIBRARY_SUFFIXES})
ENDIF() ENDIF()
SET(${NAME}_FOUND_LIBRARIES) SET(${NAME}_FOUND_LIBRARIES)
FOREACH(LIB ${TPL_LIBRARIES}) FOREACH(LIB ${TPL_LIBRARIES})
KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS}) KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME}
${ALLOW_PATH_FALLBACK_OPT}
PATHS ${TPL_LIBRARY_PATHS}
SUFFIXES ${TPL_LIBRARY_SUFFIXES})
IF(${LIB}_LOCATION) IF(${LIB}_LOCATION)
LIST(APPEND ${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION}) LIST(APPEND ${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
ELSE() ELSE()
@ -629,6 +660,13 @@ MACRO(kokkos_find_imported NAME)
MARK_AS_ADVANCED(${NAME}_INCLUDE_DIRS ${NAME}_FOUND_LIBRARIES ${NAME}_LIBRARY) MARK_AS_ADVANCED(${NAME}_INCLUDE_DIRS ${NAME}_FOUND_LIBRARIES ${NAME}_LIBRARY)
#this is so much fun on a Cray system
#/usr/include should never be added as a -isystem include
#this freaks out the compiler include search order
IF (KOKKOS_IS_CRAYPE)
LIST(REMOVE_ITEM ${NAME}_INCLUDE_DIRS "/usr/include")
ENDIF()
IF (${TPL_MODULE_NAME}_FOUND) IF (${TPL_MODULE_NAME}_FOUND)
SET(IMPORT_TYPE) SET(IMPORT_TYPE)
IF (TPL_INTERFACE) IF (TPL_INTERFACE)
@ -698,3 +736,66 @@ FUNCTION(kokkos_link_tpl TARGET)
ENDIF() ENDIF()
ENDFUNCTION() ENDFUNCTION()
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
CMAKE_PARSE_ARGUMENTS(
PARSE
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
""
"${COMPILERS}"
${ARGN})
IF(PARSE_UNPARSED_ARGUMENTS)
MESSAGE(SEND_ERROR "'${PARSE_UNPARSED_ARGUMENTS}' argument(s) not recognized when providing compiler specific options")
ENDIF()
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})
SET(COMPILER_SPECIFIC_FLAGS_TMP)
FOREACH(COMP ${COMPILERS})
IF (COMPILER STREQUAL "${COMP}")
IF (PARSE_${COMPILER})
IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED")
SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_${COMPILER}})
ENDIF()
ELSEIF(PARSE_DEFAULT)
SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_DEFAULT})
ENDIF()
ENDIF()
ENDFOREACH()
IF (PARSE_COMPILE_OPTIONS)
# The funky logic here is for future handling of argument deduplication
# If we naively pass multiple -Xcompiler flags to target_compile_options
# -Xcompiler will get deduplicated and break the build
IF ("-Xcompiler" IN_LIST COMPILER_SPECIFIC_FLAGS_TMP)
LIST(REMOVE_ITEM COMPILER_SPECIFIC_FLAGS_TMP "-Xcompiler")
GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP})
ELSE()
GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP})
ENDIF()
ENDIF()
IF (PARSE_LINK_OPTIONS)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP})
ENDIF()
IF (PARSE_COMPILE_DEFINITIONS)
GLOBAL_APPEND(KOKKOS_COMPILE_DEFINITIONS ${COMPILER_SPECIFIC_FLAGS_TMP})
ENDIF()
IF (PARSE_LINK_LIBRARIES)
GLOBAL_APPEND(KOKKOS_LINK_LIBRARIES ${COMPILER_SPECIFIC_FLAGS_TMP})
ENDIF()
ENDFUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
FUNCTION(COMPILER_SPECIFIC_FLAGS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_OPTIONS LINK_OPTIONS)
ENDFUNCTION(COMPILER_SPECIFIC_FLAGS)
FUNCTION(COMPILER_SPECIFIC_DEFS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_DEFINITIONS)
ENDFUNCTION(COMPILER_SPECIFIC_DEFS)
FUNCTION(COMPILER_SPECIFIC_LIBS)
COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} LINK_LIBRARIES)
ENDFUNCTION(COMPILER_SPECIFIC_LIBS)

View File

@ -31,12 +31,11 @@ IF (NOT KOKKOS_HAS_TRILINOS)
ELSE() ELSE()
CONFIGURE_FILE(cmake/KokkosConfigCommon.cmake.in ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake @ONLY) CONFIGURE_FILE(cmake/KokkosConfigCommon.cmake.in ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake @ONLY)
file(READ ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake KOKKOS_CONFIG_COMMON) file(READ ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake KOKKOS_CONFIG_COMMON)
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" ${KOKKOS_CONFIG_COMMON}) file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_CONFIG_COMMON}")
CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY)
file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG)
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}")
ENDIF() ENDIF()
# build and install pkgconfig file
CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR})

View File

@ -29,6 +29,10 @@ FUNCTION(kokkos_set_cxx_standard_feature standard)
ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME}) ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME})
MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
#MSVC doesn't need a command line flag, that doesn't mean it has no support
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSE() ELSE()
#nope, we can't do anything here #nope, we can't do anything here
MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.") MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.")
@ -119,6 +123,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
INCLUDE(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSE() ELSE()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake)
kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
@ -128,9 +135,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
IF (DEFINED CXX_STD_FLAGS_ACCEPTED) IF (DEFINED CXX_STD_FLAGS_ACCEPTED)
UNSET(CXX_STD_FLAGS_ACCEPTED CACHE) UNSET(CXX_STD_FLAGS_ACCEPTED CACHE)
ENDIF() ENDIF()
CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_STANDARD_FLAG} CXX_STD_FLAGS_ACCEPTED) CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_STANDARD_FLAG}" CXX_STD_FLAGS_ACCEPTED)
IF (NOT CXX_STD_FLAGS_ACCEPTED) IF (NOT CXX_STD_FLAGS_ACCEPTED)
CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG} CXX_INT_STD_FLAGS_ACCEPTED) CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}" CXX_INT_STD_FLAGS_ACCEPTED)
IF (NOT CXX_INT_STD_FLAGS_ACCEPTED) IF (NOT CXX_INT_STD_FLAGS_ACCEPTED)
MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG} or ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}") MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG} or ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}")
ENDIF() ENDIF()

View File

@ -15,6 +15,10 @@ KOKKOS_TPL_OPTION(CUDA Off)
KOKKOS_TPL_OPTION(LIBRT Off) KOKKOS_TPL_OPTION(LIBRT Off)
KOKKOS_TPL_OPTION(LIBDL On) KOKKOS_TPL_OPTION(LIBDL On)
IF(KOKKOS_ENABLE_PROFILING AND NOT KOKKOS_ENABLE_LIBDL)
MESSAGE(SEND_ERROR "Kokkos_ENABLE_PROFILING requires Kokkos_ENABLE_LIBDL=ON")
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX) IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)
SET(HPX_DEFAULT ON) SET(HPX_DEFAULT ON)
ELSE() ELSE()

View File

@ -43,6 +43,8 @@ MACRO(KOKKOS_SUBPACKAGE NAME)
SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
#ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME})
#GLOBAL_SET(${PACKAGE_NAME}_LIBS "")
endif() endif()
ENDMACRO() ENDMACRO()
@ -114,9 +116,9 @@ MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME)
VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS})
ENDMACRO() ENDMACRO()
FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME)
if (KOKKOS_HAS_TRILINOS) if (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN}) TRIBITS_ADD_EXECUTABLE(${ROOT_NAME} ${ARGN})
else() else()
CMAKE_PARSE_ARGUMENTS(PARSE CMAKE_PARSE_ARGUMENTS(PARSE
"TESTONLY" "TESTONLY"
@ -124,19 +126,18 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME)
"SOURCES;TESTONLYLIBS" "SOURCES;TESTONLYLIBS"
${ARGN}) ${ARGN})
SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME})
ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES}) ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES})
IF (PARSE_TESTONLYLIBS) IF (PARSE_TESTONLYLIBS)
TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE ${PARSE_TESTONLYLIBS})
ENDIF() ENDIF()
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS})
#All executables must link to all the kokkos targets
#This is just private linkage because exe is final
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE kokkos)
endif() endif()
ENDFUNCTION() ENDFUNCTION()
IF(NOT TARGET check)
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
ENDIF()
FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
IF (KOKKOS_HAS_TRILINOS) IF (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_EXECUTABLE_AND_TEST( TRIBITS_ADD_EXECUTABLE_AND_TEST(
@ -154,17 +155,24 @@ ELSE()
"SOURCES;CATEGORIES" "SOURCES;CATEGORIES"
${ARGN}) ${ARGN})
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS})
SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME}) KOKKOS_ADD_TEST_EXECUTABLE(${ROOT_NAME}
KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME}
SOURCES ${PARSE_SOURCES} SOURCES ${PARSE_SOURCES}
) )
KOKKOS_ADD_TEST(NAME ${ROOT_NAME} KOKKOS_ADD_TEST(NAME ${ROOT_NAME}
EXE ${EXE_NAME} EXE ${ROOT_NAME}
FAIL_REGULAR_EXPRESSION " FAILED " FAIL_REGULAR_EXPRESSION " FAILED "
) )
ENDIF() ENDIF()
ENDFUNCTION() ENDFUNCTION()
FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
SET(TARGET_NAME ${PACKAGE_NAME}_${ROOT_NAME})
IF (NOT TARGET ${TARGET_NAME})
MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties")
ENDIF()
SET_PROPERTY(TARGET ${TARGET_PROPERTY} PROPERTY ${ARGN})
ENDFUNCTION()
MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
@ -178,20 +186,17 @@ MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
ENDMACRO() ENDMACRO()
MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE CMAKE_PARSE_ARGUMENTS(PARSE
"" ""
"" ""
"SOURCES" "SOURCES"
${ARGN}) ${ARGN})
KOKKOS_ADD_EXECUTABLE(${EXE_NAME} KOKKOS_ADD_EXECUTABLE(${ROOT_NAME}
SOURCES ${PARSE_SOURCES} SOURCES ${PARSE_SOURCES}
${PARSE_UNPARSED_ARGUMENTS} ${PARSE_UNPARSED_ARGUMENTS}
TESTONLYLIBS kokkos_gtest TESTONLYLIBS kokkos_gtest
) )
IF (NOT KOKKOS_HAS_TRILINOS)
ADD_DEPENDENCIES(check ${EXE_NAME})
ENDIF()
ENDMACRO() ENDMACRO()
MACRO(KOKKOS_PACKAGE_POSTPROCESS) MACRO(KOKKOS_PACKAGE_POSTPROCESS)
@ -230,6 +235,15 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_OPTIONS}> $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_OPTIONS}>
) )
TARGET_COMPILE_DEFINITIONS(
${LIBRARY_NAME} PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_DEFINITIONS}>
)
TARGET_LINK_LIBRARIES(
${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_LIBRARIES}
)
IF (KOKKOS_ENABLE_CUDA) IF (KOKKOS_ENABLE_CUDA)
TARGET_COMPILE_OPTIONS( TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME} ${LIBRARY_NAME}
@ -245,6 +259,13 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
) )
ENDIF() ENDIF()
IF (KOKKOS_ENABLE_HIP)
TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME}
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_AMDGPU_OPTIONS}>
)
ENDIF()
LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH) LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH)
IF (XOPT_LENGTH GREATER 1) IF (XOPT_LENGTH GREATER 1)
MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12") MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12")
@ -390,3 +411,15 @@ MACRO(KOKKOS_ADD_TEST_DIRECTORIES)
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDMACRO() ENDMACRO()
MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES)
if (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN})
else()
IF(KOKKOS_ENABLE_EXAMPLES)
FOREACH(EXAMPLE_DIR ${ARGN})
ADD_SUBDIRECTORY(${EXAMPLE_DIR})
ENDFOREACH()
ENDIF()
endif()
ENDMACRO()

View File

@ -0,0 +1,11 @@
FUNCTION(kokkos_set_msvc_flags full_standard int_standard)
IF (CMAKE_CXX_EXTENSIONS)
SET(KOKKOS_CXX_STANDARD_FLAG "" PARENT_SCOPE)
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "" PARENT_SCOPE)
ELSE()
SET(KOKKOS_CXX_STANDARD_FLAG "" PARENT_SCOPE)
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "" PARENT_SCOPE)
ENDIF()
ENDFUNCTION()

View File

@ -55,19 +55,9 @@
# Check for CUDA support # Check for CUDA support
IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1") IF (NOT TPL_ENABLE_CUDA)
MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)") MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA")
ELSE() ELSE()
IF(CMAKE_VERSION VERSION_LESS "2.8.8")
# FindCUDA before CMake 2.8.8 does not find cusparse library; therefore, we must
find_library(CUDA_cusparse_LIBRARY
cusparse
HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib
)
IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND")
MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cuspasre library.")
ENDIF()
ENDIF()
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})

View File

@ -76,19 +76,18 @@ CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" #CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS="" PGI_WARNING_FLAGS=""
# Default. Machine specific can override. # Default. Machine specific can override.
DEBUG=False DEBUG=False
ARGS="" ARGS=""
CUSTOM_BUILD_LIST="" CUSTOM_BUILD_LIST=""
QTHREADS_PATH=""
DRYRUN=False DRYRUN=False
BUILD_ONLY=False BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1 declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
@ -114,9 +113,6 @@ do
--kokkos-path*) --kokkos-path*)
KOKKOS_PATH="${key#*=}" KOKKOS_PATH="${key#*=}"
;; ;;
--qthreads-path*)
QTHREADS_PATH="${key#*=}"
;;
--build-list*) --build-list*)
CUSTOM_BUILD_LIST="${key#*=}" CUSTOM_BUILD_LIST="${key#*=}"
;; ;;
@ -417,8 +413,8 @@ if [ "$PRINT_HELP" = "True" ]; then
echo "--build-list=BUILD,BUILD,BUILD..." echo "--build-list=BUILD,BUILD,BUILD..."
echo " Provide a comma-separated list of builds instead of running all builds" echo " Provide a comma-separated list of builds instead of running all builds"
echo " Valid items:" echo " Valid items:"
echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial" echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
echo "" echo ""
echo "ARGS: list of expressions matching compilers to test" echo "ARGS: list of expressions matching compilers to test"
@ -483,33 +479,6 @@ for ARG in $ARGS; do
done done
done done
# Check if Qthreads build requested.
HAVE_QTHREADS_BUILD="False"
if [ -n "$CUSTOM_BUILD_LIST" ]; then
if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then
HAVE_QTHREADS_BUILD="True"
fi
else
for COMPILER_DATA in "${COMPILERS[@]}"; do
ARR=($COMPILER_DATA)
BUILD_LIST=${ARR[2]}
if [[ "$BUILD_LIST" = *Qthreads* ]]; then
HAVE_QTHREADS_BUILD="True"
fi
done
fi
# Ensure Qthreads path is set if Qthreads build is requested.
if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then
if [ -z "$QTHREADS_PATH" ]; then
echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2
exit 1
else
# Strip trailing slashes from path.
QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//')
fi
fi
# #
# Functions. # Functions.
# #
@ -627,14 +596,6 @@ single_build_and_test() {
local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
fi fi
if [[ "$build" = *Qthreads* ]]; then
if [[ "$build_type" = hwloc* ]]; then
local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc"
else
local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH"
fi
fi
if [[ "$OPT_FLAG" = "" ]]; then if [[ "$OPT_FLAG" = "" ]]; then
OPT_FLAG="-O3" OPT_FLAG="-O3"
fi fi

View File

@ -9,13 +9,9 @@ IF(Kokkos_ENABLE_CUDA)
TestCuda.cpp TestCuda.cpp
) )
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Cuda KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Cuda
SOURCES ${SOURCES} SOURCES ${SOURCES}
) )
KOKKOS_ADD_TEST( NAME PerformanceTest_Cuda
EXE PerfTestExec_Cuda
)
ENDIF() ENDIF()
IF(Kokkos_ENABLE_PTHREAD) IF(Kokkos_ENABLE_PTHREAD)
@ -23,13 +19,9 @@ IF(Kokkos_ENABLE_PTHREAD)
TestMain.cpp TestMain.cpp
TestThreads.cpp TestThreads.cpp
) )
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Threads KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Threads
SOURCES ${SOURCES} SOURCES ${SOURCES}
) )
KOKKOS_ADD_TEST( NAME PerformanceTest_Threads
EXE PerfTestExec_Threads
)
ENDIF() ENDIF()
IF(Kokkos_ENABLE_OPENMP) IF(Kokkos_ENABLE_OPENMP)
@ -37,13 +29,9 @@ IF(Kokkos_ENABLE_OPENMP)
TestMain.cpp TestMain.cpp
TestOpenMP.cpp TestOpenMP.cpp
) )
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_OpenMP KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_OpenMP
SOURCES ${SOURCES} SOURCES ${SOURCES}
) )
KOKKOS_ADD_TEST( NAME PerformanceTest_OpenMP
EXE PerfTestExec_OpenMP
)
ENDIF() ENDIF()
IF(Kokkos_ENABLE_HPX) IF(Kokkos_ENABLE_HPX)
@ -51,12 +39,8 @@ IF(Kokkos_ENABLE_HPX)
TestMain.cpp TestMain.cpp
TestHPX.cpp TestHPX.cpp
) )
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_HPX KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_HPX
SOURCES ${SOURCES} SOURCES ${SOURCES}
) )
KOKKOS_ADD_TEST( NAME PerformanceTest_HPX
EXE PerfTestExec_HPX
)
ENDIF() ENDIF()

View File

@ -103,19 +103,19 @@ class Bitset {
} }
} }
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
Bitset(const Bitset<Device>&) = default; Bitset(const Bitset<Device>&) = default;
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
Bitset& operator=(const Bitset<Device>&) = default; Bitset& operator=(const Bitset<Device>&) = default;
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
Bitset(Bitset<Device>&&) = default; Bitset(Bitset<Device>&&) = default;
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
Bitset& operator=(Bitset<Device>&&) = default; Bitset& operator=(Bitset<Device>&&) = default;
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
~Bitset() = default; ~Bitset() = default;
/// number of bits in the set /// number of bits in the set

View File

@ -238,6 +238,53 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
#endif #endif
} }
/// \brief Constructor that allocates View objects on both host and device.
///
/// This constructor works like the analogous constructor of View.
/// The first arguments are wrapped up in a ViewCtor class, this allows
/// for a label, without initializing, and all of the other things that can
/// be wrapped up in a Ctor class.
/// The arguments that follow are the dimensions of the
/// View objects. For example, if the View has three dimensions,
/// the first three integer arguments will be nonzero, and you may
/// omit the integer arguments that follow.
template <class... P>
DualView(const Impl::ViewCtorProp<P...>& arg_prop,
typename std::enable_if<!Impl::ViewCtorProp<P...>::has_pointer,
size_t>::type const n0 =
KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
: d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
,
modified_flags(t_modified_flags("DualView::modified_flags")) {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
#endif
}
explicit inline DualView(const ViewAllocateWithoutInitializing& arg_prop,
const size_t arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
: DualView(Impl::ViewCtorProp<std::string,
Kokkos::Impl::WithoutInitializing_t>(
arg_prop.label, Kokkos::WithoutInitializing),
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6,
arg_N7) {}
//! Copy constructor (shallow copy) //! Copy constructor (shallow copy)
template <class SS, class LS, class DS, class MS> template <class SS, class LS, class DS, class MS>
DualView(const DualView<SS, LS, DS, MS>& src) DualView(const DualView<SS, LS, DS, MS>& src)
@ -470,23 +517,43 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// as modified, by calling the modify() method with the /// as modified, by calling the modify() method with the
/// appropriate template parameter. /// appropriate template parameter.
template <class Device> template <class Device>
void sync(const typename Impl::enable_if< void sync(const typename std::enable_if<
(std::is_same<typename traits::data_type, (std::is_same<typename traits::data_type,
typename traits::non_const_data_type>::value) || typename traits::non_const_data_type>::value) ||
(std::is_same<Device, int>::value), (std::is_same<Device, int>::value),
int>::type& = 0) { int>::type& = 0) {
if (modified_flags.data() == NULL) return; if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>(); int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type if (dev == 1) { // if Device is the same as DualView's device type
if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), true);
}
#endif
deep_copy(d_view, h_view); deep_copy(d_view, h_view);
modified_flags(0) = modified_flags(1) = 0; modified_flags(0) = modified_flags(1) = 0;
} }
} }
if (dev == 0) { // hopefully Device is the same as DualView's host type if (dev == 0) { // hopefully Device is the same as DualView's host type
if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), false);
}
#endif
deep_copy(h_view, d_view); deep_copy(h_view, d_view);
modified_flags(0) = modified_flags(1) = 0; modified_flags(0) = modified_flags(1) = 0;
} }
@ -499,12 +566,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
} }
template <class Device> template <class Device>
void sync(const typename Impl::enable_if< void sync(const typename std::enable_if<
(!std::is_same<typename traits::data_type, (!std::is_same<typename traits::data_type,
typename traits::non_const_data_type>::value) || typename traits::non_const_data_type>::value) ||
(std::is_same<Device, int>::value), (std::is_same<Device, int>::value),
int>::type& = 0) { int>::type& = 0) {
if (modified_flags.data() == NULL) return; if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>(); int dev = get_device_side<Device>();
@ -527,8 +594,18 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
typename traits::non_const_data_type>::value) typename traits::non_const_data_type>::value)
Impl::throw_runtime_exception( Impl::throw_runtime_exception(
"Calling sync_host on a DualView with a const datatype."); "Calling sync_host on a DualView with a const datatype.");
if (modified_flags.data() == NULL) return; if (modified_flags.data() == nullptr) return;
if (modified_flags(1) > modified_flags(0)) { if (modified_flags(1) > modified_flags(0)) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), false);
}
#endif
deep_copy(h_view, d_view); deep_copy(h_view, d_view);
modified_flags(1) = modified_flags(0) = 0; modified_flags(1) = modified_flags(0) = 0;
} }
@ -539,8 +616,18 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
typename traits::non_const_data_type>::value) typename traits::non_const_data_type>::value)
Impl::throw_runtime_exception( Impl::throw_runtime_exception(
"Calling sync_device on a DualView with a const datatype."); "Calling sync_device on a DualView with a const datatype.");
if (modified_flags.data() == NULL) return; if (modified_flags.data() == nullptr) return;
if (modified_flags(0) > modified_flags(1)) { if (modified_flags(0) > modified_flags(1)) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), true);
}
#endif
deep_copy(d_view, h_view); deep_copy(d_view, h_view);
modified_flags(1) = modified_flags(0) = 0; modified_flags(1) = modified_flags(0) = 0;
} }
@ -548,7 +635,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
template <class Device> template <class Device>
bool need_sync() const { bool need_sync() const {
if (modified_flags.data() == NULL) return false; if (modified_flags.data() == nullptr) return false;
int dev = get_device_side<Device>(); int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type if (dev == 1) { // if Device is the same as DualView's device type
@ -565,12 +652,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
} }
inline bool need_sync_host() const { inline bool need_sync_host() const {
if (modified_flags.data() == NULL) return false; if (modified_flags.data() == nullptr) return false;
return modified_flags(0) < modified_flags(1); return modified_flags(0) < modified_flags(1);
} }
inline bool need_sync_device() const { inline bool need_sync_device() const {
if (modified_flags.data() == NULL) return false; if (modified_flags.data() == nullptr) return false;
return modified_flags(1) < modified_flags(0); return modified_flags(1) < modified_flags(0);
} }
@ -581,7 +668,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// data as modified. /// data as modified.
template <class Device> template <class Device>
void modify() { void modify() {
if (modified_flags.data() == NULL) return; if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>(); int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type if (dev == 1) { // if Device is the same as DualView's device type
@ -612,7 +699,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
} }
inline void modify_host() { inline void modify_host() {
if (modified_flags.data() != NULL) { if (modified_flags.data() != nullptr) {
modified_flags(0) = modified_flags(0) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1) (modified_flags(1) > modified_flags(0) ? modified_flags(1)
: modified_flags(0)) + : modified_flags(0)) +
@ -631,7 +718,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
} }
inline void modify_device() { inline void modify_device() {
if (modified_flags.data() != NULL) { if (modified_flags.data() != nullptr) {
modified_flags(1) = modified_flags(1) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1) (modified_flags(1) > modified_flags(0) ? modified_flags(1)
: modified_flags(0)) + : modified_flags(0)) +
@ -650,7 +737,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
} }
inline void clear_sync_state() { inline void clear_sync_state() {
if (modified_flags.data() != NULL) if (modified_flags.data() != nullptr)
modified_flags(1) = modified_flags(0) = 0; modified_flags(1) = modified_flags(0) = 0;
} }
@ -675,7 +762,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
h_view = create_mirror_view(d_view); h_view = create_mirror_view(d_view);
/* Reset dirty flags */ /* Reset dirty flags */
if (modified_flags.data() == NULL) { if (modified_flags.data() == nullptr) {
modified_flags = t_modified_flags("DualView::modified_flags"); modified_flags = t_modified_flags("DualView::modified_flags");
} else } else
modified_flags(1) = modified_flags(0) = 0; modified_flags(1) = modified_flags(0) = 0;
@ -693,7 +780,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
if (modified_flags.data() == NULL) { if (modified_flags.data() == nullptr) {
modified_flags = t_modified_flags("DualView::modified_flags"); modified_flags = t_modified_flags("DualView::modified_flags");
} }
if (modified_flags(1) >= modified_flags(0)) { if (modified_flags(1) >= modified_flags(0)) {
@ -866,4 +953,27 @@ void deep_copy(
} // namespace Kokkos } // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//
// Non-member resize and realloc
//
template <class... Properties, class... Args>
void resize(DualView<Properties...>& dv, Args&&... args) noexcept(
noexcept(dv.resize(std::forward<Args>(args)...))) {
dv.resize(std::forward<Args>(args)...);
}
template <class... Properties, class... Args>
void realloc(DualView<Properties...>& dv, Args&&... args) noexcept(
noexcept(dv.realloc(std::forward<Args>(args)...))) {
dv.realloc(std::forward<Args>(args)...);
}
} // end namespace Kokkos
#endif #endif

View File

@ -293,6 +293,7 @@ KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds(
dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...); dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...);
Kokkos::Impl::throw_runtime_exception(std::string(buffer)); Kokkos::Impl::throw_runtime_exception(std::string(buffer));
#else #else
(void)tracker;
Kokkos::abort("DynRankView bounds error"); Kokkos::abort("DynRankView bounds error");
#endif #endif
} }
@ -1065,8 +1066,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
//---------------------------------------- //----------------------------------------
// Standard constructor, destructor, and assignment operators... // Standard constructor, destructor, and assignment operators...
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
~DynRankView() {} ~DynRankView() = default;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor
@ -1773,7 +1774,7 @@ struct DynRankViewRemap {
const Kokkos::Impl::ParallelFor<DynRankViewRemap, Policy> closure( const Kokkos::Impl::ParallelFor<DynRankViewRemap, Policy> closure(
*this, Policy(0, n0)); *this, Policy(0, n0));
closure.execute(); closure.execute();
// Kokkos::fence(); // ?? // ExecSpace().fence(); // ??
} }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -1806,7 +1807,8 @@ inline void deep_copy(
const DynRankView<DT, DP...>& dst, const DynRankView<DT, DP...>& dst,
typename ViewTraits<DT, DP...>::const_value_type& value, typename ViewTraits<DT, DP...>::const_value_type& value,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) { typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert( static_assert(
std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type, std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type,
typename ViewTraits<DT, DP...>::value_type>::value, typename ViewTraits<DT, DP...>::value_type>::value,
@ -1843,7 +1845,7 @@ inline void deep_copy(
(std::is_same<typename DstType::traits::specialize, void>::value && (std::is_same<typename DstType::traits::specialize, void>::value &&
std::is_same<typename SrcType::traits::specialize, void>::value && std::is_same<typename SrcType::traits::specialize, void>::value &&
(Kokkos::is_dyn_rank_view<DstType>::value || (Kokkos::is_dyn_rank_view<DstType>::value ||
Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = 0) { Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = nullptr) {
static_assert( static_assert(
std::is_same<typename DstType::traits::value_type, std::is_same<typename DstType::traits::value_type,
typename DstType::traits::non_const_value_type>::value, typename DstType::traits::non_const_value_type>::value,
@ -2009,7 +2011,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
typename std::enable_if< typename std::enable_if<
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value && std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = 0) { Kokkos::LayoutStride>::value>::type* = nullptr) {
typedef DynRankView<T, P...> src_type; typedef DynRankView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type; typedef typename src_type::HostMirror dst_type;
@ -2036,7 +2038,8 @@ template <class Space, class T, class... P>
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror( typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
const Space&, const Kokkos::DynRankView<T, P...>& src, const Space&, const Kokkos::DynRankView<T, P...>& src,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<T, P...>::specialize, void>::value>::type* = 0) { typename ViewTraits<T, P...>::specialize, void>::value>::type* =
nullptr) {
return typename Impl::MirrorDRVType<Space, T, P...>::view_type( return typename Impl::MirrorDRVType<Space, T, P...>::view_type(
src.label(), Impl::reconstructLayout(src.layout(), src.rank())); src.label(), Impl::reconstructLayout(src.layout(), src.rank()));
} }
@ -2050,7 +2053,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror_view(
typename DynRankView<T, P...>::HostMirror::memory_space>::value && typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type, std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>:: typename DynRankView<T, P...>::HostMirror::data_type>::
value)>::type* = 0) { value)>::type* = nullptr) {
return src; return src;
} }
@ -2072,7 +2075,8 @@ template <class Space, class T, class... P>
typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view( typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view(
const Space&, const Kokkos::DynRankView<T, P...>& src, const Space&, const Kokkos::DynRankView<T, P...>& src,
typename std::enable_if< typename std::enable_if<
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) { Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
return src; return src;
} }
@ -2094,7 +2098,8 @@ create_mirror_view_and_copy(
const Space&, const Kokkos::DynRankView<T, P...>& src, const Space&, const Kokkos::DynRankView<T, P...>& src,
std::string const& name = "", std::string const& name = "",
typename std::enable_if< typename std::enable_if<
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) { Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
(void)name; (void)name;
return src; return src;
} }
@ -2139,7 +2144,7 @@ inline void resize(DynRankView<T, P...>& v,
static_assert(Kokkos::ViewTraits<T, P...>::is_managed, static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
"Can only resize managed views"); "Can only resize managed views");
drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6); drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6, n7);
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v); Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
@ -2166,7 +2171,7 @@ inline void realloc(DynRankView<T, P...>& v,
const std::string label = v.label(); const std::string label = v.label();
v = drview_type(); // Deallocate first, if the only view to allocation v = drview_type(); // Deallocate first, if the only view to allocation
v = drview_type(label, n0, n1, n2, n3, n4, n5, n6); v = drview_type(label, n0, n1, n2, n3, n4, n5, n6, n7);
} }
} // namespace Kokkos } // namespace Kokkos

View File

@ -70,10 +70,10 @@ struct ChunkArraySpace<Kokkos::CudaSpace> {
using memory_space = typename Kokkos::CudaUVMSpace; using memory_space = typename Kokkos::CudaUVMSpace;
}; };
#endif #endif
#ifdef KOKKOS_ENABLE_ROCM #ifdef KOKKOS_ENABLE_HIP
template <> template <>
struct ChunkArraySpace<Kokkos::Experimental::ROCmSpace> { struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> {
using memory_space = typename Kokkos::Experimental::ROCmHostPinnedSpace; using memory_space = typename Kokkos::Experimental::HIPHostPinnedSpace;
}; };
#endif #endif
} // end namespace Impl } // end namespace Impl
@ -248,8 +248,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
//---------------------------------------- //----------------------------------------
template <typename I0, class... Args> template <typename I0, class... Args>
KOKKOS_INLINE_FUNCTION reference_type operator()(const I0& i0, KOKKOS_INLINE_FUNCTION reference_type
const Args&... args) const { operator()(const I0& i0, const Args&... /*args*/) const {
static_assert(Kokkos::Impl::are_integral<I0, Args...>::value, static_assert(Kokkos::Impl::are_integral<I0, Args...>::value,
"Indices must be integral type"); "Indices must be integral type");
@ -265,7 +265,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// If not bounds checking then we assume a non-zero pointer is valid. // If not bounds checking then we assume a non-zero pointer is valid.
#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) #if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
if (0 == *ch) if (nullptr == *ch)
#endif #endif
{ {
// Verify that allocation of the requested chunk in in progress. // Verify that allocation of the requested chunk in in progress.
@ -280,7 +280,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// Allocation of this chunk is in progress // Allocation of this chunk is in progress
// so wait for allocation to complete. // so wait for allocation to complete.
while (0 == *ch) while (nullptr == *ch)
; ;
} }
@ -325,7 +325,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
--*pc; --*pc;
typename traits::memory_space().deallocate( typename traits::memory_space().deallocate(
m_chunks[*pc], sizeof(local_value_type) << m_chunk_shift); m_chunks[*pc], sizeof(local_value_type) << m_chunk_shift);
m_chunks[*pc] = 0; m_chunks[*pc] = nullptr;
} }
} }
// *m_chunks[m_chunk_max+1] stores the 'extent' requested by resize // *m_chunks[m_chunk_max+1] stores the 'extent' requested by resize
@ -366,10 +366,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// Initialize or destroy array of chunk pointers. // Initialize or destroy array of chunk pointers.
// Two entries beyond the max chunks are allocation counters. // Two entries beyond the max chunks are allocation counters.
inline void operator()(unsigned i) const { inline void operator()(unsigned i) const {
if (m_destroy && i < m_chunk_max && 0 != m_chunks[i]) { if (m_destroy && i < m_chunk_max && nullptr != m_chunks[i]) {
typename traits::memory_space().deallocate(m_chunks[i], m_chunk_size); typename traits::memory_space().deallocate(m_chunks[i], m_chunk_size);
} }
m_chunks[i] = 0; m_chunks[i] = nullptr;
} }
void execute(bool arg_destroy) { void execute(bool arg_destroy) {
@ -419,7 +419,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
const unsigned min_chunk_size, const unsigned min_chunk_size,
const unsigned max_extent) const unsigned max_extent)
: m_track(), : m_track(),
m_chunks(0) m_chunks(nullptr)
// The chunk size is guaranteed to be a power of two // The chunk size is guaranteed to be a power of two
, ,
m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains( m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains(
@ -528,7 +528,7 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
typedef SrcType src_subview_type; typedef SrcType src_subview_type;
dst_subview_type dst_sub; dst_subview_type dst_sub;
src_subview_type src_sub; src_subview_type src_sub;
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0) CommonSubview(const DstType& dst, const SrcType& src, const Arg0& /*arg0*/)
: dst_sub(dst), src_sub(src) {} : dst_sub(dst), src_sub(src) {}
}; };

View File

@ -187,7 +187,7 @@ template <typename ReportType, typename DeviceType>
void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) { void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) {
m_reports.resize(new_size); m_reports.resize(new_size);
m_reporters.resize(new_size); m_reporters.resize(new_size);
Kokkos::fence(); typename DeviceType::execution_space().fence();
} }
} // namespace Experimental } // namespace Experimental

View File

@ -362,19 +362,18 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
//---------------------------------------- //----------------------------------------
private: private:
enum { static constexpr bool is_layout_left =
is_layout_left = std::is_same<typename traits::array_layout, Kokkos::LayoutLeft>::value;
std::is_same<typename traits::array_layout, Kokkos::LayoutLeft>::value,
is_layout_right = static constexpr bool is_layout_right =
std::is_same<typename traits::array_layout, Kokkos::LayoutRight>::value, std::is_same<typename traits::array_layout, Kokkos::LayoutRight>::value;
is_layout_stride = std::is_same<typename traits::array_layout, static constexpr bool is_layout_stride =
Kokkos::LayoutStride>::value, std::is_same<typename traits::array_layout, Kokkos::LayoutStride>::value;
is_default_map = std::is_same<typename traits::specialize, void>::value && static constexpr bool is_default_map =
(is_layout_left || is_layout_right || is_layout_stride) std::is_same<typename traits::specialize, void>::value &&
}; (is_layout_left || is_layout_right || is_layout_stride);
template <class Space, bool = Kokkos::Impl::MemorySpaceAccess< template <class Space, bool = Kokkos::Impl::MemorySpaceAccess<
Space, typename traits::memory_space>::accessible> Space, typename traits::memory_space>::accessible>
@ -804,8 +803,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
//---------------------------------------- //----------------------------------------
// Standard destructor, constructors, and assignment operators // Standard destructor, constructors, and assignment operators
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
~OffsetView() {} ~OffsetView() = default;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
OffsetView() : m_track(), m_map() { OffsetView() : m_track(), m_map() {
@ -1317,7 +1316,7 @@ KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
Kokkos::Impl::ALL_t shift_input(const Kokkos::Impl::ALL_t arg, Kokkos::Impl::ALL_t shift_input(const Kokkos::Impl::ALL_t arg,
const int64_t offset) { const int64_t /*offset*/) {
return arg; return arg;
} }
@ -1347,9 +1346,9 @@ KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin(
template <size_t N, class Arg, class A> template <size_t N, class Arg, class A>
KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin(
const size_t i, Kokkos::Array<int64_t, N>& subviewBegins, const size_t /*i*/, Kokkos::Array<int64_t, N>& /*subviewBegins*/,
typename std::enable_if<N == 0, const Arg>::type shiftedArg, const Arg arg, typename std::enable_if<N == 0, const Arg>::type /*shiftedArg*/,
const A viewBegins, size_t& counter) {} const Arg /*arg*/, const A /*viewBegins*/, size_t& /*counter*/) {}
template <class D, class... P, class T> template <class D, class... P, class T>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -1832,7 +1831,8 @@ inline void deep_copy(
const OffsetView<DT, DP...>& dst, const OffsetView<DT, DP...>& dst,
typename ViewTraits<DT, DP...>::const_value_type& value, typename ViewTraits<DT, DP...>::const_value_type& value,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) { typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert( static_assert(
std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type, std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type,
typename ViewTraits<DT, DP...>::value_type>::value, typename ViewTraits<DT, DP...>::value_type>::value,
@ -1846,7 +1846,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy( inline void deep_copy(
const OffsetView<DT, DP...>& dst, const OffsetView<ST, SP...>& value, const OffsetView<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) { typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert( static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type, std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value, typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -1859,7 +1860,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy( inline void deep_copy(
const OffsetView<DT, DP...>& dst, const View<ST, SP...>& value, const OffsetView<DT, DP...>& dst, const View<ST, SP...>& value,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) { typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert( static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type, std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value, typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -1873,7 +1875,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy( inline void deep_copy(
const View<DT, DP...>& dst, const OffsetView<ST, SP...>& value, const View<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same< typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) { typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert( static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type, std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value, typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -2011,7 +2014,7 @@ create_mirror_view(
std::is_same< std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::data_type, typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
typename Kokkos::Experimental::OffsetView< typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value)>::type* = 0) { T, P...>::HostMirror::data_type>::value)>::type* = nullptr) {
return src; return src;
} }

View File

@ -171,24 +171,41 @@ struct DefaultContribution<Kokkos::Cuda,
}; };
#endif #endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct DefaultDuplication<Kokkos::Experimental::HIP> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
#endif
/* ScatterValue <Op=ScatterSum, contribution=ScatterNonAtomic> is the object /* ScatterValue <Op=ScatterSum, contribution=ScatterNonAtomic> is the object
returned by the access operator() of ScatterAccess, This class inherits from returned by the access operator() of ScatterAccess, This class inherits from
the Sum<> reducer and it wraps join(dest, src) with convenient operator+=, the Sum<> reducer and it wraps join(dest, src) with convenient operator+=,
etc. Note the addition of update(ValueType const& rhs) and reset() so that etc. Note the addition of update(ValueType const& rhs) and reset() so that
all reducers can have common functions See ReduceDuplicates and all reducers can have common functions See ReduceDuplicates and
ResetDuplicates ) */ ResetDuplicates ) */
template <typename ValueType, int Op, int contribution> template <typename ValueType, int Op, typename DeviceType, int contribution>
struct ScatterValue; struct ScatterValue;
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
Kokkos::Experimental::ScatterNonAtomic> Kokkos::Experimental::ScatterNonAtomic>
: Sum<ValueType, Kokkos::DefaultExecutionSpace> { : Sum<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Sum<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {} : Sum<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
} }
@ -206,13 +223,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps * of ScatterAccess, similar to that returned by an Atomic View, it wraps
Kokkos::atomic_add with convenient operator+=, etc. This version also has the Kokkos::atomic_add with convenient operator+=, etc. This version also has the
update(rhs) and reset() functions. */ update(rhs) and reset() functions. */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
Kokkos::Experimental::ScatterAtomic> Kokkos::Experimental::ScatterAtomic>
: Sum<ValueType, Kokkos::DefaultExecutionSpace> { : Sum<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Sum<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
@ -244,15 +261,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
etc. Note the addition of update(ValueType const& rhs) and reset() so that etc. Note the addition of update(ValueType const& rhs) and reset() so that
all reducers can have common functions See ReduceDuplicates and all reducers can have common functions See ReduceDuplicates and
ResetDuplicates ) */ ResetDuplicates ) */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
Kokkos::Experimental::ScatterNonAtomic> Kokkos::Experimental::ScatterNonAtomic>
: Prod<ValueType, Kokkos::DefaultExecutionSpace> { : Prod<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Prod<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {} : Prod<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
} }
@ -271,13 +288,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
atomic_prod with convenient operator*=, etc. atomic_prod uses the atomic_prod with convenient operator*=, etc. atomic_prod uses the
atomic_compare_exchange. This version also has the update(rhs) and reset() atomic_compare_exchange. This version also has the update(rhs) and reset()
functions. */ functions. */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
Kokkos::Experimental::ScatterAtomic> Kokkos::Experimental::ScatterAtomic>
: Prod<ValueType, Kokkos::DefaultExecutionSpace> { : Prod<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Prod<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
@ -320,15 +337,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
Note the addition of update(ValueType const& rhs) and reset() are so that all Note the addition of update(ValueType const& rhs) and reset() are so that all
reducers can have a common update function See ReduceDuplicates and reducers can have a common update function See ReduceDuplicates and
ResetDuplicates ) */ ResetDuplicates ) */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
Kokkos::Experimental::ScatterNonAtomic> Kokkos::Experimental::ScatterNonAtomic>
: Min<ValueType, Kokkos::DefaultExecutionSpace> { : Min<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Min<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {} : Min<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
} }
@ -340,13 +357,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps and * of ScatterAccess, similar to that returned by an Atomic View, it wraps and
atomic_min with the update(rhs) function. atomic_min uses the atomic_min with the update(rhs) function. atomic_min uses the
atomic_compare_exchange. This version also has the reset() function */ atomic_compare_exchange. This version also has the reset() function */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
Kokkos::Experimental::ScatterAtomic> Kokkos::Experimental::ScatterAtomic>
: Min<ValueType, Kokkos::DefaultExecutionSpace> { : Min<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Min<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION KOKKOS_FORCEINLINE_FUNCTION
void atomic_min(ValueType& dest, const ValueType& src) const { void atomic_min(ValueType& dest, const ValueType& src) const {
@ -382,15 +399,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
Note the addition of update(ValueType const& rhs) and reset() are so that all Note the addition of update(ValueType const& rhs) and reset() are so that all
reducers can have a common update function See ReduceDuplicates and reducers can have a common update function See ReduceDuplicates and
ResetDuplicates ) */ ResetDuplicates ) */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
Kokkos::Experimental::ScatterNonAtomic> Kokkos::Experimental::ScatterNonAtomic>
: Max<ValueType, Kokkos::DefaultExecutionSpace> { : Max<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Max<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {} : Max<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
this->join(this->reference(), rhs); this->join(this->reference(), rhs);
} }
@ -402,13 +419,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps and * of ScatterAccess, similar to that returned by an Atomic View, it wraps and
atomic_max with the update(rhs) function. atomic_max uses the atomic_max with the update(rhs) function. atomic_max uses the
atomic_compare_exchange. This version also has the reset() function */ atomic_compare_exchange. This version also has the reset() function */
template <typename ValueType> template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
Kokkos::Experimental::ScatterAtomic> Kokkos::Experimental::ScatterAtomic>
: Max<ValueType, Kokkos::DefaultExecutionSpace> { : Max<ValueType, DeviceType> {
public: public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {} : Max<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION KOKKOS_FORCEINLINE_FUNCTION
void atomic_max(ValueType& dest, const ValueType& src) const { void atomic_max(ValueType& dest, const ValueType& src) const {
@ -558,6 +575,8 @@ struct ReduceDuplicatesBase {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
&kpID); &kpID);
} }
#else
(void)name;
#endif #endif
typedef RangePolicy<ExecSpace, size_t> policy_type; typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type; typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
@ -584,8 +603,9 @@ struct ReduceDuplicates
: Base(src_in, dst_in, stride_in, start_in, n_in, name) {} : Base(src_in, dst_in, stride_in, start_in, n_in, name) {}
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
for (size_t j = Base::start; j < Base::n; ++j) { for (size_t j = Base::start; j < Base::n; ++j) {
ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic> sv( ScatterValue<ValueType, Op, ExecSpace,
Base::dst[i]); Kokkos::Experimental::ScatterNonAtomic>
sv(Base::dst[i]);
sv.update(Base::src[i + Base::stride * j]); sv.update(Base::src[i + Base::stride * j]);
} }
} }
@ -607,6 +627,8 @@ struct ResetDuplicatesBase {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
&kpID); &kpID);
} }
#else
(void)name;
#endif #endif
typedef RangePolicy<ExecSpace, size_t> policy_type; typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type; typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
@ -630,8 +652,9 @@ struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name) ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name)
: Base(data_in, size_in, name) {} : Base(data_in, size_in, name) {}
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic> sv( ScatterValue<ValueType, Op, ExecSpace,
Base::data[i]); Kokkos::Experimental::ScatterNonAtomic>
sv(Base::data[i]);
sv.reset(); sv.reset();
} }
}; };
@ -768,8 +791,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
contribution> contribution>
view_type; view_type;
typedef typename view_type::original_value_type original_value_type; typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<original_value_type, Op, typedef Kokkos::Impl::Experimental::ScatterValue<
override_contribution> original_value_type, Op, DeviceType, override_contribution>
value_type; value_type;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -777,13 +800,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
ScatterAccess(view_type const& view_in) : view(view_in) {} ScatterAccess(view_type const& view_in) : view(view_in) {}
KOKKOS_DEFAULTED_FUNCTION
// KOKKOS_DEFAULTED_FUNCTION ~ScatterAccess() = default;
// ~ScatterAccess() = default;
KOKKOS_INLINE_FUNCTION
~ScatterAccess()
{
}
template <typename... Args> template <typename... Args>
KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const { KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
@ -1190,8 +1208,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
contribution> contribution>
view_type; view_type;
typedef typename view_type::original_value_type original_value_type; typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<original_value_type, Op, typedef Kokkos::Impl::Experimental::ScatterValue<
override_contribution> original_value_type, Op, DeviceType, override_contribution>
value_type; value_type;
KOKKOS_FORCEINLINE_FUNCTION KOKKOS_FORCEINLINE_FUNCTION

View File

@ -112,7 +112,7 @@ struct StaticCrsGraphBalancerFunctor {
} }
} else { } else {
if ((count >= (current_block + 1) * cost_per_workset) || if ((count >= (current_block + 1) * cost_per_workset) ||
(iRow + 2 == row_offsets.extent(0))) { (iRow + 2 == int_type(row_offsets.extent(0)))) {
if (end_block > current_block + 1) { if (end_block > current_block + 1) {
int_type num_block = end_block - current_block; int_type num_block = end_block - current_block;
row_block_offsets(current_block + 1) = iRow; row_block_offsets(current_block + 1) = iRow;
@ -358,8 +358,8 @@ class StaticCrsGraph {
/** \brief Destroy this view of the array. /** \brief Destroy this view of the array.
* If the last view then allocated memory is deallocated. * If the last view then allocated memory is deallocated.
*/ */
KOKKOS_INLINE_FUNCTION KOKKOS_DEFAULTED_FUNCTION
~StaticCrsGraph() {} ~StaticCrsGraph() = default;
/** \brief Return number of rows in the graph /** \brief Return number of rows in the graph
*/ */
@ -396,7 +396,7 @@ class StaticCrsGraph {
const data_type count = static_cast<data_type>(row_map(i + 1) - start); const data_type count = static_cast<data_type>(row_map(i + 1) - start);
if (count == 0) { if (count == 0) {
return GraphRowViewConst<StaticCrsGraph>(NULL, 1, 0); return GraphRowViewConst<StaticCrsGraph>(nullptr, 1, 0);
} else { } else {
return GraphRowViewConst<StaticCrsGraph>(entries, 1, count, start); return GraphRowViewConst<StaticCrsGraph>(entries, 1, count, start);
} }
@ -414,9 +414,10 @@ class StaticCrsGraph {
row_map_type, View<size_type*, array_layout, device_type> > row_map_type, View<size_type*, array_layout, device_type> >
partitioner(row_map, block_offsets, fix_cost_per_row, num_blocks); partitioner(row_map, block_offsets, fix_cost_per_row, num_blocks);
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numRows()), Kokkos::parallel_for("Kokkos::StaticCrsGraph::create_block_partitioning",
Kokkos::RangePolicy<execution_space>(0, numRows()),
partitioner); partitioner);
Kokkos::fence(); typename device_type::execution_space().fence();
row_block_offsets = block_offsets; row_block_offsets = block_offsets;
} }
@ -522,7 +523,8 @@ DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
typedef Impl::StaticCrsGraphMaximumEntry<GraphType> FunctorType; typedef Impl::StaticCrsGraphMaximumEntry<GraphType> FunctorType;
DataType result = 0; DataType result = 0;
Kokkos::parallel_reduce(graph.entries.extent(0), FunctorType(graph), result); Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0),
FunctorType(graph), result);
return result; return result;
} }

View File

@ -201,9 +201,9 @@ class UnorderedMapInsertResult {
/// ///
template <typename Key, typename Value, template <typename Key, typename Value,
typename Device = Kokkos::DefaultExecutionSpace, typename Device = Kokkos::DefaultExecutionSpace,
typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>, typename Hasher = pod_hash<typename std::remove_const<Key>::type>,
typename EqualTo = typename EqualTo =
pod_equal_to<typename Impl::remove_const<Key>::type> > pod_equal_to<typename std::remove_const<Key>::type> >
class UnorderedMap { class UnorderedMap {
private: private:
typedef typename ViewTraits<Key, Device, void, void>::host_mirror_space typedef typename ViewTraits<Key, Device, void, void>::host_mirror_space
@ -215,13 +215,13 @@ class UnorderedMap {
// key_types // key_types
typedef Key declared_key_type; typedef Key declared_key_type;
typedef typename Impl::remove_const<declared_key_type>::type key_type; typedef typename std::remove_const<declared_key_type>::type key_type;
typedef typename Impl::add_const<key_type>::type const_key_type; typedef typename std::add_const<key_type>::type const_key_type;
// value_types // value_types
typedef Value declared_value_type; typedef Value declared_value_type;
typedef typename Impl::remove_const<declared_value_type>::type value_type; typedef typename std::remove_const<declared_value_type>::type value_type;
typedef typename Impl::add_const<value_type>::type const_value_type; typedef typename std::add_const<value_type>::type const_value_type;
typedef Device device_type; typedef Device device_type;
typedef typename Device::execution_space execution_space; typedef typename Device::execution_space execution_space;
@ -296,25 +296,13 @@ class UnorderedMap {
//! \name Public member functions //! \name Public member functions
//@{ //@{
UnorderedMap()
: m_bounded_insert(),
m_hasher(),
m_equal_to(),
m_size(),
m_available_indexes(),
m_hash_lists(),
m_next_index(),
m_keys(),
m_values(),
m_scalars() {}
/// \brief Constructor /// \brief Constructor
/// ///
/// \param capacity_hint [in] Initial guess of how many unique keys will be /// \param capacity_hint [in] Initial guess of how many unique keys will be
/// inserted into the map \param hash [in] Hasher function for \c Key /// inserted into the map \param hash [in] Hasher function for \c Key
/// instances. The /// instances. The
/// default value usually suffices. /// default value usually suffices.
UnorderedMap(size_type capacity_hint, hasher_type hasher = hasher_type(), UnorderedMap(size_type capacity_hint = 0, hasher_type hasher = hasher_type(),
equal_to_type equal_to = equal_to_type()) equal_to_type equal_to = equal_to_type())
: m_bounded_insert(true), : m_bounded_insert(true),
m_hasher(hasher), m_hasher(hasher),
@ -689,7 +677,7 @@ class UnorderedMap {
template <typename SKey, typename SValue> template <typename SKey, typename SValue>
UnorderedMap( UnorderedMap(
UnorderedMap<SKey, SValue, Device, Hasher, EqualTo> const &src, UnorderedMap<SKey, SValue, Device, Hasher, EqualTo> const &src,
typename Impl::enable_if< typename std::enable_if<
Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type,
SKey, SValue>::value, SKey, SValue>::value,
int>::type = 0) int>::type = 0)
@ -705,7 +693,7 @@ class UnorderedMap {
m_scalars(src.m_scalars) {} m_scalars(src.m_scalars) {}
template <typename SKey, typename SValue> template <typename SKey, typename SValue>
typename Impl::enable_if< typename std::enable_if<
Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, SKey, Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, SKey,
SValue>::value, SValue>::value,
declared_map_type &>::type declared_map_type &>::type
@ -724,9 +712,9 @@ class UnorderedMap {
} }
template <typename SKey, typename SValue, typename SDevice> template <typename SKey, typename SValue, typename SDevice>
typename Impl::enable_if< typename std::enable_if<
std::is_same<typename Impl::remove_const<SKey>::type, key_type>::value && std::is_same<typename std::remove_const<SKey>::type, key_type>::value &&
std::is_same<typename Impl::remove_const<SValue>::type, std::is_same<typename std::remove_const<SValue>::type,
value_type>::value>::type value_type>::value>::type
create_copy_view( create_copy_view(
UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) { UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) {

View File

@ -118,12 +118,12 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
if (DV::template need_sync<typename DV::t_dev::device_type>()) { if (DV::template need_sync<typename DV::t_dev::device_type>()) {
set_functor_host f(DV::h_view, val); set_functor_host f(DV::h_view, val);
parallel_for(n, f); parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_host::execution_space().fence(); typename DV::t_host::execution_space().fence();
DV::template modify<typename DV::t_host::device_type>(); DV::template modify<typename DV::t_host::device_type>();
} else { } else {
set_functor f(DV::d_view, val); set_functor f(DV::d_view, val);
parallel_for(n, f); parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_dev::execution_space().fence(); typename DV::t_dev::execution_space().fence();
DV::template modify<typename DV::t_dev::device_type>(); DV::template modify<typename DV::t_dev::device_type>();
} }

View File

@ -77,7 +77,8 @@ struct BitsetCount {
size_type apply() const { size_type apply() const {
size_type count = 0u; size_type count = 0u;
parallel_reduce(m_bitset.m_blocks.extent(0), *this, count); parallel_reduce("Kokkos::Impl::BitsetCount::apply",
m_bitset.m_blocks.extent(0), *this, count);
return count; return count;
} }

View File

@ -58,8 +58,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
create_mirror_view( create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>& const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view, view,
typename Impl::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type, typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) { Arg3Type>::is_hostspace>::type* = 0) {
return view; return view;
} }
#else #else
@ -70,8 +70,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
create_mirror_view( create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>& const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
view, view,
typename Impl::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type, typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) { Arg3Type>::is_hostspace>::type* = 0) {
return view; return view;
} }
#endif #endif
@ -128,8 +128,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
create_mirror_view( create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>& const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view, view,
typename Impl::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type, typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) Arg3Type>::is_hostspace>::type* = 0)
#else #else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type, template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType> typename SizeType>
@ -138,8 +138,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
create_mirror_view( create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>& const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
view, view,
typename Impl::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type, typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) Arg3Type>::is_hostspace>::type* = 0)
#endif #endif
{ {
return create_mirror(view); return create_mirror(view);

View File

@ -71,7 +71,10 @@ struct UnorderedMapRehash {
UnorderedMapRehash(map_type const& dst, const_map_type const& src) UnorderedMapRehash(map_type const& dst, const_map_type const& src)
: m_dst(dst), m_src(src) {} : m_dst(dst), m_src(src) {}
void apply() const { parallel_for(m_src.capacity(), *this); } void apply() const {
parallel_for("Kokkos::Impl::UnorderedMapRehash::apply", m_src.capacity(),
*this);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const { void operator()(size_type i) const {
@ -91,7 +94,10 @@ struct UnorderedMapErase {
UnorderedMapErase(map_type const& map) : m_map(map) {} UnorderedMapErase(map_type const& map) : m_map(map) {}
void apply() const { parallel_for(m_map.m_hash_lists.extent(0), *this); } void apply() const {
parallel_for("Kokkos::Impl::UnorderedMapErase::apply",
m_map.m_hash_lists.extent(0), *this);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const { void operator()(size_type i) const {
@ -152,7 +158,10 @@ struct UnorderedMapHistogram {
m_distance("UnorderedMap Histogram"), m_distance("UnorderedMap Histogram"),
m_block_distance("UnorderedMap Histogram") {} m_block_distance("UnorderedMap Histogram") {}
void calculate() { parallel_for(m_map.m_hash_lists.extent(0), *this); } void calculate() {
parallel_for("Kokkos::Impl::UnorderedMapHistogram::calculate",
m_map.m_hash_lists.extent(0), *this);
}
void clear() { void clear() {
Kokkos::deep_copy(m_length, 0); Kokkos::deep_copy(m_length, 0);
@ -229,7 +238,10 @@ struct UnorderedMapPrint {
UnorderedMapPrint(map_type const& map) : m_map(map) {} UnorderedMapPrint(map_type const& map) : m_map(map) {}
void apply() { parallel_for(m_map.m_hash_lists.extent(0), *this); } void apply() {
parallel_for("Kokkos::Impl::UnorderedMapPrint::apply",
m_map.m_hash_lists.extent(0), *this);
}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const { void operator()(size_type i) const {
@ -245,21 +257,22 @@ struct UnorderedMapPrint {
}; };
template <typename DKey, typename DValue, typename SKey, typename SValue> template <typename DKey, typename DValue, typename SKey, typename SValue>
struct UnorderedMapCanAssign : public false_ {}; struct UnorderedMapCanAssign : public std::false_type {};
template <typename Key, typename Value> template <typename Key, typename Value>
struct UnorderedMapCanAssign<Key, Value, Key, Value> : public true_ {}; struct UnorderedMapCanAssign<Key, Value, Key, Value> : public std::true_type {};
template <typename Key, typename Value> template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, Value, Key, Value> : public true_ {}; struct UnorderedMapCanAssign<const Key, Value, Key, Value>
: public std::true_type {};
template <typename Key, typename Value> template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, const Value, Key, Value> struct UnorderedMapCanAssign<const Key, const Value, Key, Value>
: public true_ {}; : public std::true_type {};
template <typename Key, typename Value> template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, const Value, const Key, Value> struct UnorderedMapCanAssign<const Key, const Value, const Key, Value>
: public true_ {}; : public std::true_type {};
} // namespace Impl } // namespace Impl
} // namespace Kokkos } // namespace Kokkos

View File

@ -3,7 +3,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
foreach(Tag Threads;Serial;OpenMP;HPX;Cuda) foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP)
# Because there is always an exception to the rule # Because there is always an exception to the rule
if(Tag STREQUAL "Threads") if(Tag STREQUAL "Threads")
set(DEVICE "PTHREAD") set(DEVICE "PTHREAD")
@ -13,23 +13,31 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda)
string(TOLOWER ${Tag} dir) string(TOLOWER ${Tag} dir)
# Add test for that backend if it is enabled # Add test for that backend if it is enabled
if(Kokkos_ENABLE_${DEVICE}) if(Kokkos_ENABLE_${DEVICE})
KOKKOS_ADD_EXECUTABLE_AND_TEST( set(UnitTestSources UnitTestMain.cpp)
UnitTest_${Tag} set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir})
SOURCES file(MAKE_DIRECTORY ${dir})
UnitTestMain.cpp foreach(Name
${dir}/Test${Tag}_BitSet.cpp Bitset
${dir}/Test${Tag}_DualView.cpp DualView
${dir}/Test${Tag}_DynamicView.cpp DynamicView
${dir}/Test${Tag}_DynRankViewAPI_generic.cpp DynViewAPI_generic
${dir}/Test${Tag}_DynRankViewAPI_rank12345.cpp DynViewAPI_rank12345
${dir}/Test${Tag}_DynRankViewAPI_rank67.cpp DynViewAPI_rank67
${dir}/Test${Tag}_ErrorReporter.cpp ErrorReporter
${dir}/Test${Tag}_OffsetView.cpp OffsetView
${dir}/Test${Tag}_ScatterView.cpp ScatterView
${dir}/Test${Tag}_StaticCrsGraph.cpp StaticCrsGraph
${dir}/Test${Tag}_UnorderedMap.cpp UnorderedMap
${dir}/Test${Tag}_Vector.cpp Vector
${dir}/Test${Tag}_ViewCtorPropEmbeddedDim.cpp ViewCtorPropEmbeddedDim
)
set(file ${dir}/Test${Tag}_${Name}.cpp)
file(WRITE ${file}
"#include <Test${Tag}_Category.hpp>\n"
"#include <Test${Name}.hpp>\n"
) )
list(APPEND UnitTestSources ${file})
endforeach()
KOKKOS_ADD_EXECUTABLE_AND_TEST(UnitTest_${Tag} SOURCES ${UnitTestSources})
endif() endif()
endforeach() endforeach()

View File

@ -9,7 +9,7 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/cuda vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/cuda
vpath %.cpp ${CURDIR}
default: build_all default: build_all
echo "End Build" echo "End Build"
@ -31,14 +31,24 @@ KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
TEST_TARGETS = TEST_TARGETS =
TARGETS = TARGETS =
TESTS = Bitset DualView DynamicView DynViewAPI_generic DynViewAPI_rank12345 DynViewAPI_rank67 ErrorReporter OffsetView ScatterView StaticCrsGraph UnorderedMap Vector ViewCtorPropEmbeddedDim
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
tmp2 := $(foreach test, $(TESTS), \
$(if $(filter Test$(device)_$(test).cpp, $(shell ls Test$(device)_$(test).cpp 2>/dev/null)),,\
$(shell echo "\#include<Test"$(device)"_Category.hpp>" > Test$(device)_$(test).cpp); \
$(shell echo "\#include<Test"$(test)".hpp>" >> Test$(device)_$(test).cpp); \
)\
) \
)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = UnitTestMain.o gtest-all.o OBJ_CUDA = UnitTestMain.o gtest-all.o
OBJ_CUDA += TestCuda_BitSet.o OBJ_CUDA += TestCuda_Bitset.o
OBJ_CUDA += TestCuda_DualView.o OBJ_CUDA += TestCuda_DualView.o
OBJ_CUDA += TestCuda_DynamicView.o OBJ_CUDA += TestCuda_DynamicView.o
OBJ_CUDA += TestCuda_DynRankViewAPI_generic.o OBJ_CUDA += TestCuda_DynViewAPI_generic.o
OBJ_CUDA += TestCuda_DynRankViewAPI_rank12345.o OBJ_CUDA += TestCuda_DynViewAPI_rank12345.o
OBJ_CUDA += TestCuda_DynRankViewAPI_rank67.o OBJ_CUDA += TestCuda_DynViewAPI_rank67.o
OBJ_CUDA += TestCuda_ErrorReporter.o OBJ_CUDA += TestCuda_ErrorReporter.o
OBJ_CUDA += TestCuda_OffsetView.o OBJ_CUDA += TestCuda_OffsetView.o
OBJ_CUDA += TestCuda_ScatterView.o OBJ_CUDA += TestCuda_ScatterView.o
@ -50,33 +60,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
TEST_TARGETS += test-cuda TEST_TARGETS += test-cuda
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
OBJ_ROCM = UnitTestMain.o gtest-all.o
OBJ_ROCM += TestROCm_BitSet.o
OBJ_ROCM += TestROCm_DualView.o
OBJ_ROCM += TestROCm_DynamicView.o
OBJ_ROCM += TestROCm_DynRankViewAPI_generic.o
OBJ_ROCM += TestROCm_DynRankViewAPI_rank12345.o
OBJ_ROCM += TestROCm_DynRankViewAPI_rank67.o
OBJ_ROCM += TestROCm_ErrorReporter.o
OBJ_ROCM += TestROCm_OffsetView.o
OBJ_ROCM += TestROCm_ScatterView.o
OBJ_ROCM += TestROCm_StaticCrsGraph.o
OBJ_ROCM += TestROCm_UnorderedMap.o
OBJ_ROCM += TestROCm_Vector.o
OBJ_ROCM += TestROCm_ViewCtorPropEmbeddedDim.o
TARGETS += KokkosContainers_UnitTest_ROCm
TEST_TARGETS += test-rocm
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = UnitTestMain.o gtest-all.o OBJ_THREADS = UnitTestMain.o gtest-all.o
OBJ_THREADS += TestThreads_BitSet.o OBJ_THREADS += TestThreads_Bitset.o
OBJ_THREADS += TestThreads_DualView.o OBJ_THREADS += TestThreads_DualView.o
OBJ_THREADS += TestThreads_DynamicView.o OBJ_THREADS += TestThreads_DynamicView.o
OBJ_THREADS += TestThreads_DynRankViewAPI_generic.o OBJ_THREADS += TestThreads_DynViewAPI_generic.o
OBJ_THREADS += TestThreads_DynRankViewAPI_rank12345.o OBJ_THREADS += TestThreads_DynViewAPI_rank12345.o
OBJ_THREADS += TestThreads_DynRankViewAPI_rank67.o OBJ_THREADS += TestThreads_DynViewAPI_rank67.o
OBJ_THREADS += TestThreads_ErrorReporter.o OBJ_THREADS += TestThreads_ErrorReporter.o
OBJ_THREADS += TestThreads_OffsetView.o OBJ_THREADS += TestThreads_OffsetView.o
OBJ_THREADS += TestThreads_ScatterView.o OBJ_THREADS += TestThreads_ScatterView.o
@ -90,12 +81,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = UnitTestMain.o gtest-all.o OBJ_OPENMP = UnitTestMain.o gtest-all.o
OBJ_OPENMP += TestOpenMP_BitSet.o OBJ_OPENMP += TestOpenMP_Bitset.o
OBJ_OPENMP += TestOpenMP_DualView.o OBJ_OPENMP += TestOpenMP_DualView.o
OBJ_OPENMP += TestOpenMP_DynamicView.o OBJ_OPENMP += TestOpenMP_DynamicView.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_generic.o OBJ_OPENMP += TestOpenMP_DynViewAPI_generic.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank12345.o OBJ_OPENMP += TestOpenMP_DynViewAPI_rank12345.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank67.o OBJ_OPENMP += TestOpenMP_DynViewAPI_rank67.o
OBJ_OPENMP += TestOpenMP_ErrorReporter.o OBJ_OPENMP += TestOpenMP_ErrorReporter.o
OBJ_OPENMP += TestOpenMP_OffsetView.o OBJ_OPENMP += TestOpenMP_OffsetView.o
OBJ_OPENMP += TestOpenMP_ScatterView.o OBJ_OPENMP += TestOpenMP_ScatterView.o
@ -109,12 +100,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
OBJ_HPX = UnitTestMain.o gtest-all.o OBJ_HPX = UnitTestMain.o gtest-all.o
OBJ_HPX += TestHPX_BitSet.o OBJ_HPX += TestHPX_Bitset.o
OBJ_HPX += TestHPX_DualView.o OBJ_HPX += TestHPX_DualView.o
OBJ_HPX += TestHPX_DynamicView.o OBJ_HPX += TestHPX_DynamicView.o
OBJ_HPX += TestHPX_DynRankViewAPI_generic.o OBJ_HPX += TestHPX_DynViewAPI_generic.o
OBJ_HPX += TestHPX_DynRankViewAPI_rank12345.o OBJ_HPX += TestHPX_DynViewAPI_rank12345.o
OBJ_HPX += TestHPX_DynRankViewAPI_rank67.o OBJ_HPX += TestHPX_DynViewAPI_rank67.o
OBJ_HPX += TestHPX_ErrorReporter.o OBJ_HPX += TestHPX_ErrorReporter.o
OBJ_HPX += TestHPX_OffsetView.o OBJ_HPX += TestHPX_OffsetView.o
OBJ_HPX += TestHPX_ScatterView.o OBJ_HPX += TestHPX_ScatterView.o
@ -128,12 +119,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = UnitTestMain.o gtest-all.o OBJ_SERIAL = UnitTestMain.o gtest-all.o
OBJ_SERIAL += TestSerial_BitSet.o OBJ_SERIAL += TestSerial_Bitset.o
OBJ_SERIAL += TestSerial_DualView.o OBJ_SERIAL += TestSerial_DualView.o
OBJ_SERIAL += TestSerial_DynamicView.o OBJ_SERIAL += TestSerial_DynamicView.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_generic.o OBJ_SERIAL += TestSerial_DynViewAPI_generic.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_rank12345.o OBJ_SERIAL += TestSerial_DynViewAPI_rank12345.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_rank67.o OBJ_SERIAL += TestSerial_DynViewAPI_rank67.o
OBJ_SERIAL += TestSerial_ErrorReporter.o OBJ_SERIAL += TestSerial_ErrorReporter.o
OBJ_SERIAL += TestSerial_OffsetView.o OBJ_SERIAL += TestSerial_OffsetView.o
OBJ_SERIAL += TestSerial_ScatterView.o OBJ_SERIAL += TestSerial_ScatterView.o
@ -148,9 +139,6 @@ endif
KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda
KokkosContainers_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_ROCm
KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads
@ -166,9 +154,6 @@ KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
test-cuda: KokkosContainers_UnitTest_Cuda test-cuda: KokkosContainers_UnitTest_Cuda
./KokkosContainers_UnitTest_Cuda ./KokkosContainers_UnitTest_Cuda
test-rocm: KokkosContainers_UnitTest_ROCm
./KokkosContainers_UnitTest_ROCm
test-threads: KokkosContainers_UnitTest_Threads test-threads: KokkosContainers_UnitTest_Threads
./KokkosContainers_UnitTest_Threads ./KokkosContainers_UnitTest_Threads
@ -186,7 +171,7 @@ build_all: $(TARGETS)
test: $(TEST_TARGETS) test: $(TEST_TARGETS)
clean: kokkos-clean clean: kokkos-clean
rm -f *.o $(TARGETS) rm -f *.o $(TARGETS) *.cpp
# Compilation rules # Compilation rules

View File

@ -253,8 +253,10 @@ void test_bitset() {
} }
} }
// FIXME_HIP deadlock
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, bitset) { test_bitset<TEST_EXECSPACE>(); } TEST(TEST_CATEGORY, bitset) { test_bitset<TEST_EXECSPACE>(); }
#endif
} // namespace Test } // namespace Test
#endif // KOKKOS_TEST_BITSET_HPP #endif // KOKKOS_TEST_BITSET_HPP

View File

@ -67,11 +67,17 @@ struct test_dualview_combinations {
Scalar result; Scalar result;
template <typename ViewType> template <typename ViewType>
Scalar run_me(unsigned int n, unsigned int m) { Scalar run_me(unsigned int n, unsigned int m, bool with_init) {
if (n < 10) n = 10; if (n < 10) n = 10;
if (m < 3) m = 3; if (m < 3) m = 3;
ViewType a("A", n, m);
ViewType a;
if (with_init) {
a = ViewType("A", n, m);
} else {
a = ViewType(Kokkos::ViewAllocateWithoutInitializing("A"), n, m);
}
Kokkos::deep_copy(a.d_view, 1); Kokkos::deep_copy(a.d_view, 1);
a.template modify<typename ViewType::execution_space>(); a.template modify<typename ViewType::execution_space>();
@ -96,9 +102,9 @@ struct test_dualview_combinations {
return count - a.d_view.extent(0) * a.d_view.extent(1) - 2 - 4 - 3 * 2; return count - a.d_view.extent(0) * a.d_view.extent(1) - 2 - 4 - 3 * 2;
} }
test_dualview_combinations(unsigned int size) { test_dualview_combinations(unsigned int size, bool with_init) {
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >( result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
size, 3); size, 3, with_init);
} }
}; };
@ -124,18 +130,25 @@ struct test_dual_view_deep_copy {
typedef Device execution_space; typedef Device execution_space;
template <typename ViewType> template <typename ViewType>
void run_me() { void run_me(int n, const int m, const bool use_templ_sync) {
const unsigned int n = 10; ViewType a, b;
const unsigned int m = 5; if (n >= 0) {
const unsigned int sum_total = n * m; a = ViewType("A", n, m);
b = ViewType("B", n, m);
ViewType a("A", n, m); } else {
ViewType b("B", n, m); n = 0;
}
const scalar_type sum_total = scalar_type(n * m);
Kokkos::deep_copy(a.d_view, 1); Kokkos::deep_copy(a.d_view, 1);
a.template modify<typename ViewType::execution_space>(); if (use_templ_sync) {
a.template sync<typename ViewType::host_mirror_space>(); a.template modify<typename ViewType::execution_space>();
a.template sync<typename ViewType::host_mirror_space>();
} else {
a.modify_device();
a.sync_host();
}
// Check device view is initialized as expected // Check device view is initialized as expected
scalar_type a_d_sum = 0; scalar_type a_d_sum = 0;
@ -159,7 +172,11 @@ struct test_dual_view_deep_copy {
// Test deep_copy // Test deep_copy
Kokkos::deep_copy(b, a); Kokkos::deep_copy(b, a);
b.template sync<typename ViewType::host_mirror_space>(); if (use_templ_sync) {
b.template sync<typename ViewType::host_mirror_space>();
} else {
b.sync_host();
}
// Perform same checks on b as done on a // Perform same checks on b as done on a
// Check device view is initialized as expected // Check device view is initialized as expected
@ -183,6 +200,145 @@ struct test_dual_view_deep_copy {
} // end run_me } // end run_me
test_dual_view_deep_copy() { test_dual_view_deep_copy() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
false);
// Test zero length but allocated (a.d_view.data!=nullptr but
// a.d_view.span()==0)
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5,
false);
// Test default constructed view
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
false);
}
};
template <typename Scalar, class Device>
struct test_dualview_resize {
typedef Scalar scalar_type;
typedef Device execution_space;
template <typename ViewType>
void run_me() {
const unsigned int n = 10;
const unsigned int m = 5;
const unsigned int factor = 2;
ViewType a("A", n, m);
Kokkos::deep_copy(a.d_view, 1);
/* Covers case "Resize on Device" */
a.modify_device();
Kokkos::resize(a, factor * n, factor * m);
ASSERT_EQ(a.extent(0), n * factor);
ASSERT_EQ(a.extent(1), m * factor);
Kokkos::deep_copy(a.d_view, 1);
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a_d_sum);
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
/* Covers case "Resize on Host" */
a.modify_host();
Kokkos::resize(a, n / factor, m / factor);
ASSERT_EQ(a.extent(0), n / factor);
ASSERT_EQ(a.extent(1), m / factor);
a.sync_device();
// Check device view is initialized as expected
a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
ASSERT_EQ(a_h_sum, a_d_sum);
} // end run_me
test_dualview_resize() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
}
};
template <typename Scalar, class Device>
struct test_dualview_realloc {
typedef Scalar scalar_type;
typedef Device execution_space;
template <typename ViewType>
void run_me() {
const unsigned int n = 10;
const unsigned int m = 5;
ViewType a("A", n, m);
Kokkos::realloc(a, n, m);
Kokkos::deep_copy(a.d_view, 1);
a.modify_device();
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
ASSERT_EQ(a_h_sum, a_d_sum);
} // end run_me
test_dualview_realloc() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(); run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
} }
}; };
@ -190,8 +346,8 @@ struct test_dual_view_deep_copy {
} // namespace Impl } // namespace Impl
template <typename Scalar, typename Device> template <typename Scalar, typename Device>
void test_dualview_combinations(unsigned int size) { void test_dualview_combinations(unsigned int size, bool with_init) {
Impl::test_dualview_combinations<Scalar, Device> test(size); Impl::test_dualview_combinations<Scalar, Device> test(size, with_init);
ASSERT_EQ(test.result, 0); ASSERT_EQ(test.result, 0);
} }
@ -200,8 +356,22 @@ void test_dualview_deep_copy() {
Impl::test_dual_view_deep_copy<Scalar, Device>(); Impl::test_dual_view_deep_copy<Scalar, Device>();
} }
template <typename Scalar, typename Device>
void test_dualview_realloc() {
Impl::test_dualview_realloc<Scalar, Device>();
}
template <typename Scalar, typename Device>
void test_dualview_resize() {
Impl::test_dualview_resize<Scalar, Device>();
}
TEST(TEST_CATEGORY, dualview_combination) { TEST(TEST_CATEGORY, dualview_combination) {
test_dualview_combinations<int, TEST_EXECSPACE>(10); test_dualview_combinations<int, TEST_EXECSPACE>(10, true);
}
TEST(TEST_CATEGORY, dualview_combinations_without_init) {
test_dualview_combinations<int, TEST_EXECSPACE>(10, false);
} }
TEST(TEST_CATEGORY, dualview_deep_copy) { TEST(TEST_CATEGORY, dualview_deep_copy) {
@ -209,6 +379,14 @@ TEST(TEST_CATEGORY, dualview_deep_copy) {
test_dualview_deep_copy<double, TEST_EXECSPACE>(); test_dualview_deep_copy<double, TEST_EXECSPACE>();
} }
TEST(TEST_CATEGORY, dualview_realloc) {
test_dualview_realloc<int, TEST_EXECSPACE>();
}
TEST(TEST_CATEGORY, dualview_resize) {
test_dualview_resize<int, TEST_EXECSPACE>();
}
} // namespace Test } // namespace Test
#endif // KOKKOS_TEST_UNORDERED_MAP_HPP #endif // KOKKOS_TEST_DUALVIEW_HPP

View File

@ -706,8 +706,6 @@ class TestDynViewAPI {
typedef typename View0::host_mirror_space host_view_space; typedef typename View0::host_mirror_space host_view_space;
TestDynViewAPI() {}
static void run_tests() { static void run_tests() {
run_test_resize_realloc(); run_test_resize_realloc();
run_test_mirror(); run_test_mirror();
@ -1078,12 +1076,12 @@ class TestDynViewAPI {
ASSERT_TRUE(Kokkos::is_dyn_rank_view<dView0>::value); ASSERT_TRUE(Kokkos::is_dyn_rank_view<dView0>::value);
ASSERT_FALSE(Kokkos::is_dyn_rank_view<Kokkos::View<double> >::value); ASSERT_FALSE(Kokkos::is_dyn_rank_view<Kokkos::View<double> >::value);
ASSERT_TRUE(dx.data() == 0); // Okay with UVM ASSERT_TRUE(dx.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dy.data() == 0); // Okay with UVM ASSERT_TRUE(dy.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dz.data() == 0); // Okay with UVM ASSERT_TRUE(dz.data() == nullptr); // Okay with UVM
ASSERT_TRUE(hx.data() == 0); ASSERT_TRUE(hx.data() == nullptr);
ASSERT_TRUE(hy.data() == 0); ASSERT_TRUE(hy.data() == nullptr);
ASSERT_TRUE(hz.data() == 0); ASSERT_TRUE(hz.data() == nullptr);
ASSERT_EQ(dx.extent(0), 0u); // Okay with UVM ASSERT_EQ(dx.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dy.extent(0), 0u); // Okay with UVM ASSERT_EQ(dy.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dz.extent(0), 0u); // Okay with UVM ASSERT_EQ(dz.extent(0), 0u); // Okay with UVM
@ -1154,11 +1152,11 @@ class TestDynViewAPI {
ASSERT_EQ(dx.use_count(), size_t(2)); ASSERT_EQ(dx.use_count(), size_t(2));
ASSERT_FALSE(dx.data() == 0); ASSERT_FALSE(dx.data() == nullptr);
ASSERT_FALSE(const_dx.data() == 0); ASSERT_FALSE(const_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_dx.data() == 0); ASSERT_FALSE(unmanaged_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_from_ptr_dx.data() == 0); ASSERT_FALSE(unmanaged_from_ptr_dx.data() == nullptr);
ASSERT_FALSE(dy.data() == 0); ASSERT_FALSE(dy.data() == nullptr);
ASSERT_NE(dx, dy); ASSERT_NE(dx, dy);
ASSERT_EQ(dx.extent(0), unsigned(N0)); ASSERT_EQ(dx.extent(0), unsigned(N0));
@ -1318,17 +1316,17 @@ class TestDynViewAPI {
ASSERT_NE(dx, dz); ASSERT_NE(dx, dz);
dx = dView0(); dx = dView0();
ASSERT_TRUE(dx.data() == 0); ASSERT_TRUE(dx.data() == nullptr);
ASSERT_FALSE(dy.data() == 0); ASSERT_FALSE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == 0); ASSERT_FALSE(dz.data() == nullptr);
dy = dView0(); dy = dView0();
ASSERT_TRUE(dx.data() == 0); ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == 0); ASSERT_TRUE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == 0); ASSERT_FALSE(dz.data() == nullptr);
dz = dView0(); dz = dView0();
ASSERT_TRUE(dx.data() == 0); ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == 0); ASSERT_TRUE(dy.data() == nullptr);
ASSERT_TRUE(dz.data() == 0); ASSERT_TRUE(dz.data() == nullptr);
// View - DynRankView Interoperability tests // View - DynRankView Interoperability tests
// deep_copy from view to dynrankview // deep_copy from view to dynrankview

View File

@ -44,7 +44,10 @@
#include <TestDynViewAPI.hpp> #include <TestDynViewAPI.hpp>
namespace Test { namespace Test {
// FIXME_HIP attempt to access inaccessible memory space
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, dyn_rank_view_api_generic) { TEST(TEST_CATEGORY, dyn_rank_view_api_generic) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_tests(); TestDynViewAPI<double, TEST_EXECSPACE>::run_tests();
} }
#endif
} // namespace Test } // namespace Test

View File

@ -45,7 +45,10 @@
#include <TestDynViewAPI.hpp> #include <TestDynViewAPI.hpp>
namespace Test { namespace Test {
// FIXME_HIP failing with wrong value
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) { TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345(); TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345();
} }
#endif
} // namespace Test } // namespace Test

View File

@ -79,7 +79,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size); ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size), Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -93,7 +92,6 @@ struct TestDynamicView {
result_sum); result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif #endif
// add 3x more entries i.e. 4x larger than previous size // add 3x more entries i.e. 4x larger than previous size
@ -103,7 +101,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize); ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(da_size, da_resize), Kokkos::RangePolicy<execution_space>(da_size, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -118,7 +115,6 @@ struct TestDynamicView {
ASSERT_EQ(new_result_sum + result_sum, ASSERT_EQ(new_result_sum + result_sum,
(value_type)(da_resize * (da_resize - 1) / 2)); (value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif #endif
} // end scope } // end scope
@ -135,7 +131,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size); ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size), Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -149,7 +144,6 @@ struct TestDynamicView {
result_sum); result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif #endif
// add 3x more entries i.e. 4x larger than previous size // add 3x more entries i.e. 4x larger than previous size
@ -159,7 +153,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize); ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(da_size, da_resize), Kokkos::RangePolicy<execution_space>(da_size, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -174,7 +167,6 @@ struct TestDynamicView {
ASSERT_EQ(new_result_sum + result_sum, ASSERT_EQ(new_result_sum + result_sum,
(value_type)(da_resize * (da_resize - 1) / 2)); (value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif #endif
} // end scope } // end scope
@ -191,7 +183,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size); ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size), Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -205,7 +196,6 @@ struct TestDynamicView {
result_sum); result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif #endif
// remove the final 3/4 entries i.e. first 1/4 remain // remove the final 3/4 entries i.e. first 1/4 remain
@ -214,7 +204,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize); ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for( Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_resize), Kokkos::RangePolicy<execution_space>(0, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -228,7 +217,6 @@ struct TestDynamicView {
new_result_sum); new_result_sum);
ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2)); ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif #endif
} // end scope } // end scope
} }

View File

@ -50,9 +50,13 @@
#include <Kokkos_Core.hpp> #include <Kokkos_Core.hpp>
#include <Kokkos_ErrorReporter.hpp> #include <Kokkos_ErrorReporter.hpp>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
namespace Test { namespace Test {
// Just save the data in the report. Informative text goies in the // Just save the data in the report. Informative text goes in the
// operator<<(..). // operator<<(..).
template <typename DataType1, typename DataType2, typename DataType3> template <typename DataType1, typename DataType2, typename DataType3>
struct ThreeValReport { struct ThreeValReport {
@ -85,7 +89,7 @@ struct ErrorReporterDriverBase {
error_reporter_type; error_reporter_type;
error_reporter_type m_errorReporter; error_reporter_type m_errorReporter;
ErrorReporterDriverBase(int reporter_capacity, int test_size) ErrorReporterDriverBase(int reporter_capacity, int /*test_size*/)
: m_errorReporter(reporter_capacity) {} : m_errorReporter(reporter_capacity) {}
KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const { KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const {
@ -176,7 +180,8 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
} }
}; };
#if defined(KOKKOS_CLASS_LAMBDA) #if defined(KOKKOS_CLASS_LAMBDA) && \
(!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA))
template <typename DeviceType> template <typename DeviceType>
struct ErrorReporterDriverUseLambda struct ErrorReporterDriverUseLambda
: public ErrorReporterDriverBase<DeviceType> { : public ErrorReporterDriverBase<DeviceType> {
@ -225,7 +230,8 @@ struct ErrorReporterDriverNativeOpenMP
}; };
#endif #endif
#if defined(KOKKOS_CLASS_LAMBDA) #if defined(KOKKOS_CLASS_LAMBDA) && \
(!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA))
TEST(TEST_CATEGORY, ErrorReporterViaLambda) { TEST(TEST_CATEGORY, ErrorReporterViaLambda) {
TestErrorReporter<ErrorReporterDriverUseLambda<TEST_EXECSPACE>>(); TestErrorReporter<ErrorReporterDriverUseLambda<TEST_EXECSPACE>>();
} }

View File

@ -42,10 +42,10 @@
//@HEADER //@HEADER
*/ */
#ifndef KOKKOS_TEST_ROCM_HPP #ifndef KOKKOS_TEST_HIP_HPP
#define KOKKOS_TEST_ROCM_HPP #define KOKKOS_TEST_HIP_HPP
#define TEST_CATEGORY rocm #define TEST_CATEGORY hip
#define TEST_EXECSPACE Kokkos::Experimental::ROCm #define TEST_EXECSPACE Kokkos::Experimental::HIP
#endif #endif

View File

@ -60,7 +60,7 @@ using std::endl;
namespace Test { namespace Test {
template <typename Scalar, typename Device> template <typename Scalar, typename Device>
void test_offsetview_construction(unsigned int size) { void test_offsetview_construction() {
typedef Kokkos::Experimental::OffsetView<Scalar**, Device> offset_view_type; typedef Kokkos::Experimental::OffsetView<Scalar**, Device> offset_view_type;
typedef Kokkos::View<Scalar**, Device> view_type; typedef Kokkos::View<Scalar**, Device> view_type;
@ -185,15 +185,17 @@ void test_offsetview_construction(unsigned int size) {
Kokkos::deep_copy(view3D, 1); Kokkos::deep_copy(view3D, 1);
Kokkos::Array<int64_t, 3> begins = {{-10, -20, -30}};
Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D,
begins);
typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<3>, typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<3>,
Kokkos::IndexType<int64_t> > Kokkos::IndexType<int64_t> >
range3_type; range3_type;
typedef typename range3_type::point_type point3_type; typedef typename range3_type::point_type point3_type;
typename point3_type::value_type begins0 = -10, begins1 = -20,
begins2 = -30;
Kokkos::Array<int64_t, 3> begins = {{begins0, begins1, begins2}};
Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D,
begins);
range3_type rangePolicy3DZero(point3_type{{0, 0, 0}}, range3_type rangePolicy3DZero(point3_type{{0, 0, 0}},
point3_type{{extent0, extent1, extent2}}); point3_type{{extent0, extent1, extent2}});
@ -207,9 +209,8 @@ void test_offsetview_construction(unsigned int size) {
view3DSum); view3DSum);
range3_type rangePolicy3D( range3_type rangePolicy3D(
point3_type{{begins[0], begins[1], begins[2]}}, point3_type{{begins0, begins1, begins2}},
point3_type{ point3_type{{begins0 + extent0, begins1 + extent1, begins2 + extent2}});
{begins[0] + extent0, begins[1] + extent1, begins[2] + extent2}});
int offsetView3DSum = 0; int offsetView3DSum = 0;
Kokkos::parallel_reduce( Kokkos::parallel_reduce(
@ -388,7 +389,7 @@ void test_offsetview_unmanaged_construction() {
} }
template <typename Scalar, typename Device> template <typename Scalar, typename Device>
void test_offsetview_subview(unsigned int size) { void test_offsetview_subview() {
{ // test subview 1 { // test subview 1
Kokkos::Experimental::OffsetView<Scalar*, Device> sliceMe("offsetToSlice", Kokkos::Experimental::OffsetView<Scalar*, Device> sliceMe("offsetToSlice",
{-10, 20}); {-10, 20});
@ -675,7 +676,7 @@ void test_offsetview_offsets_rank3() {
#endif #endif
TEST(TEST_CATEGORY, offsetview_construction) { TEST(TEST_CATEGORY, offsetview_construction) {
test_offsetview_construction<int, TEST_EXECSPACE>(10); test_offsetview_construction<int, TEST_EXECSPACE>();
} }
TEST(TEST_CATEGORY, offsetview_unmanaged_construction) { TEST(TEST_CATEGORY, offsetview_unmanaged_construction) {
@ -683,7 +684,7 @@ TEST(TEST_CATEGORY, offsetview_unmanaged_construction) {
} }
TEST(TEST_CATEGORY, offsetview_subview) { TEST(TEST_CATEGORY, offsetview_subview) {
test_offsetview_subview<int, TEST_EXECSPACE>(10); test_offsetview_subview<int, TEST_EXECSPACE>();
} }
#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)

View File

@ -50,21 +50,21 @@
namespace Test { namespace Test {
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution, int op> int contribution, int op>
struct test_scatter_view_impl_cls; struct test_scatter_view_impl_cls;
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution> int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterSum> { Kokkos::Experimental::ScatterSum> {
public: public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace, typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterSum,
duplication, contribution> duplication, contribution>
scatter_view_type; scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type; typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view; scatter_view_type scatter_view;
int scatterSize; int scatterSize;
@ -90,7 +90,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) { void run_parallel(int n) {
scatterSize = n; scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n); auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum"); Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum");
} }
@ -123,17 +124,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
} }
}; };
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution> int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterProd> { Kokkos::Experimental::ScatterProd> {
public: public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace, typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterProd, Kokkos::Experimental::ScatterProd,
duplication, contribution> duplication, contribution>
scatter_view_type; scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type; typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view; scatter_view_type scatter_view;
int scatterSize; int scatterSize;
@ -159,7 +160,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) { void run_parallel(int n) {
scatterSize = n; scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n); auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
} }
@ -192,17 +194,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
} }
}; };
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution> int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterMin> { Kokkos::Experimental::ScatterMin> {
public: public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace, typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterMin, Kokkos::Experimental::ScatterMin,
duplication, contribution> duplication, contribution>
scatter_view_type; scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type; typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view; scatter_view_type scatter_view;
int scatterSize; int scatterSize;
@ -228,7 +230,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) { void run_parallel(int n) {
scatterSize = n; scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n); auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
} }
@ -261,17 +264,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
} }
}; };
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution> int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterMax> { Kokkos::Experimental::ScatterMax> {
public: public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace, typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterMax, Kokkos::Experimental::ScatterMax,
duplication, contribution> duplication, contribution>
scatter_view_type; scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type; typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view; scatter_view_type scatter_view;
int scatterSize; int scatterSize;
@ -297,7 +300,7 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) { void run_parallel(int n) {
scatterSize = n; scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n); Kokkos::RangePolicy<typename DeviceType::execution_space, int> policy(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
} }
@ -330,20 +333,18 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
} }
}; };
template <typename ExecSpace, typename Layout, int duplication, template <typename DeviceType, typename Layout, int duplication,
int contribution, int op> int contribution, int op>
struct test_scatter_view_config { struct test_scatter_view_config {
public: public:
typedef typedef
typename test_scatter_view_impl_cls<ExecSpace, Layout, duplication, typename test_scatter_view_impl_cls<DeviceType, Layout, duplication,
contribution, op>::scatter_view_type contribution, op>::scatter_view_type
scatter_view_def; scatter_view_def;
typedef typename test_scatter_view_impl_cls<ExecSpace, Layout, duplication, typedef typename test_scatter_view_impl_cls<DeviceType, Layout, duplication,
contribution, op>::orig_view_type contribution, op>::orig_view_type
orig_view_def; orig_view_def;
test_scatter_view_config() {}
void run_test(int n) { void run_test(int n) {
// Test creation via create_scatter_view // Test creation via create_scatter_view
{ {
@ -351,7 +352,7 @@ struct test_scatter_view_config {
scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view< scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view<
op, duplication, contribution>(original_view); op, duplication, contribution>(original_view);
test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
op> op>
scatter_view_test_impl(scatter_view); scatter_view_test_impl(scatter_view);
scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.initialize(original_view);
@ -379,7 +380,7 @@ struct test_scatter_view_config {
orig_view_def original_view("original_view", n); orig_view_def original_view("original_view", n);
scatter_view_def scatter_view(original_view); scatter_view_def scatter_view(original_view);
test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution, test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
op> op>
scatter_view_test_impl(scatter_view); scatter_view_test_impl(scatter_view);
scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.initialize(original_view);
@ -405,17 +406,18 @@ struct test_scatter_view_config {
} }
}; };
template <typename ExecSpace, int ScatterType> template <typename DeviceType, int ScatterType>
struct TestDuplicatedScatterView { struct TestDuplicatedScatterView {
TestDuplicatedScatterView(int n) { TestDuplicatedScatterView(int n) {
// ScatterSum test // ScatterSum test
test_scatter_view_config< test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
ExecSpace, Kokkos::LayoutRight, Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic, ScatterType> Kokkos::Experimental::ScatterNonAtomic,
ScatterType>
test_sv_right_config; test_sv_right_config;
test_sv_right_config.run_test(n); test_sv_right_config.run_test(n);
test_scatter_view_config< test_scatter_view_config<
ExecSpace, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated, DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic, ScatterType> Kokkos::Experimental::ScatterNonAtomic, ScatterType>
test_sv_left_config; test_sv_left_config;
test_sv_left_config.run_test(n); test_sv_left_config.run_test(n);
@ -429,6 +431,16 @@ template <int ScatterType>
struct TestDuplicatedScatterView<Kokkos::Cuda, ScatterType> { struct TestDuplicatedScatterView<Kokkos::Cuda, ScatterType> {
TestDuplicatedScatterView(int) {} TestDuplicatedScatterView(int) {}
}; };
template <int ScatterType>
struct TestDuplicatedScatterView<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, ScatterType> {
TestDuplicatedScatterView(int) {}
};
template <int ScatterType>
struct TestDuplicatedScatterView<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, ScatterType> {
TestDuplicatedScatterView(int) {}
};
#endif #endif
#ifdef KOKKOS_ENABLE_ROCM #ifdef KOKKOS_ENABLE_ROCM
@ -440,17 +452,15 @@ struct TestDuplicatedScatterView<Kokkos::Experimental::ROCm, ScatterType> {
}; };
#endif #endif
template <typename ExecSpace, int ScatterType> template <typename DeviceType, int ScatterType>
void test_scatter_view(int n) { void test_scatter_view(int n) {
// all of these configurations should compile okay, but only some of them are using execution_space = typename DeviceType::execution_space;
// correct and/or sensible in terms of memory use
Kokkos::Experimental::UniqueToken<ExecSpace> unique_token{ExecSpace()};
// no atomics or duplication is only sensible if the execution space // no atomics or duplication is only sensible if the execution space
// is running essentially in serial (doesn't have to be Serial though, // is running essentially in serial (doesn't have to be Serial though,
// we also test OpenMP with one thread: LAMMPS cares about that) // we also test OpenMP with one thread: LAMMPS cares about that)
if (unique_token.size() == 1) { if (execution_space().concurrency() == 1) {
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight, test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated, Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterNonAtomic, Kokkos::Experimental::ScatterNonAtomic,
ScatterType> ScatterType>
@ -458,9 +468,9 @@ void test_scatter_view(int n) {
test_sv_config.run_test(n); test_sv_config.run_test(n);
} }
#ifdef KOKKOS_ENABLE_SERIAL #ifdef KOKKOS_ENABLE_SERIAL
if (!std::is_same<ExecSpace, Kokkos::Serial>::value) { if (!std::is_same<DeviceType, Kokkos::Serial>::value) {
#endif #endif
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight, test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated, Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterAtomic, ScatterType> Kokkos::Experimental::ScatterAtomic, ScatterType>
test_sv_config; test_sv_config;
@ -473,16 +483,18 @@ void test_scatter_view(int n) {
constexpr std::size_t maximum_allowed_total_bytes = constexpr std::size_t maximum_allowed_total_bytes =
8ull * 1024ull * 1024ull * 1024ull; 8ull * 1024ull * 1024ull * 1024ull;
std::size_t const maximum_allowed_copy_bytes = std::size_t const maximum_allowed_copy_bytes =
maximum_allowed_total_bytes / std::size_t(unique_token.size()); maximum_allowed_total_bytes /
std::size_t(execution_space().concurrency());
constexpr std::size_t bytes_per_value = sizeof(double) * 3; constexpr std::size_t bytes_per_value = sizeof(double) * 3;
std::size_t const maximum_allowed_copy_values = std::size_t const maximum_allowed_copy_values =
maximum_allowed_copy_bytes / bytes_per_value; maximum_allowed_copy_bytes / bytes_per_value;
n = std::min(n, int(maximum_allowed_copy_values)); n = std::min(n, int(maximum_allowed_copy_values));
TestDuplicatedScatterView<ExecSpace, ScatterType> duptest(n); TestDuplicatedScatterView<DeviceType, ScatterType> duptest(n);
} }
// FIXME_HIP ScatterView requires UniqueToken
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, scatterview) { TEST(TEST_CATEGORY, scatterview) {
#ifndef KOKKOS_ENABLE_ROCM
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum>(10); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum>(10);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(10); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(10);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(10); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(10);
@ -504,8 +516,38 @@ TEST(TEST_CATEGORY, scatterview) {
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(big_n); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(big_n);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(big_n); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(big_n);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(big_n); test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(big_n);
}
TEST(TEST_CATEGORY, scatterview_devicetype) {
using device_type =
Kokkos::Device<TEST_EXECSPACE, typename TEST_EXECSPACE::memory_space>;
test_scatter_view<device_type, Kokkos::Experimental::ScatterSum>(10);
test_scatter_view<device_type, Kokkos::Experimental::ScatterProd>(10);
test_scatter_view<device_type, Kokkos::Experimental::ScatterMin>(10);
test_scatter_view<device_type, Kokkos::Experimental::ScatterMax>(10);
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<TEST_EXECSPACE, Kokkos::Cuda>::value) {
using cuda_device_type = Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>;
test_scatter_view<cuda_device_type, Kokkos::Experimental::ScatterSum>(10);
test_scatter_view<cuda_device_type, Kokkos::Experimental::ScatterProd>(10);
test_scatter_view<cuda_device_type, Kokkos::Experimental::ScatterMin>(10);
test_scatter_view<cuda_device_type, Kokkos::Experimental::ScatterMax>(10);
using cudauvm_device_type =
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>;
test_scatter_view<cudauvm_device_type, Kokkos::Experimental::ScatterSum>(
10);
test_scatter_view<cudauvm_device_type, Kokkos::Experimental::ScatterProd>(
10);
test_scatter_view<cudauvm_device_type, Kokkos::Experimental::ScatterMin>(
10);
test_scatter_view<cudauvm_device_type, Kokkos::Experimental::ScatterMax>(
10);
}
#endif #endif
} }
#endif
} // namespace Test } // namespace Test

View File

@ -174,6 +174,9 @@ struct TestFind {
} // namespace Impl } // namespace Impl
// MSVC reports a syntax error for this test.
// WORKAROUND MSVC
#ifndef _WIN32
template <typename Device> template <typename Device>
void test_insert(uint32_t num_nodes, uint32_t num_inserts, void test_insert(uint32_t num_nodes, uint32_t num_inserts,
uint32_t num_duplicates, bool near) { uint32_t num_duplicates, bool near) {
@ -225,6 +228,7 @@ void test_insert(uint32_t num_nodes, uint32_t num_inserts,
EXPECT_EQ(0u, map.size()); EXPECT_EQ(0u, map.size());
} }
} }
#endif
template <typename Device> template <typename Device>
void test_failed_insert(uint32_t num_nodes) { void test_failed_insert(uint32_t num_nodes) {
@ -291,12 +295,17 @@ void test_deep_copy(uint32_t num_nodes) {
} }
} }
// FIXME_HIP deadlock
#ifndef KOKKOS_ENABLE_HIP
// WORKAROUND MSVC
#ifndef _WIN32
TEST(TEST_CATEGORY, UnorderedMap_insert) { TEST(TEST_CATEGORY, UnorderedMap_insert) {
for (int i = 0; i < 500; ++i) { for (int i = 0; i < 500; ++i) {
test_insert<TEST_EXECSPACE>(100000, 90000, 100, true); test_insert<TEST_EXECSPACE>(100000, 90000, 100, true);
test_insert<TEST_EXECSPACE>(100000, 90000, 100, false); test_insert<TEST_EXECSPACE>(100000, 90000, 100, false);
} }
} }
#endif
TEST(TEST_CATEGORY, UnorderedMap_failed_insert) { TEST(TEST_CATEGORY, UnorderedMap_failed_insert) {
for (int i = 0; i < 1000; ++i) test_failed_insert<TEST_EXECSPACE>(10000); for (int i = 0; i < 1000; ++i) test_failed_insert<TEST_EXECSPACE>(10000);
@ -305,6 +314,19 @@ TEST(TEST_CATEGORY, UnorderedMap_failed_insert) {
TEST(TEST_CATEGORY, UnorderedMap_deep_copy) { TEST(TEST_CATEGORY, UnorderedMap_deep_copy) {
for (int i = 0; i < 2; ++i) test_deep_copy<TEST_EXECSPACE>(10000); for (int i = 0; i < 2; ++i) test_deep_copy<TEST_EXECSPACE>(10000);
} }
#endif
TEST(TEST_CATEGORY, UnorderedMap_valid_empty) {
using Key = int;
using Value = int;
using Map = Kokkos::UnorderedMap<Key, Value, TEST_EXECSPACE>;
Map m{};
Map n{};
n = Map{m.capacity()};
n.rehash(m.capacity());
Kokkos::deep_copy(n, m);
}
} // namespace Test } // namespace Test

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestDynViewAPI_rank67.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestScatterView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestStaticCrsGraph.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestUnorderedMap.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestViewCtorPropEmbeddedDim.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_generic.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_rank12345.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_rank67.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynamicView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestErrorReporter.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestOffsetView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestScatterView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestStaticCrsGraph.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestUnorderedMap.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestViewCtorPropEmbeddedDim.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <openmp/TestOpenMP_Category.hpp>
#include <TestDynViewAPI_generic.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <openmp/TestOpenMP_Category.hpp>
#include <TestDynViewAPI_rank12345.hpp>

Some files were not shown because too many files have changed in this diff Show More